ruby-statistics 2.0.5 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -0
- data/README.md +1 -1
- data/lib/statistics/distribution/empirical.rb +26 -0
- data/lib/statistics/distribution/weibull.rb +1 -1
- data/lib/statistics/spearman_rank_coefficient.rb +71 -0
- data/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb +70 -0
- data/lib/statistics/version.rb +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d33cf13ba623ecbb23488499a13a52c75c906b3eb258e7b146b628d10a84de89
|
4
|
+
data.tar.gz: a075adb8960b0906cd276a138e70fd3bc76c738d9ed70afac08d57ccd50d86b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0799701996d9c3496e35b9f2f73024c359bbc263c3e34995b047bbbda1c0acff8b1ae5bf323bd3f2fd1901bb9eb3331271ccbd2fc16d6911d179644a8ad1878f'
|
7
|
+
data.tar.gz: fe31571ab416c16b9832a4dff937e583c41a67b58a4676315f5cbde7720773cbb705b35973e16ef3e002cc542ea7b947e4a93157ae1ea09fecaa0950ecea7ab1
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).
|
data/README.md
CHANGED
@@ -52,7 +52,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
|
|
52
52
|
```
|
53
53
|
|
54
54
|
## Documentation
|
55
|
-
You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki
|
55
|
+
You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
|
56
56
|
|
57
57
|
## Development
|
58
58
|
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Statistics
|
2
|
+
module Distribution
|
3
|
+
class Empirical
|
4
|
+
attr_accessor :samples
|
5
|
+
|
6
|
+
def initialize(samples:)
|
7
|
+
self.samples = samples
|
8
|
+
end
|
9
|
+
|
10
|
+
# Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
|
11
|
+
def cumulative_function(x:)
|
12
|
+
cumulative_sum = samples.reduce(0) do |summation, sample|
|
13
|
+
summation += if sample <= x
|
14
|
+
1
|
15
|
+
else
|
16
|
+
0
|
17
|
+
end
|
18
|
+
|
19
|
+
summation
|
20
|
+
end
|
21
|
+
|
22
|
+
cumulative_sum / samples.size.to_f
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -45,7 +45,7 @@ module Statistics
|
|
45
45
|
# Using the inverse CDF function, also called quantile, we can calculate
|
46
46
|
# a random sample that follows a weibull distribution.
|
47
47
|
#
|
48
|
-
# Formula extracted from
|
48
|
+
# Formula extracted from https://www.taygeta.com/random/weibull.html
|
49
49
|
def random(elements: 1, seed: Random.new_seed)
|
50
50
|
results = []
|
51
51
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Statistics
|
2
|
+
class SpearmanRankCoefficient
|
3
|
+
def self.rank(data:, return_ranks_only: true)
|
4
|
+
descending_order_data = data.sort { |a, b| b <=> a }
|
5
|
+
rankings = {}
|
6
|
+
|
7
|
+
data.each do |value|
|
8
|
+
# If we have ties, the find_index method will only retrieve the index of the
|
9
|
+
first element in the list (i.e., the one closest to the left of the array),
|
10
|
+
# so when a tie is detected, we increase the temporal ranking by the number of
|
11
|
+
# counted elements at that particular time and then we increase the counter.
|
12
|
+
temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index
|
13
|
+
|
14
|
+
if rankings.fetch(value, false)
|
15
|
+
rankings[value][:rank] += (temporal_ranking + rankings[value][:counter])
|
16
|
+
rankings[value][:counter] += 1
|
17
|
+
rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_f
|
18
|
+
else
|
19
|
+
rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
if return_ranks_only
|
24
|
+
data.map do |value|
|
25
|
+
rankings[value][:tie_rank]
|
26
|
+
end
|
27
|
+
else
|
28
|
+
rankings
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
|
33
|
+
def self.coefficient(set_one, set_two)
|
34
|
+
raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
|
35
|
+
return if set_one.size == 0 && set_two.size == 0
|
36
|
+
|
37
|
+
set_one_mean, set_two_mean = set_one.mean, set_two.mean
|
38
|
+
have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) }
|
39
|
+
|
40
|
+
if have_tie_ranks
|
41
|
+
numerator = 0
|
42
|
+
squared_differences_set_one = 0
|
43
|
+
squared_differences_set_two = 0
|
44
|
+
|
45
|
+
set_one.size.times do |idx|
|
46
|
+
local_diff_one = (set_one[idx] - set_one_mean)
|
47
|
+
local_diff_two = (set_two[idx] - set_two_mean)
|
48
|
+
|
49
|
+
squared_differences_set_one += local_diff_one ** 2
|
50
|
+
squared_differences_set_two += local_diff_two ** 2
|
51
|
+
|
52
|
+
numerator += local_diff_one * local_diff_two
|
53
|
+
end
|
54
|
+
|
55
|
+
denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)
|
56
|
+
|
57
|
+
numerator / denominator.to_f # This is rho or spearman's coefficient.
|
58
|
+
else
|
59
|
+
sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)|
|
60
|
+
memo += ((rank_one - set_two[index]) ** 2)
|
61
|
+
memo
|
62
|
+
end
|
63
|
+
|
64
|
+
numerator = 6 * sum_squared_differences
|
65
|
+
denominator = ((set_one.size ** 3) - set_one.size)
|
66
|
+
|
67
|
+
1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Statistics
|
2
|
+
module StatisticalTest
|
3
|
+
class KolmogorovSmirnovTest
|
4
|
+
# Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
|
5
|
+
def self.two_samples(group_one:, group_two:, alpha: 0.05)
|
6
|
+
samples = group_one + group_two # We can use unbalanced group samples
|
7
|
+
|
8
|
+
ecdf_one = Distribution::Empirical.new(samples: group_one)
|
9
|
+
ecdf_two = Distribution::Empirical.new(samples: group_two)
|
10
|
+
|
11
|
+
d_max = samples.sort.map do |sample|
|
12
|
+
d1 = ecdf_one.cumulative_function(x: sample)
|
13
|
+
d2 = ecdf_two.cumulative_function(x: sample)
|
14
|
+
|
15
|
+
(d1 - d2).abs
|
16
|
+
end.max
|
17
|
+
|
18
|
+
# TODO: Validate calculation of Common alpha.
|
19
|
+
common_alpha = Math.sqrt((-0.5 * Math.log(alpha)))
|
20
|
+
radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f
|
21
|
+
|
22
|
+
critical_d = common_alpha * Math.sqrt(radicand)
|
23
|
+
# critical_d = self.critical_d(alpha: alpha, n: samples.size)
|
24
|
+
|
25
|
+
# We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
|
26
|
+
defined. We reject the null hypothesis if Dmax is greater than Dcritical.
|
27
|
+
{ d_max: d_max,
|
28
|
+
d_critical: critical_d,
|
29
|
+
total_samples: samples.size,
|
30
|
+
alpha: alpha,
|
31
|
+
null: d_max <= critical_d,
|
32
|
+
alternative: d_max > critical_d,
|
33
|
+
confidence_level: 1.0 - alpha }
|
34
|
+
end
|
35
|
+
|
36
|
+
# This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper,
|
37
|
+
# called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
|
38
|
+
# normality test".
|
39
|
+
# In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
|
40
|
+
to find an approximation of the real critical value. This is based on the conclusions made by
|
41
|
+
# Dagnelie (1968), who indicates that critical values given by Lilliefors can be approximated
|
42
|
+
# numerically.
|
43
|
+
#
|
44
|
+
# In general, the formula found is:
|
45
|
+
# C(N, alpha) ^ -2 = A(alpha) * N + B(alpha).
|
46
|
+
#
|
47
|
+
# Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
|
48
|
+
# of Monte Carlo simulations.
|
49
|
+
#
|
50
|
+
# paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
|
51
|
+
# def self.critical_d(alpha:, n:)
|
52
|
+
# confidence = 1.0 - alpha
|
53
|
+
|
54
|
+
# a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
|
55
|
+
# 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
|
56
|
+
# 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
|
57
|
+
# 18.48090137098585 * (confidence ** 6)
|
58
|
+
|
59
|
+
# b_alpha = 12.940399038404 - 53.458334259532 * confidence +
|
60
|
+
# 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
|
61
|
+
# 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
|
62
|
+
# 92.123451358715 * (confidence ** 6)
|
63
|
+
|
64
|
+
# Math.sqrt(1.0 / (a_alpha * n + b_alpha))
|
65
|
+
# end
|
66
|
+
end
|
67
|
+
|
68
|
+
KSTest = KolmogorovSmirnovTest # Alias
|
69
|
+
end
|
70
|
+
end
|
data/lib/statistics/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- esteban zapata
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- ".rspec"
|
127
127
|
- ".travis.yml"
|
128
128
|
- CODE_OF_CONDUCT.md
|
129
|
+
- CONTRIBUTING.md
|
129
130
|
- Gemfile
|
130
131
|
- LICENSE
|
131
132
|
- LICENSE.txt
|
@@ -141,6 +142,7 @@ files:
|
|
141
142
|
- lib/statistics/distribution/beta.rb
|
142
143
|
- lib/statistics/distribution/binomial.rb
|
143
144
|
- lib/statistics/distribution/chi_squared.rb
|
145
|
+
- lib/statistics/distribution/empirical.rb
|
144
146
|
- lib/statistics/distribution/f.rb
|
145
147
|
- lib/statistics/distribution/geometric.rb
|
146
148
|
- lib/statistics/distribution/logseries.rb
|
@@ -150,9 +152,11 @@ files:
|
|
150
152
|
- lib/statistics/distribution/t_student.rb
|
151
153
|
- lib/statistics/distribution/uniform.rb
|
152
154
|
- lib/statistics/distribution/weibull.rb
|
155
|
+
- lib/statistics/spearman_rank_coefficient.rb
|
153
156
|
- lib/statistics/statistical_test.rb
|
154
157
|
- lib/statistics/statistical_test/chi_squared_test.rb
|
155
158
|
- lib/statistics/statistical_test/f_test.rb
|
159
|
+
- lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
|
156
160
|
- lib/statistics/statistical_test/t_test.rb
|
157
161
|
- lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
|
158
162
|
- lib/statistics/version.rb
|
@@ -177,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
181
|
version: '0'
|
178
182
|
requirements: []
|
179
183
|
rubyforge_project:
|
180
|
-
rubygems_version: 2.7.
|
184
|
+
rubygems_version: 2.7.7
|
181
185
|
signing_key:
|
182
186
|
specification_version: 4
|
183
187
|
summary: A ruby gem for som specific statistics. Inspired by the jStat js library.
|