ruby-statistics 2.0.5 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +1 -0
- data/README.md +1 -1
- data/lib/statistics/distribution/empirical.rb +26 -0
- data/lib/statistics/distribution/weibull.rb +1 -1
- data/lib/statistics/spearman_rank_coefficient.rb +71 -0
- data/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb +70 -0
- data/lib/statistics/version.rb +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d33cf13ba623ecbb23488499a13a52c75c906b3eb258e7b146b628d10a84de89
|
4
|
+
data.tar.gz: a075adb8960b0906cd276a138e70fd3bc76c738d9ed70afac08d57ccd50d86b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0799701996d9c3496e35b9f2f73024c359bbc263c3e34995b047bbbda1c0acff8b1ae5bf323bd3f2fd1901bb9eb3331271ccbd2fc16d6911d179644a8ad1878f'
|
7
|
+
data.tar.gz: fe31571ab416c16b9832a4dff937e583c41a67b58a4676315f5cbde7720773cbb705b35973e16ef3e002cc542ea7b947e4a93157ae1ea09fecaa0950ecea7ab1
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).
|
data/README.md
CHANGED
@@ -52,7 +52,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
|
|
52
52
|
```
|
53
53
|
|
54
54
|
## Documentation
|
55
|
-
You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki
|
55
|
+
You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
|
56
56
|
|
57
57
|
## Development
|
58
58
|
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Statistics
  module Distribution
    # Empirical distribution built directly from a collection of observed samples.
    class Empirical
      # The observed values the distribution is computed from. They only need to
      # respond to `count` and `size`, and their elements must be comparable
      # with the `x` passed to #cumulative_function through `<=`.
      attr_accessor :samples

      # @param samples the observed values (typically an Array of numbers).
      def initialize(samples:)
        self.samples = samples
      end

      # Empirical cumulative distribution function: the fraction of samples
      # that are less than or equal to `x`.
      #
      # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
      #
      # @param x the point at which the ECDF is evaluated.
      # @return [Float] a value between 0.0 and 1.0. When `samples` is empty the
      #   division is 0 / 0.0, so the result is NaN.
      def cumulative_function(x:)
        samples.count { |sample| sample <= x } / samples.size.to_f
      end
    end
  end
end
|
@@ -45,7 +45,7 @@ module Statistics
|
|
45
45
|
# Using the inverse CDF function, also called quantile, we can calculate
|
46
46
|
# a random sample that follows a weibull distribution.
|
47
47
|
#
|
48
|
-
# Formula extracted from
|
48
|
+
# Formula extracted from https://www.taygeta.com/random/weibull.html
|
49
49
|
def random(elements: 1, seed: Random.new_seed)
|
50
50
|
results = []
|
51
51
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Statistics
  # Helpers to rank a data set and to compute Spearman's rank correlation
  # coefficient (rho) between two sets of ranks.
  class SpearmanRankCoefficient
    # Ranks `data` in descending order: the largest value receives rank 1.
    # Tied values share the average of the ranks they occupy, so a tie
    # produces a Float rank while untied values keep an Integer rank.
    #
    # @param data the values to rank (must be sortable with `<=>`).
    # @param return_ranks_only [Boolean] when true (default) an Array of ranks
    #   aligned with `data` is returned; otherwise the internal Hash keyed by
    #   value, holding `:counter` (occurrences), `:rank` (sum of the ranks
    #   occupied) and `:tie_rank` (the final rank) for each distinct value.
    # @return [Array, Hash] see `return_ranks_only`.
    def self.rank(data:, return_ranks_only: true)
      descending_order_data = data.sort { |a, b| b <=> a }
      rankings = {}

      data.each do |value|
        # find_index only retrieves the position of the first (left-most)
        # occurrence, so every repetition of a value sees the same position.
        # For a repeated value we add that position shifted by the number of
        # occurrences already counted, then average over the new count.
        temporal_ranking = descending_order_data.find_index(value) + 1 # find_index is 0-based

        if rankings.key?(value)
          entry = rankings[value]
          entry[:rank] += temporal_ranking + entry[:counter]
          entry[:counter] += 1
          entry[:tie_rank] = entry[:rank] / entry[:counter].to_f
        else
          rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking }
        end
      end

      return rankings unless return_ranks_only

      data.map { |value| rankings[value][:tie_rank] }
    end

    # Computes Spearman's rho for two equally sized sets of ranks.
    #
    # The presence of any Float rank is taken as the sign that ties exist
    # (see .rank); in that case the general correlation formula is used,
    # otherwise the simplified formula based on squared rank differences.
    #
    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
    #
    # @param set_one [Array] ranks of the first group.
    # @param set_two [Array] ranks of the second group.
    # @return [Float, nil] rho, or nil when both sets are empty.
    # @raise [RuntimeError] when the sets have different sizes.
    def self.coefficient(set_one, set_two)
      raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
      return if set_one.size.zero? # sizes are equal here, so both sets are empty

      have_tie_ranks = [set_one, set_two].any? do |set|
        set.any? { |rank| rank.is_a?(Float) }
      end

      if have_tie_ranks
        rho_with_tie_ranks(set_one, set_two)
      else
        rho_without_tie_ranks(set_one, set_two)
      end
    end

    # Rho for ranks containing ties: covariance of the ranks divided by the
    # product of their deviations.
    # NOTE(review): relies on the sets responding to `mean`, which is not
    # defined in this file — presumably provided elsewhere in the gem.
    def self.rho_with_tie_ranks(set_one, set_two)
      set_one_mean = set_one.mean
      set_two_mean = set_two.mean

      numerator = 0
      squared_differences_set_one = 0
      squared_differences_set_two = 0

      set_one.each_with_index do |rank_one, index|
        local_diff_one = rank_one - set_one_mean
        local_diff_two = set_two[index] - set_two_mean

        squared_differences_set_one += local_diff_one**2
        squared_differences_set_two += local_diff_two**2
        numerator += local_diff_one * local_diff_two
      end

      # Math.sqrt already returns a Float, so the division is a float division.
      numerator / Math.sqrt(squared_differences_set_one * squared_differences_set_two)
    end
    private_class_method :rho_with_tie_ranks

    # Rho for ranks without ties: 1 - (6 * sum(d^2)) / (n^3 - n), where d is
    # the difference between paired ranks and n the number of pairs.
    def self.rho_without_tie_ranks(set_one, set_two)
      sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)|
        memo + ((rank_one - set_two[index])**2)
      end

      cases = set_one.size

      1.0 - ((6 * sum_squared_differences) / ((cases**3) - cases).to_f)
    end
    private_class_method :rho_without_tie_ranks
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Statistics
  module StatisticalTest
    # Two-sample Kolmogorov-Smirnov test based on the empirical distribution
    # functions of both groups.
    class KolmogorovSmirnovTest
      # Compares the empirical distributions of two groups.
      #
      # The coefficient c(alpha) and the critical D are calculated following the formulas from:
      # https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
      #
      # @param group_one [Array] samples of the first group.
      # @param group_two [Array] samples of the second group (sizes may differ).
      # @param alpha [Numeric] significance level, 0.05 by default.
      # @return [Hash] with keys :d_max, :d_critical, :total_samples, :alpha,
      #   :null (true when d_max <= d_critical), :alternative (true when
      #   d_max > d_critical) and :confidence_level (1.0 - alpha).
      # @raise [ArgumentError] when either group is empty, because the
      #   empirical distribution and the critical value are undefined then.
      def self.two_samples(group_one:, group_two:, alpha: 0.05)
        if group_one.empty? || group_two.empty?
          raise ArgumentError, 'Both groups must contain at least one sample.'
        end

        samples = group_one + group_two # We can use unbalanced group samples

        ecdf_one = Distribution::Empirical.new(samples: group_one)
        ecdf_two = Distribution::Empirical.new(samples: group_two)

        # The largest gap between both ECDFs can only occur at an observed
        # sample, and the maximum does not depend on the evaluation order.
        d_max = samples.map do |sample|
          d1 = ecdf_one.cumulative_function(x: sample)
          d2 = ecdf_two.cumulative_function(x: sample)

          (d1 - d2).abs
        end.max

        # c(alpha) = sqrt(-ln(alpha / 2) / 2). The halving of alpha matters:
        # without it, alpha = 0.05 would yield the coefficient that belongs
        # to alpha = 0.10 (1.22 instead of 1.36).
        common_alpha = Math.sqrt(-0.5 * Math.log(alpha / 2.0))
        radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f

        critical_d = common_alpha * Math.sqrt(radicand)
        # critical_d = self.critical_d(alpha: alpha, n: samples.size)

        # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
        # defined. We reject the null hypothesis if Dmax is > than Dcritical.
        { d_max: d_max,
          d_critical: critical_d,
          total_samples: samples.size,
          alpha: alpha,
          null: d_max <= critical_d,
          alternative: d_max > critical_d,
          confidence_level: 1.0 - alpha }
      end

      # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper,
      # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
      # normality test".
      # In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
      # to find an approximation of the real critical value. This is based on the conclusions made by
      # Dagnelie (1968), which indicate that critical values given by Lilliefors can be approximated
      # numerically.
      #
      # In general, the formula found is:
      # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha).
      #
      # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
      # of Monte Carlo simulations.
      #
      # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
      # def self.critical_d(alpha:, n:)
      #   confidence = 1.0 - alpha

      #   a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
      #     38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
      #     7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
      #     18.48090137098585 * (confidence ** 6)

      #   b_alpha = 12.940399038404 - 53.458334259532 * confidence +
      #     186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
      #     517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
      #     92.123451358715 * (confidence ** 6)

      #   Math.sqrt(1.0 / (a_alpha * n + b_alpha))
      # end
    end

    KSTest = KolmogorovSmirnovTest # Alias
  end
end
|
data/lib/statistics/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- esteban zapata
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- ".rspec"
|
127
127
|
- ".travis.yml"
|
128
128
|
- CODE_OF_CONDUCT.md
|
129
|
+
- CONTRIBUTING.md
|
129
130
|
- Gemfile
|
130
131
|
- LICENSE
|
131
132
|
- LICENSE.txt
|
@@ -141,6 +142,7 @@ files:
|
|
141
142
|
- lib/statistics/distribution/beta.rb
|
142
143
|
- lib/statistics/distribution/binomial.rb
|
143
144
|
- lib/statistics/distribution/chi_squared.rb
|
145
|
+
- lib/statistics/distribution/empirical.rb
|
144
146
|
- lib/statistics/distribution/f.rb
|
145
147
|
- lib/statistics/distribution/geometric.rb
|
146
148
|
- lib/statistics/distribution/logseries.rb
|
@@ -150,9 +152,11 @@ files:
|
|
150
152
|
- lib/statistics/distribution/t_student.rb
|
151
153
|
- lib/statistics/distribution/uniform.rb
|
152
154
|
- lib/statistics/distribution/weibull.rb
|
155
|
+
- lib/statistics/spearman_rank_coefficient.rb
|
153
156
|
- lib/statistics/statistical_test.rb
|
154
157
|
- lib/statistics/statistical_test/chi_squared_test.rb
|
155
158
|
- lib/statistics/statistical_test/f_test.rb
|
159
|
+
- lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
|
156
160
|
- lib/statistics/statistical_test/t_test.rb
|
157
161
|
- lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
|
158
162
|
- lib/statistics/version.rb
|
@@ -177,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
181
|
version: '0'
|
178
182
|
requirements: []
|
179
183
|
rubyforge_project:
|
180
|
-
rubygems_version: 2.7.
|
184
|
+
rubygems_version: 2.7.7
|
181
185
|
signing_key:
|
182
186
|
specification_version: 4
|
183
187
|
summary: A ruby gem for som specific statistics. Inspired by the jStat js library.
|