ruby-statistics 2.0.4 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 298bc7d8dff1aeabc7db9c11fe9d7987f16bde40
4
- data.tar.gz: 1d796e62c18052f87fc2616b4c1a5f777080c1ab
2
+ SHA256:
3
+ metadata.gz: 6612502f03d8077d0158d997a42dfbc4d1002f2ab01ce2b7bdb5fbd510187e3e
4
+ data.tar.gz: 14fb04073b5b788dfa9e93aa586daef050dd105c2d2f8bdd17db30ad1fbcf144
5
5
  SHA512:
6
- metadata.gz: 98e8c58f34668e839be9689c74debd75bd7a6869372536d7e9927a63f77fca59ab05e06b413705f0d286094292cb566c01e6fe71145cdd7d2152fc930829910e
7
- data.tar.gz: 37b78191adb8d659f21134346a8a415c5bd7bd8a7dd99b2c1f8d7793a2ea741c43e60d8235a7d5fcc2bc0284b24e8e58e8404e0b4b4401ee3bc60f7e1afc8b8b
6
+ metadata.gz: '09590f836a59563819a1a847830e5dc2ee3554415cadc81c35b2a0f43ab1af87204f028659e8aa2f30a14b58c69c3e4f65db5e722d0a00ced5d92faa1e7dce82'
7
+ data.tar.gz: 2e66a26c23bf1f05cb9de40e992b302c4f0fef13aa70b4e509de479cb15b9700d4032f5d548aa45110f161ef9dac417f9b1872479a02dca0e729a051be2a4fc8
@@ -0,0 +1,15 @@
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ - package-ecosystem: "bundler" # See documentation for possible values
9
+ directory: "/" # Location of package manifests
10
+ schedule:
11
+ interval: "weekly"
12
+ - package-ecosystem: "github-actions" # See documentation for possible values
13
+ directory: "/" # Location of package manifests
14
+ schedule:
15
+ interval: "weekly"
@@ -0,0 +1,35 @@
1
+ name: Ruby
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ build:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ - uses: actions/checkout@v2.3.4
12
+ - name: Set up Ruby 2.6
13
+ uses: actions/setup-ruby@v1.1.2
14
+ with:
15
+ ruby-version: 2.6.x
16
+ - name: Build and test with Rake
17
+ run: |
18
+ gem install bundler
19
+ bundle install --jobs 2 --retry 1
20
+ bundle exec rake
21
+ build_2_7:
22
+
23
+ runs-on: ubuntu-latest
24
+
25
+ steps:
26
+ - uses: actions/checkout@v2.3.4
27
+ - name: Set up Ruby 2.7
28
+ uses: actions/setup-ruby@v1.1.2
29
+ with:
30
+ ruby-version: 2.7.x
31
+ - name: Build and test with Rake
32
+ run: |
33
+ gem install bundler
34
+ bundle install --jobs 2 --retry 1
35
+ bundle exec rake
data/.travis.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 2.2
5
- - 2.3.1
6
- - 2.4.0
7
- - 2.5.0
8
- before_install: gem install bundler
4
+ - 2.5.1
5
+ - 2.6.0
6
+ - 2.6.3
7
+ - 2.6.5
8
+ - 2.7
9
+ before_install: gem update --system && gem install bundler
data/CONTRIBUTING.md ADDED
@@ -0,0 +1 @@
1
+ Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).
data/README.md CHANGED
@@ -5,10 +5,11 @@
5
5
  A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar.
6
6
 
7
7
  Unit test runs under the following ruby versions:
8
- * Ruby 2.2.
9
- * Ruby 2.3.1.
10
- * Ruby 2.4.0.
11
- * Ruby 2.5.0.
8
+ * Ruby 2.5.1.
9
+ * Ruby 2.6.0.
10
+ * Ruby 2.6.3.
11
+ * Ruby 2.6.5.
12
+ * Ruby 2.7.
12
13
 
13
14
  We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html).
14
15
 
@@ -52,7 +53,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
52
53
  ```
53
54
 
54
55
  ## Documentation
55
- You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki/Documentation-Index)
56
+ You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
56
57
 
57
58
  ## Development
58
59
 
data/lib/math.rb CHANGED
@@ -9,11 +9,11 @@ module Math
9
9
  end
10
10
 
11
11
  def self.combination(n, r)
12
- self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_f # n!/(r! * [n - r]!)
12
+ self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_r # n!/(r! * [n - r]!)
13
13
  end
14
14
 
15
15
  def self.permutation(n, k)
16
- self.factorial(n)/self.factorial(n - k).to_f
16
+ self.factorial(n)/self.factorial(n - k).to_r
17
17
  end
18
18
 
19
19
  # Function adapted from the python implementation that exists in https://en.wikipedia.org/wiki/Simpson%27s_rule#Sample_implementation
@@ -24,7 +24,8 @@ module Math
24
24
  return
25
25
  end
26
26
 
27
- h = (b - a)/n.to_f
27
+ h = (b - a)/n.to_r
28
+
28
29
  resA = yield(a)
29
30
  resB = yield(b)
30
31
 
@@ -45,7 +46,7 @@ module Math
45
46
 
46
47
  def self.lower_incomplete_gamma_function(s, x)
47
48
  # The greater the iterations, the better. That's why we are iterating 10_000 * x times
48
- self.simpson_rule(0, x, (10_000 * x.round).round) do |t|
49
+ self.simpson_rule(0, x.to_r, (10_000 * x.round).round) do |t|
49
50
  (t ** (s - 1)) * Math.exp(-t)
50
51
  end
51
52
  end
@@ -72,7 +73,7 @@ module Math
72
73
  # To avoid overflow problems, the implementation applies the logarithm properties
73
74
  # to calculate in a faster and safer way the values.
74
75
  lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze
75
- front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_f).freeze
76
+ front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_r).freeze
76
77
 
77
78
  # This is the non-log version of the left part of the formula (before the continuous fraction)
78
79
  # down_left = alp * self.beta_function(alp, bet)
@@ -0,0 +1,35 @@
1
module Statistics
  module Distribution
    # Stateless helpers for the Bernoulli distribution. Every method takes the
    # parameter `p` explicitly; the formulas treat `p` as the probability
    # assigned to the outcome 1.
    class Bernoulli
      class << self
        # Probability mass at `n`.
        #
        # Returns `1.0 - p` for n = 0, `p` for n = 1 and nil for any other
        # value, because the support of the distribution is {0, 1}.
        def density_function(n, p)
          if n == 0
            1.0 - p
          elsif n == 1
            p
          end
        end

        # Cumulative probability evaluated at `n`.
        #
        # Returns `1.0 - p` for n = 0, `1.0` for n = 1 and nil for values
        # outside the support {0, 1}.
        def cumulative_function(n, p)
          if n == 0
            1.0 - p
          elsif n == 1
            1.0
          end
        end

        # Variance: p * (1 - p).
        def variance(p)
          p * (1.0 - p)
        end

        # Skewness: (1 - 2p) / sqrt(p * (1 - p)).
        def skewness(p)
          numerator = (1.0 - 2.0 * p).to_r
          standard_deviation = Math.sqrt(variance(p))

          numerator / standard_deviation
        end

        # Kurtosis as computed by (6p^2 - 6p + 1) / (p * (1 - p)).
        def kurtosis(p)
          numerator = 6.0 * (p ** 2) - (6 * p) + 1

          numerator / variance(p)
        end
      end
    end
  end
end
@@ -4,8 +4,8 @@ module Statistics
4
4
  attr_accessor :alpha, :beta
5
5
 
6
6
  def initialize(alp, bet)
7
- self.alpha = alp.to_f
8
- self.beta = bet.to_f
7
+ self.alpha = alp.to_r
8
+ self.beta = bet.to_r
9
9
  end
10
10
 
11
11
  def cumulative_function(value)
@@ -0,0 +1,26 @@
1
module Statistics
  module Distribution
    # Empirical distribution built from a collection of observed samples.
    class Empirical
      attr_accessor :samples

      # samples: the observations the distribution is estimated from.
      def initialize(samples:)
        self.samples = samples
      end

      # Empirical CDF: the fraction of samples that are less than or equal to `x`,
      # returned as a Rational.
      # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
      def cumulative_function(x:)
        at_or_below = samples.count { |sample| sample <= x }

        at_or_below / samples.size.to_r
      end
    end
  end
end
@@ -10,7 +10,7 @@ module Statistics
10
10
 
11
11
  # Formula extracted from http://www.itl.nist.gov/div898/handbook/eda/section3/eda3665.htm#CDF
12
12
  def cumulative_function(value)
13
- k = d2/(d2 + d1 * value.to_f)
13
+ k = d2/(d2 + d1 * value.to_r)
14
14
 
15
15
  1 - Math.incomplete_beta_function(k, d2/2.0, d1/2.0)
16
16
  end
@@ -18,28 +18,28 @@ module Statistics
18
18
  def density_function(value)
19
19
  return if d1 < 0 || d2 < 0 # F-pdf is well defined for the [0, +infinity) interval.
20
20
 
21
- val = value.to_f
21
+ val = value.to_r
22
22
  upper = ((d1 * val) ** d1) * (d2**d2)
23
23
  lower = (d1 * val + d2) ** (d1 + d2)
24
- up = Math.sqrt(upper/lower.to_f)
24
+ up = Math.sqrt(upper/lower.to_r)
25
25
  down = val * Math.beta_function(d1/2.0, d2/2.0)
26
26
 
27
- up/down.to_f
27
+ up/down.to_r
28
28
  end
29
29
 
30
30
  def mean
31
31
  return if d2 <= 2
32
32
 
33
- d2/(d2 - 2).to_f
33
+ d2/(d2 - 2).to_r
34
34
  end
35
35
 
36
36
  def mode
37
37
  return if d1 <= 2
38
38
 
39
- left = (d1 - 2)/d1.to_f
40
- right = d2/(d2 + 2).to_f
39
+ left = (d1 - 2)/d1.to_r
40
+ right = d2/(d2 + 2).to_r
41
41
 
42
- left * right
42
+ (left * right).to_f
43
43
  end
44
44
  end
45
45
  end
@@ -0,0 +1,76 @@
1
module Statistics
  module Distribution
    # Geometric distribution with success probability `p`.
    #
    # Two conventions are supported through the `always_success` flag:
    # * false (default): the support starts at k = 1.
    # * true: the support starts at k = 0.
    class Geometric
      attr_accessor :probability_of_success, :always_success_allowed

      # p is stored as a Rational; always_success selects the support (see above).
      def initialize(p, always_success: false)
        self.probability_of_success = p.to_r
        self.always_success_allowed = always_success
      end

      # Probability mass at `k` (truncated to an Integer).
      # Returns nil when `k` lies below the first value of the support.
      def density_function(k)
        k = k.to_i
        return if k < first_supported_value

        exponent = always_success_allowed ? k : k - 1.0

        (failure_probability ** exponent) * probability_of_success
      end

      # Cumulative probability up to `k` (truncated to an Integer).
      # Returns nil when `k` lies below the first value of the support.
      def cumulative_function(k)
        k = k.to_i
        return if k < first_supported_value

        exponent = always_success_allowed ? k + 1.0 : k

        1.0 - (failure_probability ** exponent)
      end

      def mean
        numerator = always_success_allowed ? failure_probability : 1.0

        numerator / probability_of_success
      end

      def median
        trials = (-1.0 / Math.log2(failure_probability)).ceil

        always_success_allowed ? trials - 1.0 : trials
      end

      def mode
        always_success_allowed ? 0.0 : 1.0
      end

      def variance
        failure_probability / (probability_of_success ** 2)
      end

      def skewness
        (2.0 - probability_of_success) / Math.sqrt(failure_probability)
      end

      def kurtosis
        6.0 + ((probability_of_success ** 2) / failure_probability)
      end

      private

      # Complement of the success probability, as a Float.
      def failure_probability
        1.0 - probability_of_success
      end

      # Smallest k for which the density and CDF are defined.
      def first_supported_value
        always_success_allowed ? 0 : 1
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Statistics
  module Distribution
    # Stateless helpers for the logarithmic (log-series) distribution.
    # Every method takes the distribution parameter `p` explicitly.
    class LogSeries
      # Probability mass at `k`: (-1 / ln(1 - p)) * p^k / k.
      #
      # `k` is truncated to an Integer BEFORE the support check. Checking first
      # let a fractional value such as 0.5 pass the guard, get truncated to 0
      # and then be used as a divisor, which produced Infinity instead of nil.
      #
      # Returns nil when the truncated `k` is not positive.
      def self.density_function(k, p)
        k = k.to_i
        return if k <= 0

        left = (-1.0 / Math.log(1.0 - p))
        right = (p ** k).to_r

        left * right / k
      end

      # Cumulative probability P(X <= k). Returns nil when `k` is not positive.
      def self.cumulative_function(k, p)
        return if k <= 0

        # Sadly, the incomplete beta function is converging
        # too fast to zero and breaking the calculation on logs.
        # So, we default to the basic definition of the CDF which is
        # the integral (-Inf, K) of the PDF, with P(X <= x) which can
        # be solved as a summation of all PDFs from 1 to K. Note that the summation approach
        # only applies to discrete distributions.
        #
        # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p)
        # 1.0 + right

        result = 0.0
        1.upto(k) do |number|
          result += density_function(number, p)
        end

        result
      end

      # The mode is always 1.
      def self.mode
        1.0
      end

      # Mean: (-1 / ln(1 - p)) * (p / (1 - p)).
      def self.mean(p)
        (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p))
      end

      # Variance: -p * (p + ln(1 - p)) / ((1 - p)^2 * ln(1 - p)^2).
      def self.variance(p)
        log_complement = Math.log(1.0 - p)
        up = p + log_complement
        down = ((1.0 - p) ** 2) * (log_complement ** 2)

        (-1.0 * p) * (up / down.to_r)
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Statistics
  module Distribution
    # Negative binomial distribution parameterised by a number of failures `r`
    # (truncated to an Integer) and a per-trial probability `p` (stored as given).
    class NegativeBinomial
      attr_accessor :number_of_failures, :probability_per_trial

      def initialize(r, p)
        self.number_of_failures = r.to_i
        self.probability_per_trial = p
      end

      # Probability mass at `k`: C(k + r - 1, k) * (1 - p)^r * p^k.
      #
      # Returns nil when r is negative, k is negative or k exceeds r.
      # NOTE(review): the textbook distribution places no upper bound on k;
      # confirm that rejecting k > r is intentional.
      def probability_mass_function(k)
        return if number_of_failures < 0 || k < 0 || k > number_of_failures

        combinations = Math.combination(k + number_of_failures - 1, k)
        failure_term = (1 - probability_per_trial) ** number_of_failures
        success_term = probability_per_trial ** k

        combinations * (failure_term * success_term)
      end

      # Cumulative probability at `k`, computed through the project's
      # Math.incomplete_beta_function. Returns nil when k is negative or exceeds r.
      def cumulative_function(k)
        return if k < 0 || k > number_of_failures

        successes = k.to_i

        1.0 - Math.incomplete_beta_function(probability_per_trial, successes + 1, number_of_failures)
      end

      # Mean: p * r / (1 - p).
      def mean
        scaled_probability / (1 - probability_per_trial).to_r
      end

      # Variance: p * r / (1 - p)^2.
      def variance
        scaled_probability / ((1 - probability_per_trial) ** 2).to_r
      end

      # Skewness: (1 + p) / sqrt(p * r).
      def skewness
        (1 + probability_per_trial).to_r / Math.sqrt(scaled_probability)
      end

      # Mode: 0.0 when r <= 1, otherwise floor(p * (r - 1) / (1 - p)).
      def mode
        if number_of_failures <= 1
          0.0
        elsif number_of_failures > 1
          up = probability_per_trial * (number_of_failures - 1)
          down = (1 - probability_per_trial).to_r

          (up / down).floor
        end
      end

      private

      # p * r, the product shared by mean, variance and skewness.
      def scaled_probability
        probability_per_trial * number_of_failures
      end
    end
  end
end
@@ -5,9 +5,9 @@ module Statistics
5
5
  alias_method :mode, :mean
6
6
 
7
7
  def initialize(avg, std)
8
- self.mean = avg.to_f
9
- self.standard_deviation = std.to_f
10
- self.variance = std.to_f**2
8
+ self.mean = avg.to_r
9
+ self.standard_deviation = std.to_r
10
+ self.variance = std.to_r**2
11
11
  end
12
12
 
13
13
  def cumulative_function(value)
@@ -79,5 +79,61 @@ module Statistics
79
79
  euler/Math.sqrt(2 * Math::PI)
80
80
  end
81
81
  end
82
+
83
# Inverse Standard Normal distribution:
# References:
# https://en.wikipedia.org/wiki/Inverse_distribution
# http://www.source-code.biz/snippets/vbasic/9.htm
#
# `cumulative_function` maps a probability in [0, 1] to a value through
# piecewise rational approximations; the A*/B* coefficients are used in the
# central region and the C*/D* coefficients in both tails.
class InverseStandardNormal < StandardNormal
  # Central-region numerator coefficients.
  A1 = -39.6968302866538
  A2 = 220.946098424521
  A3 = -275.928510446969
  A4 = 138.357751867269
  A5 = -30.6647980661472
  A6 = 2.50662827745924

  # Central-region denominator coefficients.
  B1 = -54.4760987982241
  B2 = 161.585836858041
  B3 = -155.698979859887
  B4 = 66.8013118877197
  B5 = -13.2806815528857

  # Tail numerator coefficients.
  C1 = -7.78489400243029E-03
  C2 = -0.322396458041136
  C3 = -2.40075827716184
  C4 = -2.54973253934373
  C5 = 4.37466414146497
  C6 = 2.93816398269878

  # Tail denominator coefficients.
  D1 = 7.78469570904146E-03
  D2 = 0.32246712907004
  D3 = 2.445134137143
  D4 = 3.75440866190742

  # Break-points between the lower tail, the central region and the upper tail.
  P_LOW = 0.02425
  P_HIGH = 1 - P_LOW

  # Not available for the inverse distribution.
  def density_function(_)
    raise NotImplementedError
  end

  # Not available for the inverse distribution.
  def random(elements: 1, seed: Random.new_seed)
    raise NotImplementedError
  end

  # Returns nil when `value` is outside [0, 1], -Infinity for 0 and
  # +Infinity for 1; otherwise evaluates the approximation of the region
  # that `value` falls into.
  def cumulative_function(value)
    return if value < 0.0 || value > 1.0
    return -1.0 * Float::INFINITY if value.zero?
    return Float::INFINITY if value == 1.0

    if value < P_LOW
      # Lower tail.
      q = Math.sqrt((Math.log(value) * -2.0))
      numerator = ((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6
      denominator = (((D1 * q + D2) * q + D3) * q + D4) * q + 1.0

      numerator / denominator
    elsif value <= P_HIGH
      # Central region.
      q = value - 0.5
      r = q ** 2
      numerator = (((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q
      denominator = ((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0

      numerator / denominator
    else
      # Upper tail: same rational function as the lower tail, with the sign flipped.
      q = Math.sqrt((Math.log(1 - value) * -2.0))
      numerator = ((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6
      denominator = (((D1 * q + D2) * q + D3) * q + D4) * q + 1

      -numerator / denominator
    end
  end
end
82
138
  end
83
139
  end
@@ -18,7 +18,7 @@ module Statistics
18
18
  upper = (expected_number_of_occurrences ** k) * Math.exp(-expected_number_of_occurrences)
19
19
  lower = Math.factorial(k)
20
20
 
21
- upper/lower.to_f
21
+ upper/lower.to_r
22
22
  end
23
23
 
24
24
  def cumulative_function(k)
@@ -31,7 +31,7 @@ module Statistics
31
31
 
32
32
  # We need the right tail, i.e.: The upper incomplete gamma function. This can be
33
33
  # achieved by doing a subtraction between 1 and the lower incomplete gamma function.
34
- 1 - (upper/lower.to_f)
34
+ 1 - (upper/lower.to_r)
35
35
  end
36
36
  end
37
37
  end
@@ -29,7 +29,7 @@ module Statistics
29
29
  upper = Math.gamma((degrees_of_freedom + 1)/2.0)
30
30
  lower = Math.sqrt(degrees_of_freedom * Math::PI) * Math.gamma(degrees_of_freedom/2.0)
31
31
  left = upper/lower
32
- right = (1 + ((value ** 2)/degrees_of_freedom.to_f)) ** -((degrees_of_freedom + 1)/2.0)
32
+ right = (1 + ((value ** 2)/degrees_of_freedom.to_r)) ** -((degrees_of_freedom + 1)/2.0)
33
33
 
34
34
  left * right
35
35
  end
@@ -64,8 +64,8 @@ module Statistics
64
64
  results << Math.simpson_rule(threshold, y, 10_000) do |t|
65
65
  up = Math.gamma((v+1)/2.0)
66
66
  down = Math.sqrt(Math::PI * v) * Math.gamma(v/2.0)
67
- right = (1 + ((y ** 2)/v.to_f)) ** ((v+1)/2.0)
68
- left = up/down.to_f
67
+ right = (1 + ((y ** 2)/v.to_r)) ** ((v+1)/2.0)
68
+ left = up/down.to_r
69
69
 
70
70
  left * right
71
71
  end
@@ -4,8 +4,8 @@ module Statistics
4
4
  attr_accessor :left, :right
5
5
 
6
6
  def initialize(a, b)
7
- self.left = a.to_f
8
- self.right = b.to_f
7
+ self.left = a.to_r
8
+ self.right = b.to_r
9
9
  end
10
10
 
11
11
  def density_function(value)
@@ -4,8 +4,8 @@ module Statistics
4
4
  attr_accessor :shape, :scale # k and lambda
5
5
 
6
6
  def initialize(k, lamb)
7
- self.shape = k.to_f
8
- self.scale = lamb.to_f
7
+ self.shape = k.to_r
8
+ self.scale = lamb.to_r
9
9
  end
10
10
 
11
11
  def cumulative_function(random_value)
@@ -45,7 +45,7 @@ module Statistics
45
45
  # Using the inverse CDF function, also called quantile, we can calculate
46
46
  # a random sample that follows a weibull distribution.
47
47
  #
48
- # Formula extracted from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
48
+ # Formula extracted from https://www.taygeta.com/random/weibull.html
49
49
  def random(elements: 1, seed: Random.new_seed)
50
50
  results = []
51
51
 
@@ -0,0 +1,71 @@
1
module Statistics
  # Ranking helper and Spearman's rank correlation coefficient (rho).
  class SpearmanRankCoefficient
    # Ranks `data` in descending order (the largest value gets rank 1).
    #
    # Tied values share the average of the ranks they span, stored under
    # :tie_rank as a Rational. With return_ranks_only: true (default) an array
    # of tie ranks aligned with `data` is returned; otherwise the full
    # bookkeeping hash { value => { counter:, rank:, tie_rank: } } is returned.
    def self.rank(data:, return_ranks_only: true)
      sorted_descending = data.sort { |a, b| b <=> a }
      rankings = {}

      data.each do |value|
        # find_index always yields the left-most position of a value, so every
        # repeated occurrence is offset by the number of occurrences already
        # counted; dividing the accumulated rank by the counter averages them.
        first_position = sorted_descending.find_index(value) + 1 # find_index is 0-based
        entry = rankings[value]

        if entry
          entry[:rank] += (first_position + entry[:counter])
          entry[:counter] += 1
          entry[:tie_rank] = entry[:rank] / entry[:counter].to_r
        else
          rankings[value] = { counter: 1, rank: first_position, tie_rank: first_position }
        end
      end

      return rankings unless return_ranks_only

      data.map { |value| rankings[value][:tie_rank] }
    end

    # Spearman's rho for two sets of ranks.
    #
    # Raises when the sets differ in size and returns nil when both are empty.
    # When any rank is a Float or Rational (i.e. ties are present) the general
    # covariance-based formula is used; otherwise the shortcut
    # 1 - 6 * sum(d^2) / (n^3 - n) applies.
    # Relies on the sets responding to `mean`, which is defined outside this class.
    #
    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
    def self.coefficient(set_one, set_two)
      raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
      return if set_one.size == 0 && set_two.size == 0

      mean_one = set_one.mean
      mean_two = set_two.mean
      ties_present = (set_one + set_two).any? { |rank| rank.is_a?(Float) || rank.is_a?(Rational) }
      paired_ranks = set_one.zip(set_two)

      if ties_present
        covariance_sum = 0
        squares_one = 0
        squares_two = 0

        paired_ranks.each do |rank_one, rank_two|
          deviation_one = rank_one - mean_one
          deviation_two = rank_two - mean_two

          squares_one += deviation_one ** 2
          squares_two += deviation_two ** 2

          covariance_sum += deviation_one * deviation_two
        end

        spread = Math.sqrt(squares_one * squares_two)

        covariance_sum / spread.to_r # This is rho or spearman's coefficient.
      else
        sum_squared_differences = paired_ranks.reduce(0) do |total, (rank_one, rank_two)|
          total + ((rank_one - rank_two) ** 2)
        end

        cases = set_one.size

        1.0 - ((6 * sum_squared_differences) / ((cases ** 3) - cases).to_r) # This is rho or spearman's coefficient.
      end
    end
  end
end
@@ -8,12 +8,12 @@ module Statistics
8
8
  statistic = if expected.is_a? Numeric
9
9
  observed.reduce(0) do |memo, observed_value|
10
10
  up = (observed_value - expected) ** 2
11
- memo += (up/expected.to_f)
11
+ memo += (up/expected.to_r)
12
12
  end
13
13
  else
14
14
  expected.each_with_index.reduce(0) do |memo, (expected_value, index)|
15
15
  up = (observed[index] - expected_value) ** 2
16
- memo += (up/expected_value.to_f)
16
+ memo += (up/expected_value.to_r)
17
17
  end
18
18
  end
19
19
 
@@ -19,7 +19,7 @@ module Statistics
19
19
  if args.size == 2
20
20
  variances = [args[0].variance, args[1].variance]
21
21
 
22
- f_score = variances.max/variances.min.to_f
22
+ f_score = variances.max/variances.min.to_r
23
23
  df1 = 1 # k-1 (k = 2)
24
24
  df2 = args.flatten.size - 2 # N-k (k = 2)
25
25
  elsif args.size > 2
@@ -37,18 +37,18 @@ module Statistics
37
37
  variance_between_groups = iterator.reduce(0) do |summation, (size, index)|
38
38
  inner_calculation = size * ((sample_means[index] - overall_mean) ** 2)
39
39
 
40
- summation += (inner_calculation / (total_groups - 1).to_f)
40
+ summation += (inner_calculation / (total_groups - 1).to_r)
41
41
  end
42
42
 
43
43
  # Variance within groups
44
44
  variance_within_groups = (0...total_groups).reduce(0) do |outer_summation, group_index|
45
45
  outer_summation += args[group_index].reduce(0) do |inner_sumation, observation|
46
46
  inner_calculation = ((observation - sample_means[group_index]) ** 2)
47
- inner_sumation += (inner_calculation / (total_elements - total_groups).to_f)
47
+ inner_sumation += (inner_calculation / (total_elements - total_groups).to_r)
48
48
  end
49
49
  end
50
50
 
51
- f_score = variance_between_groups/variance_within_groups.to_f
51
+ f_score = variance_between_groups/variance_within_groups.to_r
52
52
  df1 = total_groups - 1
53
53
  df2 = total_elements - total_groups
54
54
  end
@@ -0,0 +1,70 @@
1
+ module Statistics
2
+ module StatisticalTest
3
+ class KolmogorovSmirnovTest
4
+ # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
5
+ def self.two_samples(group_one:, group_two:, alpha: 0.05)
6
+ samples = group_one + group_two # We can use unbalaced group samples
7
+
8
+ ecdf_one = Distribution::Empirical.new(samples: group_one)
9
+ ecdf_two = Distribution::Empirical.new(samples: group_two)
10
+
11
+ d_max = samples.sort.map do |sample|
12
+ d1 = ecdf_one.cumulative_function(x: sample)
13
+ d2 = ecdf_two.cumulative_function(x: sample)
14
+
15
+ (d1 - d2).abs
16
+ end.max
17
+
18
+ # TODO: Validate calculation of Common alpha.
19
+ common_alpha = Math.sqrt((-0.5 * Math.log(alpha)))
20
+ radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_r
21
+
22
+ critical_d = common_alpha * Math.sqrt(radicand)
23
+ # critical_d = self.critical_d(alpha: alpha, n: samples.size)
24
+
25
+ # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
26
+ # defined. We reject the null hypotesis if Dmax is > than Dcritical.
27
+ { d_max: d_max,
28
+ d_critical: critical_d,
29
+ total_samples: samples.size,
30
+ alpha: alpha,
31
+ null: d_max <= critical_d,
32
+ alternative: d_max > critical_d,
33
+ confidence_level: 1.0 - alpha }
34
+ end
35
+
36
+ # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper,
37
+ # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
38
+ # normality test".
39
+ # In this paper, the authors defines a couple of 6th-degree polynomial functions that allow us
40
+ # to find an aproximation of the real critical value. This is based in the conclusions made by
41
+ # Dagnelie (1968), where indicates that critical values given by Lilliefors can be approximated
42
+ # numerically.
43
+ #
44
+ # In general, the formula found is:
45
+ # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha).
46
+ #
47
+ # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
48
+ # of Monte Carlo simulations.
49
+ #
50
+ # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
51
+ # def self.critical_d(alpha:, n:)
52
+ # confidence = 1.0 - alpha
53
+
54
+ # a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
55
+ # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
56
+ # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
57
+ # 18.48090137098585 * (confidence ** 6)
58
+
59
+ # b_alpha = 12.940399038404 - 53.458334259532 * confidence +
60
+ # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
61
+ # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
62
+ # 92.123451358715 * (confidence ** 6)
63
+
64
+ # Math.sqrt(1.0 / (a_alpha * n + b_alpha))
65
+ # end
66
+ end
67
+
68
+ KSTest = KolmogorovSmirnovTest # Alias
69
+ end
70
+ end
@@ -21,9 +21,9 @@ module Statistics
21
21
  raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if data_std == 0
22
22
 
23
23
  comparison_mean = args[0]
24
- degrees_of_freedom = args[1].size
24
+ degrees_of_freedom = args[1].size - 1
25
25
 
26
- (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_f).to_f
26
+ (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_r).to_r
27
27
  else
28
28
  sample_left_mean = args[0].mean
29
29
  sample_left_variance = args[0].variance
@@ -31,12 +31,12 @@ module Statistics
31
31
  sample_right_mean = args[1].mean
32
32
  degrees_of_freedom = args.flatten.size - 2
33
33
 
34
- left_root = sample_left_variance/args[0].size.to_f
35
- right_root = sample_right_variance/args[1].size.to_f
34
+ left_root = sample_left_variance/args[0].size.to_r
35
+ right_root = sample_right_variance/args[1].size.to_r
36
36
 
37
37
  standard_error = Math.sqrt(left_root + right_root)
38
38
 
39
- (sample_left_mean - sample_right_mean).abs/standard_error.to_f
39
+ (sample_left_mean - sample_right_mean).abs/standard_error.to_r
40
40
  end
41
41
 
42
42
  t_distribution = Distribution::TStudent.new(degrees_of_freedom)
@@ -72,7 +72,7 @@ module Statistics
72
72
 
73
73
  down = difference_std/Math.sqrt(differences.size)
74
74
 
75
- t_score = (differences.mean - 0)/down.to_f
75
+ t_score = (differences.mean - 0)/down.to_r
76
76
 
77
77
  probability = Distribution::TStudent.new(degrees_of_freedom).cumulative_function(t_score)
78
78
 
@@ -73,7 +73,7 @@ module Statistics
73
73
  memo += ((t[:counter] ** 3) - t[:counter])/12.0
74
74
  end
75
75
 
76
- left = (total_group_one * total_group_two)/(n * (n - 1)).to_f
76
+ left = (total_group_one * total_group_two)/(n * (n - 1)).to_r
77
77
  right = (((n ** 3) - n)/12.0) - rank_sum
78
78
 
79
79
  Math.sqrt(left * right)
@@ -82,7 +82,7 @@ module Statistics
82
82
  private def ranked_sum_for(total, group)
83
83
  # sum rankings per group
84
84
  group.reduce(0) do |memo, element|
85
- rank_of_element = total[element][:rank] / total[element][:counter].to_f
85
+ rank_of_element = total[element][:rank] / total[element][:counter].to_r
86
86
  memo += rank_of_element
87
87
  end
88
88
  end
@@ -1,3 +1,3 @@
1
1
  module Statistics
2
- VERSION = "2.0.4"
2
+ VERSION = "2.1.3"
3
3
  end
@@ -27,9 +27,9 @@ Gem::Specification.new do |spec|
27
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
28
  spec.require_paths = ["lib"]
29
29
 
30
- spec.add_development_dependency "bundler",'~> 1.15', '>= 1.15.4'
31
- spec.add_development_dependency "rake", '~> 12.0', '>= 12.0.0'
32
- spec.add_development_dependency "rspec", '~> 3.6', '>= 3.6.0'
30
+ spec.add_development_dependency "rake", '>= 12.0.0', '~> 13.0'
31
+ spec.add_development_dependency "rspec", '>= 3.6.0'
33
32
  spec.add_development_dependency "grb", '~> 0.4.1', '>= 0.4.1'
34
- spec.add_development_dependency 'byebug', '~> 9.1.0', '>= 9.1.0'
33
+ spec.add_development_dependency 'byebug', '>= 9.1.0'
34
+ spec.add_development_dependency 'pry'
35
35
  end
metadata CHANGED
@@ -1,62 +1,39 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - esteban zapata
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-18 00:00:00.000000000 Z
11
+ date: 2021-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.15'
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.15.4
23
- type: :development
24
- prerelease: false
25
- version_requirements: !ruby/object:Gem::Requirement
26
- requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '1.15'
30
- - - ">="
31
- - !ruby/object:Gem::Version
32
- version: 1.15.4
33
13
  - !ruby/object:Gem::Dependency
34
14
  name: rake
35
15
  requirement: !ruby/object:Gem::Requirement
36
16
  requirements:
37
- - - "~>"
38
- - !ruby/object:Gem::Version
39
- version: '12.0'
40
17
  - - ">="
41
18
  - !ruby/object:Gem::Version
42
19
  version: 12.0.0
20
+ - - "~>"
21
+ - !ruby/object:Gem::Version
22
+ version: '13.0'
43
23
  type: :development
44
24
  prerelease: false
45
25
  version_requirements: !ruby/object:Gem::Requirement
46
26
  requirements:
47
- - - "~>"
48
- - !ruby/object:Gem::Version
49
- version: '12.0'
50
27
  - - ">="
51
28
  - !ruby/object:Gem::Version
52
29
  version: 12.0.0
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '13.0'
53
33
  - !ruby/object:Gem::Dependency
54
34
  name: rspec
55
35
  requirement: !ruby/object:Gem::Requirement
56
36
  requirements:
57
- - - "~>"
58
- - !ruby/object:Gem::Version
59
- version: '3.6'
60
37
  - - ">="
61
38
  - !ruby/object:Gem::Version
62
39
  version: 3.6.0
@@ -64,9 +41,6 @@ dependencies:
64
41
  prerelease: false
65
42
  version_requirements: !ruby/object:Gem::Requirement
66
43
  requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '3.6'
70
44
  - - ">="
71
45
  - !ruby/object:Gem::Version
72
46
  version: 3.6.0
@@ -94,9 +68,6 @@ dependencies:
94
68
  name: byebug
95
69
  requirement: !ruby/object:Gem::Requirement
96
70
  requirements:
97
- - - "~>"
98
- - !ruby/object:Gem::Version
99
- version: 9.1.0
100
71
  - - ">="
101
72
  - !ruby/object:Gem::Version
102
73
  version: 9.1.0
@@ -104,12 +75,23 @@ dependencies:
104
75
  prerelease: false
105
76
  version_requirements: !ruby/object:Gem::Requirement
106
77
  requirements:
107
- - - "~>"
78
+ - - ">="
108
79
  - !ruby/object:Gem::Version
109
80
  version: 9.1.0
81
+ - !ruby/object:Gem::Dependency
82
+ name: pry
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
110
85
  - - ">="
111
86
  - !ruby/object:Gem::Version
112
- version: 9.1.0
87
+ version: '0'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
113
95
  description: |-
114
96
  This gem is intended to accomplish the same purpose as jStat js library:
115
97
  to provide ruby with statistical capabilities without the need
@@ -122,10 +104,13 @@ executables: []
122
104
  extensions: []
123
105
  extra_rdoc_files: []
124
106
  files:
107
+ - ".github/dependabot.yml"
108
+ - ".github/workflows/ruby.yml"
125
109
  - ".gitignore"
126
110
  - ".rspec"
127
111
  - ".travis.yml"
128
112
  - CODE_OF_CONDUCT.md
113
+ - CONTRIBUTING.md
129
114
  - Gemfile
130
115
  - LICENSE
131
116
  - LICENSE.txt
@@ -137,18 +122,25 @@ files:
137
122
  - lib/math.rb
138
123
  - lib/statistics.rb
139
124
  - lib/statistics/distribution.rb
125
+ - lib/statistics/distribution/bernoulli.rb
140
126
  - lib/statistics/distribution/beta.rb
141
127
  - lib/statistics/distribution/binomial.rb
142
128
  - lib/statistics/distribution/chi_squared.rb
129
+ - lib/statistics/distribution/empirical.rb
143
130
  - lib/statistics/distribution/f.rb
131
+ - lib/statistics/distribution/geometric.rb
132
+ - lib/statistics/distribution/logseries.rb
133
+ - lib/statistics/distribution/negative_binomial.rb
144
134
  - lib/statistics/distribution/normal.rb
145
135
  - lib/statistics/distribution/poisson.rb
146
136
  - lib/statistics/distribution/t_student.rb
147
137
  - lib/statistics/distribution/uniform.rb
148
138
  - lib/statistics/distribution/weibull.rb
139
+ - lib/statistics/spearman_rank_coefficient.rb
149
140
  - lib/statistics/statistical_test.rb
150
141
  - lib/statistics/statistical_test/chi_squared_test.rb
151
142
  - lib/statistics/statistical_test/f_test.rb
143
+ - lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
152
144
  - lib/statistics/statistical_test/t_test.rb
153
145
  - lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
154
146
  - lib/statistics/version.rb
@@ -157,7 +149,7 @@ homepage: https://github.com/estebanz01/ruby-statistics
157
149
  licenses:
158
150
  - MIT
159
151
  metadata: {}
160
- post_install_message:
152
+ post_install_message:
161
153
  rdoc_options: []
162
154
  require_paths:
163
155
  - lib
@@ -172,9 +164,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
172
164
  - !ruby/object:Gem::Version
173
165
  version: '0'
174
166
  requirements: []
175
- rubyforge_project:
176
- rubygems_version: 2.5.2.1
177
- signing_key:
167
+ rubygems_version: 3.1.4
168
+ signing_key:
178
169
  specification_version: 4
179
170
  summary: A ruby gem for som specific statistics. Inspired by the jStat js library.
180
171
  test_files: []