ruby-statistics 2.0.4 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 298bc7d8dff1aeabc7db9c11fe9d7987f16bde40
4
- data.tar.gz: 1d796e62c18052f87fc2616b4c1a5f777080c1ab
2
+ SHA256:
3
+ metadata.gz: 6612502f03d8077d0158d997a42dfbc4d1002f2ab01ce2b7bdb5fbd510187e3e
4
+ data.tar.gz: 14fb04073b5b788dfa9e93aa586daef050dd105c2d2f8bdd17db30ad1fbcf144
5
5
  SHA512:
6
- metadata.gz: 98e8c58f34668e839be9689c74debd75bd7a6869372536d7e9927a63f77fca59ab05e06b413705f0d286094292cb566c01e6fe71145cdd7d2152fc930829910e
7
- data.tar.gz: 37b78191adb8d659f21134346a8a415c5bd7bd8a7dd99b2c1f8d7793a2ea741c43e60d8235a7d5fcc2bc0284b24e8e58e8404e0b4b4401ee3bc60f7e1afc8b8b
6
+ metadata.gz: '09590f836a59563819a1a847830e5dc2ee3554415cadc81c35b2a0f43ab1af87204f028659e8aa2f30a14b58c69c3e4f65db5e722d0a00ced5d92faa1e7dce82'
7
+ data.tar.gz: 2e66a26c23bf1f05cb9de40e992b302c4f0fef13aa70b4e509de479cb15b9700d4032f5d548aa45110f161ef9dac417f9b1872479a02dca0e729a051be2a4fc8
@@ -0,0 +1,15 @@
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ - package-ecosystem: "bundler" # See documentation for possible values
9
+ directory: "/" # Location of package manifests
10
+ schedule:
11
+ interval: "weekly"
12
+ - package-ecosystem: "github-actions" # See documentation for possible values
13
+ directory: "/" # Location of package manifests
14
+ schedule:
15
+ interval: "weekly"
@@ -0,0 +1,35 @@
1
+ name: Ruby
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ build:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ - uses: actions/checkout@v2.3.4
12
+ - name: Set up Ruby 2.6
13
+ uses: actions/setup-ruby@v1.1.2
14
+ with:
15
+ ruby-version: 2.6.x
16
+ - name: Build and test with Rake
17
+ run: |
18
+ gem install bundler
19
+ bundle install --jobs 2 --retry 1
20
+ bundle exec rake
21
+ build_2_7:
22
+
23
+ runs-on: ubuntu-latest
24
+
25
+ steps:
26
+ - uses: actions/checkout@v2.3.4
27
+ - name: Set up Ruby 2.7
28
+ uses: actions/setup-ruby@v1.1.2
29
+ with:
30
+ ruby-version: 2.7.x
31
+ - name: Build and test with Rake
32
+ run: |
33
+ gem install bundler
34
+ bundle install --jobs 2 --retry 1
35
+ bundle exec rake
data/.travis.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 2.2
5
- - 2.3.1
6
- - 2.4.0
7
- - 2.5.0
8
- before_install: gem install bundler
4
+ - 2.5.1
5
+ - 2.6.0
6
+ - 2.6.3
7
+ - 2.6.5
8
+ - 2.7
9
+ before_install: gem update --system && gem install bundler
data/CONTRIBUTING.md ADDED
@@ -0,0 +1 @@
1
+ Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).
data/README.md CHANGED
@@ -5,10 +5,11 @@
5
5
  A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar.
6
6
 
7
7
  Unit test runs under the following ruby versions:
8
- * Ruby 2.2.
9
- * Ruby 2.3.1.
10
- * Ruby 2.4.0.
11
- * Ruby 2.5.0.
8
+ * Ruby 2.5.1.
9
+ * Ruby 2.6.0.
10
+ * Ruby 2.6.3.
11
+ * Ruby 2.6.5.
12
+ * Ruby 2.7.
12
13
 
13
14
  We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html).
14
15
 
@@ -52,7 +53,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
52
53
  ```
53
54
 
54
55
  ## Documentation
55
- You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki/Documentation-Index)
56
+ You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
56
57
 
57
58
  ## Development
58
59
 
data/lib/math.rb CHANGED
@@ -9,11 +9,11 @@ module Math
9
9
  end
10
10
 
11
11
  def self.combination(n, r)
12
- self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_f # n!/(r! * [n - r]!)
12
+ self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_r # n!/(r! * [n - r]!)
13
13
  end
14
14
 
15
15
  def self.permutation(n, k)
16
- self.factorial(n)/self.factorial(n - k).to_f
16
+ self.factorial(n)/self.factorial(n - k).to_r
17
17
  end
18
18
 
19
19
  # Function adapted from the python implementation that exists in https://en.wikipedia.org/wiki/Simpson%27s_rule#Sample_implementation
@@ -24,7 +24,8 @@ module Math
24
24
  return
25
25
  end
26
26
 
27
- h = (b - a)/n.to_f
27
+ h = (b - a)/n.to_r
28
+
28
29
  resA = yield(a)
29
30
  resB = yield(b)
30
31
 
@@ -45,7 +46,7 @@ module Math
45
46
 
46
47
  def self.lower_incomplete_gamma_function(s, x)
47
48
  # The greater the iterations, the better. That's why we are iterating 10_000 * x times
48
- self.simpson_rule(0, x, (10_000 * x.round).round) do |t|
49
+ self.simpson_rule(0, x.to_r, (10_000 * x.round).round) do |t|
49
50
  (t ** (s - 1)) * Math.exp(-t)
50
51
  end
51
52
  end
@@ -72,7 +73,7 @@ module Math
72
73
  # To avoid overflow problems, the implementation applies the logarithm properties
73
74
  # to calculate in a faster and safer way the values.
74
75
  lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze
75
- front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_f).freeze
76
+ front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_r).freeze
76
77
 
77
78
  # This is the non-log version of the left part of the formula (before the continuous fraction)
78
79
  # down_left = alp * self.beta_function(alp, bet)
@@ -0,0 +1,35 @@
1
+ module Statistics
2
+ module Distribution
3
+ class Bernoulli
4
+ def self.density_function(n, p)
5
+ return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}.
6
+
7
+ case n
8
+ when 0 then 1.0 - p
9
+ when 1 then p
10
+ end
11
+ end
12
+
13
+ def self.cumulative_function(n, p)
14
+ return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}.
15
+
16
+ case n
17
+ when 0 then 1.0 - p
18
+ when 1 then 1.0
19
+ end
20
+ end
21
+
22
+ def self.variance(p)
23
+ p * (1.0 - p)
24
+ end
25
+
26
+ def self.skewness(p)
27
+ (1.0 - 2.0*p).to_r / Math.sqrt(p * (1.0 - p))
28
+ end
29
+
30
+ def self.kurtosis(p)
31
+ (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p))
32
+ end
33
+ end
34
+ end
35
+ end
@@ -4,8 +4,8 @@ module Statistics
4
4
  attr_accessor :alpha, :beta
5
5
 
6
6
  def initialize(alp, bet)
7
- self.alpha = alp.to_f
8
- self.beta = bet.to_f
7
+ self.alpha = alp.to_r
8
+ self.beta = bet.to_r
9
9
  end
10
10
 
11
11
  def cumulative_function(value)
@@ -0,0 +1,26 @@
1
+ module Statistics
2
+ module Distribution
3
+ class Empirical
4
+ attr_accessor :samples
5
+
6
+ def initialize(samples:)
7
+ self.samples = samples
8
+ end
9
+
10
+ # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
11
+ def cumulative_function(x:)
12
+ cumulative_sum = samples.reduce(0) do |summation, sample|
13
+ summation += if sample <= x
14
+ 1
15
+ else
16
+ 0
17
+ end
18
+
19
+ summation
20
+ end
21
+
22
+ cumulative_sum / samples.size.to_r
23
+ end
24
+ end
25
+ end
26
+ end
@@ -10,7 +10,7 @@ module Statistics
10
10
 
11
11
  # Formula extracted from http://www.itl.nist.gov/div898/handbook/eda/section3/eda3665.htm#CDF
12
12
  def cumulative_function(value)
13
- k = d2/(d2 + d1 * value.to_f)
13
+ k = d2/(d2 + d1 * value.to_r)
14
14
 
15
15
  1 - Math.incomplete_beta_function(k, d2/2.0, d1/2.0)
16
16
  end
@@ -18,28 +18,28 @@ module Statistics
18
18
  def density_function(value)
19
19
  return if d1 < 0 || d2 < 0 # F-pdf is well defined for the [0, +infinity) interval.
20
20
 
21
- val = value.to_f
21
+ val = value.to_r
22
22
  upper = ((d1 * val) ** d1) * (d2**d2)
23
23
  lower = (d1 * val + d2) ** (d1 + d2)
24
- up = Math.sqrt(upper/lower.to_f)
24
+ up = Math.sqrt(upper/lower.to_r)
25
25
  down = val * Math.beta_function(d1/2.0, d2/2.0)
26
26
 
27
- up/down.to_f
27
+ up/down.to_r
28
28
  end
29
29
 
30
30
  def mean
31
31
  return if d2 <= 2
32
32
 
33
- d2/(d2 - 2).to_f
33
+ d2/(d2 - 2).to_r
34
34
  end
35
35
 
36
36
  def mode
37
37
  return if d1 <= 2
38
38
 
39
- left = (d1 - 2)/d1.to_f
40
- right = d2/(d2 + 2).to_f
39
+ left = (d1 - 2)/d1.to_r
40
+ right = d2/(d2 + 2).to_r
41
41
 
42
- left * right
42
+ (left * right).to_f
43
43
  end
44
44
  end
45
45
  end
@@ -0,0 +1,76 @@
1
+ module Statistics
2
+ module Distribution
3
+ class Geometric
4
+ attr_accessor :probability_of_success, :always_success_allowed
5
+
6
+ def initialize(p, always_success: false)
7
+ self.probability_of_success = p.to_r
8
+ self.always_success_allowed = always_success
9
+ end
10
+
11
+ def density_function(k)
12
+ k = k.to_i
13
+
14
+ if always_success_allowed
15
+ return if k < 0
16
+
17
+ ((1.0 - probability_of_success) ** k) * probability_of_success
18
+ else
19
+ return if k <= 0
20
+
21
+ ((1.0 - probability_of_success) ** (k - 1.0)) * probability_of_success
22
+ end
23
+ end
24
+
25
+ def cumulative_function(k)
26
+ k = k.to_i
27
+
28
+ if always_success_allowed
29
+ return if k < 0
30
+
31
+ 1.0 - ((1.0 - probability_of_success) ** (k + 1.0))
32
+ else
33
+ return if k <= 0
34
+
35
+ 1.0 - ((1.0 - probability_of_success) ** k)
36
+ end
37
+ end
38
+
39
+ def mean
40
+ if always_success_allowed
41
+ (1.0 - probability_of_success) / probability_of_success
42
+ else
43
+ 1.0 / probability_of_success
44
+ end
45
+ end
46
+
47
+ def median
48
+ if always_success_allowed
49
+ (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - 1.0
50
+ else
51
+ (-1.0 / Math.log2(1.0 - probability_of_success)).ceil
52
+ end
53
+ end
54
+
55
+ def mode
56
+ if always_success_allowed
57
+ 0.0
58
+ else
59
+ 1.0
60
+ end
61
+ end
62
+
63
+ def variance
64
+ (1.0 - probability_of_success) / (probability_of_success ** 2)
65
+ end
66
+
67
+ def skewness
68
+ (2.0 - probability_of_success) / Math.sqrt(1.0 - probability_of_success)
69
+ end
70
+
71
+ def kurtosis
72
+ 6.0 + ((probability_of_success ** 2) / (1.0 - probability_of_success))
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,51 @@
1
+ module Statistics
2
+ module Distribution
3
+ class LogSeries
4
+ def self.density_function(k, p)
5
+ return if k <= 0
6
+ k = k.to_i
7
+
8
+ left = (-1.0 / Math.log(1.0 - p))
9
+ right = (p ** k).to_r
10
+
11
+ left * right / k
12
+ end
13
+
14
+ def self.cumulative_function(k, p)
15
+ return if k <= 0
16
+
17
+ # Sadly, the incomplete beta function is converging
18
+ # too fast to zero and breaking the calculation on logs.
19
+ # So, we default to the basic definition of the CDF which is
20
+ # the integral (-Inf, K) of the PDF, with P(X <= x) which can
21
+ # be solved as a summation of all PDFs from 1 to K. Note that the summation approach
22
+ # only applies to discrete distributions.
23
+ #
24
+ # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p)
25
+ # 1.0 + right
26
+
27
+ result = 0.0
28
+ 1.upto(k) do |number|
29
+ result += self.density_function(number, p)
30
+ end
31
+
32
+ result
33
+ end
34
+
35
+ def self.mode
36
+ 1.0
37
+ end
38
+
39
+ def self.mean(p)
40
+ (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p))
41
+ end
42
+
43
+ def self.variance(p)
44
+ up = p + Math.log(1.0 - p)
45
+ down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2)
46
+
47
+ (-1.0 * p) * (up / down.to_r)
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,51 @@
1
+ module Statistics
2
+ module Distribution
3
+ class NegativeBinomial
4
+ attr_accessor :number_of_failures, :probability_per_trial
5
+
6
+ def initialize(r, p)
7
+ self.number_of_failures = r.to_i
8
+ self.probability_per_trial = p
9
+ end
10
+
11
+ def probability_mass_function(k)
12
+ return if number_of_failures < 0 || k < 0 || k > number_of_failures
13
+
14
+ left = Math.combination(k + number_of_failures - 1, k)
15
+ right = ((1 - probability_per_trial) ** number_of_failures) * (probability_per_trial ** k)
16
+
17
+ left * right
18
+ end
19
+
20
+ def cumulative_function(k)
21
+ return if k < 0 || k > number_of_failures
22
+ k = k.to_i
23
+
24
+ 1.0 - Math.incomplete_beta_function(probability_per_trial, k + 1, number_of_failures)
25
+ end
26
+
27
+ def mean
28
+ (probability_per_trial * number_of_failures)/(1 - probability_per_trial).to_r
29
+ end
30
+
31
+ def variance
32
+ (probability_per_trial * number_of_failures)/((1 - probability_per_trial) ** 2).to_r
33
+ end
34
+
35
+ def skewness
36
+ (1 + probability_per_trial).to_r / Math.sqrt(probability_per_trial * number_of_failures)
37
+ end
38
+
39
+ def mode
40
+ if number_of_failures > 1
41
+ up = probability_per_trial * (number_of_failures - 1)
42
+ down = (1 - probability_per_trial).to_r
43
+
44
+ (up/down).floor
45
+ elsif number_of_failures <= 1
46
+ 0.0
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -5,9 +5,9 @@ module Statistics
5
5
  alias_method :mode, :mean
6
6
 
7
7
  def initialize(avg, std)
8
- self.mean = avg.to_f
9
- self.standard_deviation = std.to_f
10
- self.variance = std.to_f**2
8
+ self.mean = avg.to_r
9
+ self.standard_deviation = std.to_r
10
+ self.variance = std.to_r**2
11
11
  end
12
12
 
13
13
  def cumulative_function(value)
@@ -79,5 +79,61 @@ module Statistics
79
79
  euler/Math.sqrt(2 * Math::PI)
80
80
  end
81
81
  end
82
+
83
+ # Inverse Standard Normal distribution:
84
+ # References:
85
+ # https://en.wikipedia.org/wiki/Inverse_distribution
86
+ # http://www.source-code.biz/snippets/vbasic/9.htm
87
+ class InverseStandardNormal < StandardNormal
88
+ A1 = -39.6968302866538
89
+ A2 = 220.946098424521
90
+ A3 = -275.928510446969
91
+ A4 = 138.357751867269
92
+ A5 = -30.6647980661472
93
+ A6 = 2.50662827745924
94
+ B1 = -54.4760987982241
95
+ B2 = 161.585836858041
96
+ B3 = -155.698979859887
97
+ B4 = 66.8013118877197
98
+ B5 = -13.2806815528857
99
+ C1 = -7.78489400243029E-03
100
+ C2 = -0.322396458041136
101
+ C3 = -2.40075827716184
102
+ C4 = -2.54973253934373
103
+ C5 = 4.37466414146497
104
+ C6 = 2.93816398269878
105
+ D1 = 7.78469570904146E-03
106
+ D2 = 0.32246712907004
107
+ D3 = 2.445134137143
108
+ D4 = 3.75440866190742
109
+ P_LOW = 0.02425
110
+ P_HIGH = 1 - P_LOW
111
+
112
+ def density_function(_)
113
+ raise NotImplementedError
114
+ end
115
+
116
+ def random(elements: 1, seed: Random.new_seed)
117
+ raise NotImplementedError
118
+ end
119
+
120
+ def cumulative_function(value)
121
+ return if value < 0.0 || value > 1.0
122
+ return -1.0 * Float::INFINITY if value.zero?
123
+ return Float::INFINITY if value == 1.0
124
+
125
+ if value < P_LOW
126
+ q = Math.sqrt((Math.log(value) * -2.0))
127
+ (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0)
128
+ elsif value <= P_HIGH
129
+ q = value - 0.5
130
+ r = q ** 2
131
+ (((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q / (((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0)
132
+ else
133
+ q = Math.sqrt((Math.log(1 - value) * -2.0))
134
+ - (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1)
135
+ end
136
+ end
137
+ end
82
138
  end
83
139
  end
@@ -18,7 +18,7 @@ module Statistics
18
18
  upper = (expected_number_of_occurrences ** k) * Math.exp(-expected_number_of_occurrences)
19
19
  lower = Math.factorial(k)
20
20
 
21
- upper/lower.to_f
21
+ upper/lower.to_r
22
22
  end
23
23
 
24
24
  def cumulative_function(k)
@@ -31,7 +31,7 @@ module Statistics
31
31
 
32
32
  # We need the right tail, i.e.: The upper incomplete gamma function. This can be
33
33
  # achieved by doing a substraction between 1 and the lower incomplete gamma function.
34
- 1 - (upper/lower.to_f)
34
+ 1 - (upper/lower.to_r)
35
35
  end
36
36
  end
37
37
  end
@@ -29,7 +29,7 @@ module Statistics
29
29
  upper = Math.gamma((degrees_of_freedom + 1)/2.0)
30
30
  lower = Math.sqrt(degrees_of_freedom * Math::PI) * Math.gamma(degrees_of_freedom/2.0)
31
31
  left = upper/lower
32
- right = (1 + ((value ** 2)/degrees_of_freedom.to_f)) ** -((degrees_of_freedom + 1)/2.0)
32
+ right = (1 + ((value ** 2)/degrees_of_freedom.to_r)) ** -((degrees_of_freedom + 1)/2.0)
33
33
 
34
34
  left * right
35
35
  end
@@ -64,8 +64,8 @@ module Statistics
64
64
  results << Math.simpson_rule(threshold, y, 10_000) do |t|
65
65
  up = Math.gamma((v+1)/2.0)
66
66
  down = Math.sqrt(Math::PI * v) * Math.gamma(v/2.0)
67
- right = (1 + ((y ** 2)/v.to_f)) ** ((v+1)/2.0)
68
- left = up/down.to_f
67
+ right = (1 + ((y ** 2)/v.to_r)) ** ((v+1)/2.0)
68
+ left = up/down.to_r
69
69
 
70
70
  left * right
71
71
  end
@@ -4,8 +4,8 @@ module Statistics
4
4
  attr_accessor :left, :right
5
5
 
6
6
  def initialize(a, b)
7
- self.left = a.to_f
8
- self.right = b.to_f
7
+ self.left = a.to_r
8
+ self.right = b.to_r
9
9
  end
10
10
 
11
11
  def density_function(value)
@@ -4,8 +4,8 @@ module Statistics
4
4
  attr_accessor :shape, :scale # k and lambda
5
5
 
6
6
  def initialize(k, lamb)
7
- self.shape = k.to_f
8
- self.scale = lamb.to_f
7
+ self.shape = k.to_r
8
+ self.scale = lamb.to_r
9
9
  end
10
10
 
11
11
  def cumulative_function(random_value)
@@ -45,7 +45,7 @@ module Statistics
45
45
  # Using the inverse CDF function, also called quantile, we can calculate
46
46
  # a random sample that follows a weibull distribution.
47
47
  #
48
- # Formula extracted from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
48
+ # Formula extracted from https://www.taygeta.com/random/weibull.html
49
49
  def random(elements: 1, seed: Random.new_seed)
50
50
  results = []
51
51
 
@@ -0,0 +1,71 @@
1
+ module Statistics
2
+ class SpearmanRankCoefficient
3
+ def self.rank(data:, return_ranks_only: true)
4
+ descending_order_data = data.sort { |a, b| b <=> a }
5
+ rankings = {}
6
+
7
+ data.each do |value|
8
+ # If we have ties, the find_index method will only retrieve the index of the
9
+ # first element in the list (i.e., the one closest to the left of the array),
10
+ # so when a tie is detected, we increase the temporal ranking by the number of
11
+ # counted elements at that particular time and then we increase the counter.
12
+ temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index
13
+
14
+ if rankings.fetch(value, false)
15
+ rankings[value][:rank] += (temporal_ranking + rankings[value][:counter])
16
+ rankings[value][:counter] += 1
17
+ rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_r
18
+ else
19
+ rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking }
20
+ end
21
+ end
22
+
23
+ if return_ranks_only
24
+ data.map do |value|
25
+ rankings[value][:tie_rank]
26
+ end
27
+ else
28
+ rankings
29
+ end
30
+ end
31
+
32
+ # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
33
+ def self.coefficient(set_one, set_two)
34
+ raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
35
+ return if set_one.size == 0 && set_two.size == 0
36
+
37
+ set_one_mean, set_two_mean = set_one.mean, set_two.mean
38
+ have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) || rank.is_a?(Rational) }
39
+
40
+ if have_tie_ranks
41
+ numerator = 0
42
+ squared_differences_set_one = 0
43
+ squared_differences_set_two = 0
44
+
45
+ set_one.size.times do |idx|
46
+ local_diff_one = (set_one[idx] - set_one_mean)
47
+ local_diff_two = (set_two[idx] - set_two_mean)
48
+
49
+ squared_differences_set_one += local_diff_one ** 2
50
+ squared_differences_set_two += local_diff_two ** 2
51
+
52
+ numerator += local_diff_one * local_diff_two
53
+ end
54
+
55
+ denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)
56
+
57
+ numerator / denominator.to_r # This is rho or spearman's coefficient.
58
+ else
59
+ sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)|
60
+ memo += ((rank_one - set_two[index]) ** 2)
61
+ memo
62
+ end
63
+
64
+ numerator = 6 * sum_squared_differences
65
+ denominator = ((set_one.size ** 3) - set_one.size)
66
+
67
+ 1.0 - (numerator / denominator.to_r) # This is rho or spearman's coefficient.
68
+ end
69
+ end
70
+ end
71
+ end
@@ -8,12 +8,12 @@ module Statistics
8
8
  statistic = if expected.is_a? Numeric
9
9
  observed.reduce(0) do |memo, observed_value|
10
10
  up = (observed_value - expected) ** 2
11
- memo += (up/expected.to_f)
11
+ memo += (up/expected.to_r)
12
12
  end
13
13
  else
14
14
  expected.each_with_index.reduce(0) do |memo, (expected_value, index)|
15
15
  up = (observed[index] - expected_value) ** 2
16
- memo += (up/expected_value.to_f)
16
+ memo += (up/expected_value.to_r)
17
17
  end
18
18
  end
19
19
 
@@ -19,7 +19,7 @@ module Statistics
19
19
  if args.size == 2
20
20
  variances = [args[0].variance, args[1].variance]
21
21
 
22
- f_score = variances.max/variances.min.to_f
22
+ f_score = variances.max/variances.min.to_r
23
23
  df1 = 1 # k-1 (k = 2)
24
24
  df2 = args.flatten.size - 2 # N-k (k = 2)
25
25
  elsif args.size > 2
@@ -37,18 +37,18 @@ module Statistics
37
37
  variance_between_groups = iterator.reduce(0) do |summation, (size, index)|
38
38
  inner_calculation = size * ((sample_means[index] - overall_mean) ** 2)
39
39
 
40
- summation += (inner_calculation / (total_groups - 1).to_f)
40
+ summation += (inner_calculation / (total_groups - 1).to_r)
41
41
  end
42
42
 
43
43
  # Variance within groups
44
44
  variance_within_groups = (0...total_groups).reduce(0) do |outer_summation, group_index|
45
45
  outer_summation += args[group_index].reduce(0) do |inner_sumation, observation|
46
46
  inner_calculation = ((observation - sample_means[group_index]) ** 2)
47
- inner_sumation += (inner_calculation / (total_elements - total_groups).to_f)
47
+ inner_sumation += (inner_calculation / (total_elements - total_groups).to_r)
48
48
  end
49
49
  end
50
50
 
51
- f_score = variance_between_groups/variance_within_groups.to_f
51
+ f_score = variance_between_groups/variance_within_groups.to_r
52
52
  df1 = total_groups - 1
53
53
  df2 = total_elements - total_groups
54
54
  end
@@ -0,0 +1,70 @@
1
+ module Statistics
2
+ module StatisticalTest
3
+ class KolmogorovSmirnovTest
4
+ # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
5
+ def self.two_samples(group_one:, group_two:, alpha: 0.05)
6
+ samples = group_one + group_two # We can use unbalanced group samples
7
+
8
+ ecdf_one = Distribution::Empirical.new(samples: group_one)
9
+ ecdf_two = Distribution::Empirical.new(samples: group_two)
10
+
11
+ d_max = samples.sort.map do |sample|
12
+ d1 = ecdf_one.cumulative_function(x: sample)
13
+ d2 = ecdf_two.cumulative_function(x: sample)
14
+
15
+ (d1 - d2).abs
16
+ end.max
17
+
18
+ # TODO: Validate calculation of Common alpha.
19
+ common_alpha = Math.sqrt((-0.5 * Math.log(alpha)))
20
+ radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_r
21
+
22
+ critical_d = common_alpha * Math.sqrt(radicand)
23
+ # critical_d = self.critical_d(alpha: alpha, n: samples.size)
24
+
25
+ # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
26
+ # defined. We reject the null hypothesis if Dmax is greater than Dcritical.
27
+ { d_max: d_max,
28
+ d_critical: critical_d,
29
+ total_samples: samples.size,
30
+ alpha: alpha,
31
+ null: d_max <= critical_d,
32
+ alternative: d_max > critical_d,
33
+ confidence_level: 1.0 - alpha }
34
+ end
35
+
36
+ # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper,
37
+ # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
38
+ # normality test".
39
+ # In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
40
+ # to find an approximation of the real critical value. This is based on the conclusions made by
41
+ # Dagnelie (1968), who indicates that critical values given by Lilliefors can be approximated
42
+ # numerically.
43
+ #
44
+ # In general, the formula found is:
45
+ # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha).
46
+ #
47
+ # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
48
+ # of Monte Carlo simulations.
49
+ #
50
+ # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
51
+ # def self.critical_d(alpha:, n:)
52
+ # confidence = 1.0 - alpha
53
+
54
+ # a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
55
+ # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
56
+ # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
57
+ # 18.48090137098585 * (confidence ** 6)
58
+
59
+ # b_alpha = 12.940399038404 - 53.458334259532 * confidence +
60
+ # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
61
+ # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
62
+ # 92.123451358715 * (confidence ** 6)
63
+
64
+ # Math.sqrt(1.0 / (a_alpha * n + b_alpha))
65
+ # end
66
+ end
67
+
68
+ KSTest = KolmogorovSmirnovTest # Alias
69
+ end
70
+ end
@@ -21,9 +21,9 @@ module Statistics
21
21
  raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if data_std == 0
22
22
 
23
23
  comparison_mean = args[0]
24
- degrees_of_freedom = args[1].size
24
+ degrees_of_freedom = args[1].size - 1
25
25
 
26
- (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_f).to_f
26
+ (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_r).to_r
27
27
  else
28
28
  sample_left_mean = args[0].mean
29
29
  sample_left_variance = args[0].variance
@@ -31,12 +31,12 @@ module Statistics
31
31
  sample_right_mean = args[1].mean
32
32
  degrees_of_freedom = args.flatten.size - 2
33
33
 
34
- left_root = sample_left_variance/args[0].size.to_f
35
- right_root = sample_right_variance/args[1].size.to_f
34
+ left_root = sample_left_variance/args[0].size.to_r
35
+ right_root = sample_right_variance/args[1].size.to_r
36
36
 
37
37
  standard_error = Math.sqrt(left_root + right_root)
38
38
 
39
- (sample_left_mean - sample_right_mean).abs/standard_error.to_f
39
+ (sample_left_mean - sample_right_mean).abs/standard_error.to_r
40
40
  end
41
41
 
42
42
  t_distribution = Distribution::TStudent.new(degrees_of_freedom)
@@ -72,7 +72,7 @@ module Statistics
72
72
 
73
73
  down = difference_std/Math.sqrt(differences.size)
74
74
 
75
- t_score = (differences.mean - 0)/down.to_f
75
+ t_score = (differences.mean - 0)/down.to_r
76
76
 
77
77
  probability = Distribution::TStudent.new(degrees_of_freedom).cumulative_function(t_score)
78
78
 
@@ -73,7 +73,7 @@ module Statistics
73
73
  memo += ((t[:counter] ** 3) - t[:counter])/12.0
74
74
  end
75
75
 
76
- left = (total_group_one * total_group_two)/(n * (n - 1)).to_f
76
+ left = (total_group_one * total_group_two)/(n * (n - 1)).to_r
77
77
  right = (((n ** 3) - n)/12.0) - rank_sum
78
78
 
79
79
  Math.sqrt(left * right)
@@ -82,7 +82,7 @@ module Statistics
82
82
  private def ranked_sum_for(total, group)
83
83
  # sum rankings per group
84
84
  group.reduce(0) do |memo, element|
85
- rank_of_element = total[element][:rank] / total[element][:counter].to_f
85
+ rank_of_element = total[element][:rank] / total[element][:counter].to_r
86
86
  memo += rank_of_element
87
87
  end
88
88
  end
@@ -1,3 +1,3 @@
1
1
  module Statistics
2
- VERSION = "2.0.4"
2
+ VERSION = "2.1.3"
3
3
  end
@@ -27,9 +27,9 @@ Gem::Specification.new do |spec|
27
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
28
  spec.require_paths = ["lib"]
29
29
 
30
- spec.add_development_dependency "bundler",'~> 1.15', '>= 1.15.4'
31
- spec.add_development_dependency "rake", '~> 12.0', '>= 12.0.0'
32
- spec.add_development_dependency "rspec", '~> 3.6', '>= 3.6.0'
30
+ spec.add_development_dependency "rake", '>= 12.0.0', '~> 13.0'
31
+ spec.add_development_dependency "rspec", '>= 3.6.0'
33
32
  spec.add_development_dependency "grb", '~> 0.4.1', '>= 0.4.1'
34
- spec.add_development_dependency 'byebug', '~> 9.1.0', '>= 9.1.0'
33
+ spec.add_development_dependency 'byebug', '>= 9.1.0'
34
+ spec.add_development_dependency 'pry'
35
35
  end
metadata CHANGED
@@ -1,62 +1,39 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - esteban zapata
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-18 00:00:00.000000000 Z
11
+ date: 2021-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.15'
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.15.4
23
- type: :development
24
- prerelease: false
25
- version_requirements: !ruby/object:Gem::Requirement
26
- requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '1.15'
30
- - - ">="
31
- - !ruby/object:Gem::Version
32
- version: 1.15.4
33
13
  - !ruby/object:Gem::Dependency
34
14
  name: rake
35
15
  requirement: !ruby/object:Gem::Requirement
36
16
  requirements:
37
- - - "~>"
38
- - !ruby/object:Gem::Version
39
- version: '12.0'
40
17
  - - ">="
41
18
  - !ruby/object:Gem::Version
42
19
  version: 12.0.0
20
+ - - "~>"
21
+ - !ruby/object:Gem::Version
22
+ version: '13.0'
43
23
  type: :development
44
24
  prerelease: false
45
25
  version_requirements: !ruby/object:Gem::Requirement
46
26
  requirements:
47
- - - "~>"
48
- - !ruby/object:Gem::Version
49
- version: '12.0'
50
27
  - - ">="
51
28
  - !ruby/object:Gem::Version
52
29
  version: 12.0.0
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '13.0'
53
33
  - !ruby/object:Gem::Dependency
54
34
  name: rspec
55
35
  requirement: !ruby/object:Gem::Requirement
56
36
  requirements:
57
- - - "~>"
58
- - !ruby/object:Gem::Version
59
- version: '3.6'
60
37
  - - ">="
61
38
  - !ruby/object:Gem::Version
62
39
  version: 3.6.0
@@ -64,9 +41,6 @@ dependencies:
64
41
  prerelease: false
65
42
  version_requirements: !ruby/object:Gem::Requirement
66
43
  requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '3.6'
70
44
  - - ">="
71
45
  - !ruby/object:Gem::Version
72
46
  version: 3.6.0
@@ -94,9 +68,6 @@ dependencies:
94
68
  name: byebug
95
69
  requirement: !ruby/object:Gem::Requirement
96
70
  requirements:
97
- - - "~>"
98
- - !ruby/object:Gem::Version
99
- version: 9.1.0
100
71
  - - ">="
101
72
  - !ruby/object:Gem::Version
102
73
  version: 9.1.0
@@ -104,12 +75,23 @@ dependencies:
104
75
  prerelease: false
105
76
  version_requirements: !ruby/object:Gem::Requirement
106
77
  requirements:
107
- - - "~>"
78
+ - - ">="
108
79
  - !ruby/object:Gem::Version
109
80
  version: 9.1.0
81
+ - !ruby/object:Gem::Dependency
82
+ name: pry
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
110
85
  - - ">="
111
86
  - !ruby/object:Gem::Version
112
- version: 9.1.0
87
+ version: '0'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
113
95
  description: |-
114
96
  This gem is intended to accomplish the same purpose as jStat js library:
115
97
  to provide ruby with statistical capabilities without the need
@@ -122,10 +104,13 @@ executables: []
122
104
  extensions: []
123
105
  extra_rdoc_files: []
124
106
  files:
107
+ - ".github/dependabot.yml"
108
+ - ".github/workflows/ruby.yml"
125
109
  - ".gitignore"
126
110
  - ".rspec"
127
111
  - ".travis.yml"
128
112
  - CODE_OF_CONDUCT.md
113
+ - CONTRIBUTING.md
129
114
  - Gemfile
130
115
  - LICENSE
131
116
  - LICENSE.txt
@@ -137,18 +122,25 @@ files:
137
122
  - lib/math.rb
138
123
  - lib/statistics.rb
139
124
  - lib/statistics/distribution.rb
125
+ - lib/statistics/distribution/bernoulli.rb
140
126
  - lib/statistics/distribution/beta.rb
141
127
  - lib/statistics/distribution/binomial.rb
142
128
  - lib/statistics/distribution/chi_squared.rb
129
+ - lib/statistics/distribution/empirical.rb
143
130
  - lib/statistics/distribution/f.rb
131
+ - lib/statistics/distribution/geometric.rb
132
+ - lib/statistics/distribution/logseries.rb
133
+ - lib/statistics/distribution/negative_binomial.rb
144
134
  - lib/statistics/distribution/normal.rb
145
135
  - lib/statistics/distribution/poisson.rb
146
136
  - lib/statistics/distribution/t_student.rb
147
137
  - lib/statistics/distribution/uniform.rb
148
138
  - lib/statistics/distribution/weibull.rb
139
+ - lib/statistics/spearman_rank_coefficient.rb
149
140
  - lib/statistics/statistical_test.rb
150
141
  - lib/statistics/statistical_test/chi_squared_test.rb
151
142
  - lib/statistics/statistical_test/f_test.rb
143
+ - lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
152
144
  - lib/statistics/statistical_test/t_test.rb
153
145
  - lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
154
146
  - lib/statistics/version.rb
@@ -157,7 +149,7 @@ homepage: https://github.com/estebanz01/ruby-statistics
157
149
  licenses:
158
150
  - MIT
159
151
  metadata: {}
160
- post_install_message:
152
+ post_install_message:
161
153
  rdoc_options: []
162
154
  require_paths:
163
155
  - lib
@@ -172,9 +164,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
172
164
  - !ruby/object:Gem::Version
173
165
  version: '0'
174
166
  requirements: []
175
- rubyforge_project:
176
- rubygems_version: 2.5.2.1
177
- signing_key:
167
+ rubygems_version: 3.1.4
168
+ signing_key:
178
169
  specification_version: 4
179
170
  summary: A ruby gem for som specific statistics. Inspired by the jStat js library.
180
171
  test_files: []