ruby-statistics 2.0.5 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e73ce22e1ad6da4f9d2925ed5884d6e72d915ebad14b56b9e81c64c422c14cf
4
- data.tar.gz: 18fa7cafd8bbf457dd8963b2e666525964ad588257ffc8774cee2d1250f4b469
3
+ metadata.gz: d33cf13ba623ecbb23488499a13a52c75c906b3eb258e7b146b628d10a84de89
4
+ data.tar.gz: a075adb8960b0906cd276a138e70fd3bc76c738d9ed70afac08d57ccd50d86b3
5
5
  SHA512:
6
- metadata.gz: d362a9a2a5ea950ccc37ca5754dca0644040f49563afb14bbd10792ac2ef1f13853724cb693107fa6eb571972432026c5b3cf363ea4932513360f33513147401
7
- data.tar.gz: cd5dd0a2b386fcaa0190d369a1f9d40248d4ed315e165df43403884cb33438902dd080a3f1579ba498eb88a5f2936ba91f6cd4a9e31a948ac31e2d124ba232fe
6
+ metadata.gz: '0799701996d9c3496e35b9f2f73024c359bbc263c3e34995b047bbbda1c0acff8b1ae5bf323bd3f2fd1901bb9eb3331271ccbd2fc16d6911d179644a8ad1878f'
7
+ data.tar.gz: fe31571ab416c16b9832a4dff937e583c41a67b58a4676315f5cbde7720773cbb705b35973e16ef3e002cc542ea7b947e4a93157ae1ea09fecaa0950ecea7ab1
data/CONTRIBUTING.md ADDED
@@ -0,0 +1 @@
1
+ Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).
data/README.md CHANGED
@@ -52,7 +52,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
52
52
  ```
53
53
 
54
54
  ## Documentation
55
- You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki/Documentation-Index)
55
+ You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
56
56
 
57
57
  ## Development
58
58
 
@@ -0,0 +1,26 @@
1
+ module Statistics
2
+ module Distribution
3
+ class Empirical
4
+ attr_accessor :samples
5
+
6
+ def initialize(samples:)
7
+ self.samples = samples
8
+ end
9
+
10
+ # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
11
+ def cumulative_function(x:)
12
+ cumulative_sum = samples.reduce(0) do |summation, sample|
13
+ summation += if sample <= x
14
+ 1
15
+ else
16
+ 0
17
+ end
18
+
19
+ summation
20
+ end
21
+
22
+ cumulative_sum / samples.size.to_f
23
+ end
24
+ end
25
+ end
26
+ end
@@ -45,7 +45,7 @@ module Statistics
45
45
  # Using the inverse CDF function, also called quantile, we can calculate
46
46
  # a random sample that follows a weibull distribution.
47
47
  #
48
- # Formula extracted from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
48
+ # Formula extracted from https://www.taygeta.com/random/weibull.html
49
49
  def random(elements: 1, seed: Random.new_seed)
50
50
  results = []
51
51
 
@@ -0,0 +1,71 @@
1
+ module Statistics
2
+ class SpearmanRankCoefficient
3
+ def self.rank(data:, return_ranks_only: true)
4
+ descending_order_data = data.sort { |a, b| b <=> a }
5
+ rankings = {}
6
+
7
+ data.each do |value|
8
+ # If we have ties, the find_index method will only retrieve the index of the
9
+ # first element in the list (i.e., the one closest to the left of the array),
10
+ # so when a tie is detected, we increase the temporal ranking by the number of
11
+ # counted elements at that particular time and then we increase the counter.
12
+ temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index
13
+
14
+ if rankings.fetch(value, false)
15
+ rankings[value][:rank] += (temporal_ranking + rankings[value][:counter])
16
+ rankings[value][:counter] += 1
17
+ rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_f
18
+ else
19
+ rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking }
20
+ end
21
+ end
22
+
23
+ if return_ranks_only
24
+ data.map do |value|
25
+ rankings[value][:tie_rank]
26
+ end
27
+ else
28
+ rankings
29
+ end
30
+ end
31
+
32
+ # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
33
+ def self.coefficient(set_one, set_two)
34
+ raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
35
+ return if set_one.size == 0 && set_two.size == 0
36
+
37
+ set_one_mean, set_two_mean = set_one.mean, set_two.mean
38
+ have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) }
39
+
40
+ if have_tie_ranks
41
+ numerator = 0
42
+ squared_differences_set_one = 0
43
+ squared_differences_set_two = 0
44
+
45
+ set_one.size.times do |idx|
46
+ local_diff_one = (set_one[idx] - set_one_mean)
47
+ local_diff_two = (set_two[idx] - set_two_mean)
48
+
49
+ squared_differences_set_one += local_diff_one ** 2
50
+ squared_differences_set_two += local_diff_two ** 2
51
+
52
+ numerator += local_diff_one * local_diff_two
53
+ end
54
+
55
+ denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)
56
+
57
+ numerator / denominator.to_f # This is rho or spearman's coefficient.
58
+ else
59
+ sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)|
60
+ memo += ((rank_one - set_two[index]) ** 2)
61
+ memo
62
+ end
63
+
64
+ numerator = 6 * sum_squared_differences
65
+ denominator = ((set_one.size ** 3) - set_one.size)
66
+
67
+ 1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,70 @@
1
+ module Statistics
2
+ module StatisticalTest
3
+ class KolmogorovSmirnovTest
4
+ # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
5
+ def self.two_samples(group_one:, group_two:, alpha: 0.05)
6
+ samples = group_one + group_two # We can use unbalanced group samples
7
+
8
+ ecdf_one = Distribution::Empirical.new(samples: group_one)
9
+ ecdf_two = Distribution::Empirical.new(samples: group_two)
10
+
11
+ d_max = samples.sort.map do |sample|
12
+ d1 = ecdf_one.cumulative_function(x: sample)
13
+ d2 = ecdf_two.cumulative_function(x: sample)
14
+
15
+ (d1 - d2).abs
16
+ end.max
17
+
18
+ # TODO: Validate calculation of Common alpha.
19
+ common_alpha = Math.sqrt((-0.5 * Math.log(alpha)))
20
+ radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f
21
+
22
+ critical_d = common_alpha * Math.sqrt(radicand)
23
+ # critical_d = self.critical_d(alpha: alpha, n: samples.size)
24
+
25
+ # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
26
+ # defined. We reject the null hypothesis if Dmax is greater than Dcritical.
27
+ { d_max: d_max,
28
+ d_critical: critical_d,
29
+ total_samples: samples.size,
30
+ alpha: alpha,
31
+ null: d_max <= critical_d,
32
+ alternative: d_max > critical_d,
33
+ confidence_level: 1.0 - alpha }
34
+ end
35
+
36
+ # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper,
37
+ # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
38
+ # normality test".
39
+ # In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
40
+ # to find an approximation of the real critical value. This is based on the conclusions made by
41
+ # Dagnelie (1968), who indicates that critical values given by Lilliefors can be approximated
42
+ # numerically.
43
+ #
44
+ # In general, the formula found is:
45
+ # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha).
46
+ #
47
+ # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
48
+ # of Monte Carlo simulations.
49
+ #
50
+ # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
51
+ # def self.critical_d(alpha:, n:)
52
+ # confidence = 1.0 - alpha
53
+
54
+ # a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
55
+ # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
56
+ # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
57
+ # 18.48090137098585 * (confidence ** 6)
58
+
59
+ # b_alpha = 12.940399038404 - 53.458334259532 * confidence +
60
+ # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
61
+ # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
62
+ # 92.123451358715 * (confidence ** 6)
63
+
64
+ # Math.sqrt(1.0 / (a_alpha * n + b_alpha))
65
+ # end
66
+ end
67
+
68
+ KSTest = KolmogorovSmirnovTest # Alias
69
+ end
70
+ end
@@ -1,3 +1,3 @@
1
1
  module Statistics
2
- VERSION = "2.0.5"
2
+ VERSION = "2.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.5
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - esteban zapata
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-04 00:00:00.000000000 Z
11
+ date: 2018-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -126,6 +126,7 @@ files:
126
126
  - ".rspec"
127
127
  - ".travis.yml"
128
128
  - CODE_OF_CONDUCT.md
129
+ - CONTRIBUTING.md
129
130
  - Gemfile
130
131
  - LICENSE
131
132
  - LICENSE.txt
@@ -141,6 +142,7 @@ files:
141
142
  - lib/statistics/distribution/beta.rb
142
143
  - lib/statistics/distribution/binomial.rb
143
144
  - lib/statistics/distribution/chi_squared.rb
145
+ - lib/statistics/distribution/empirical.rb
144
146
  - lib/statistics/distribution/f.rb
145
147
  - lib/statistics/distribution/geometric.rb
146
148
  - lib/statistics/distribution/logseries.rb
@@ -150,9 +152,11 @@ files:
150
152
  - lib/statistics/distribution/t_student.rb
151
153
  - lib/statistics/distribution/uniform.rb
152
154
  - lib/statistics/distribution/weibull.rb
155
+ - lib/statistics/spearman_rank_coefficient.rb
153
156
  - lib/statistics/statistical_test.rb
154
157
  - lib/statistics/statistical_test/chi_squared_test.rb
155
158
  - lib/statistics/statistical_test/f_test.rb
159
+ - lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
156
160
  - lib/statistics/statistical_test/t_test.rb
157
161
  - lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
158
162
  - lib/statistics/version.rb
@@ -177,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
177
181
  version: '0'
178
182
  requirements: []
179
183
  rubyforge_project:
180
- rubygems_version: 2.7.3
184
+ rubygems_version: 2.7.7
181
185
  signing_key:
182
186
  specification_version: 4
183
187
  summary: A ruby gem for some specific statistics. Inspired by the jStat js library.