ruby-statistics 2.0.5 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e73ce22e1ad6da4f9d2925ed5884d6e72d915ebad14b56b9e81c64c422c14cf
4
- data.tar.gz: 18fa7cafd8bbf457dd8963b2e666525964ad588257ffc8774cee2d1250f4b469
3
+ metadata.gz: d33cf13ba623ecbb23488499a13a52c75c906b3eb258e7b146b628d10a84de89
4
+ data.tar.gz: a075adb8960b0906cd276a138e70fd3bc76c738d9ed70afac08d57ccd50d86b3
5
5
  SHA512:
6
- metadata.gz: d362a9a2a5ea950ccc37ca5754dca0644040f49563afb14bbd10792ac2ef1f13853724cb693107fa6eb571972432026c5b3cf363ea4932513360f33513147401
7
- data.tar.gz: cd5dd0a2b386fcaa0190d369a1f9d40248d4ed315e165df43403884cb33438902dd080a3f1579ba498eb88a5f2936ba91f6cd4a9e31a948ac31e2d124ba232fe
6
+ metadata.gz: '0799701996d9c3496e35b9f2f73024c359bbc263c3e34995b047bbbda1c0acff8b1ae5bf323bd3f2fd1901bb9eb3331271ccbd2fc16d6911d179644a8ad1878f'
7
+ data.tar.gz: fe31571ab416c16b9832a4dff937e583c41a67b58a4676315f5cbde7720773cbb705b35973e16ef3e002cc542ea7b947e4a93157ae1ea09fecaa0950ecea7ab1
data/CONTRIBUTING.md ADDED
@@ -0,0 +1 @@
1
+ Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).
data/README.md CHANGED
@@ -52,7 +52,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
52
52
  ```
53
53
 
54
54
  ## Documentation
55
- You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki/Documentation-Index)
55
+ You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
56
56
 
57
57
  ## Development
58
58
 
@@ -0,0 +1,26 @@
1
+ module Statistics
2
+ module Distribution
3
+ class Empirical
4
+ attr_accessor :samples
5
+
6
+ def initialize(samples:)
7
+ self.samples = samples
8
+ end
9
+
10
+ # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
11
+ def cumulative_function(x:)
12
+ cumulative_sum = samples.reduce(0) do |summation, sample|
13
+ summation += if sample <= x
14
+ 1
15
+ else
16
+ 0
17
+ end
18
+
19
+ summation
20
+ end
21
+
22
+ cumulative_sum / samples.size.to_f
23
+ end
24
+ end
25
+ end
26
+ end
@@ -45,7 +45,7 @@ module Statistics
45
45
  # Using the inverse CDF function, also called quantile, we can calculate
46
46
  # a random sample that follows a weibull distribution.
47
47
  #
48
- # Formula extracted from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
48
+ # Formula extracted from https://www.taygeta.com/random/weibull.html
49
49
  def random(elements: 1, seed: Random.new_seed)
50
50
  results = []
51
51
 
@@ -0,0 +1,71 @@
1
+ module Statistics
2
+ class SpearmanRankCoefficient
3
+ def self.rank(data:, return_ranks_only: true)
4
+ descending_order_data = data.sort { |a, b| b <=> a }
5
+ rankings = {}
6
+
7
+ data.each do |value|
8
+ # If we have ties, the find_index method will only retrieve the index of the
9
+ # first element in the list (i.e., the one closest to the left of the array),
10
+ # so when a tie is detected, we increase the temporal ranking by the number of
11
+ # counted elements at that particular time and then we increase the counter.
12
+ temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index
13
+
14
+ if rankings.fetch(value, false)
15
+ rankings[value][:rank] += (temporal_ranking + rankings[value][:counter])
16
+ rankings[value][:counter] += 1
17
+ rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_f
18
+ else
19
+ rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking }
20
+ end
21
+ end
22
+
23
+ if return_ranks_only
24
+ data.map do |value|
25
+ rankings[value][:tie_rank]
26
+ end
27
+ else
28
+ rankings
29
+ end
30
+ end
31
+
32
+ # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
33
+ def self.coefficient(set_one, set_two)
34
+ raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
35
+ return if set_one.size == 0 && set_two.size == 0
36
+
37
+ set_one_mean, set_two_mean = set_one.mean, set_two.mean
38
+ have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) }
39
+
40
+ if have_tie_ranks
41
+ numerator = 0
42
+ squared_differences_set_one = 0
43
+ squared_differences_set_two = 0
44
+
45
+ set_one.size.times do |idx|
46
+ local_diff_one = (set_one[idx] - set_one_mean)
47
+ local_diff_two = (set_two[idx] - set_two_mean)
48
+
49
+ squared_differences_set_one += local_diff_one ** 2
50
+ squared_differences_set_two += local_diff_two ** 2
51
+
52
+ numerator += local_diff_one * local_diff_two
53
+ end
54
+
55
+ denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)
56
+
57
+ numerator / denominator.to_f # This is rho or spearman's coefficient.
58
+ else
59
+ sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)|
60
+ memo += ((rank_one - set_two[index]) ** 2)
61
+ memo
62
+ end
63
+
64
+ numerator = 6 * sum_squared_differences
65
+ denominator = ((set_one.size ** 3) - set_one.size)
66
+
67
+ 1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,70 @@
1
+ module Statistics
2
+ module StatisticalTest
3
+ class KolmogorovSmirnovTest
4
+ # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
5
+ def self.two_samples(group_one:, group_two:, alpha: 0.05)
6
+ samples = group_one + group_two # We can use unbalanced group samples
7
+
8
+ ecdf_one = Distribution::Empirical.new(samples: group_one)
9
+ ecdf_two = Distribution::Empirical.new(samples: group_two)
10
+
11
+ d_max = samples.sort.map do |sample|
12
+ d1 = ecdf_one.cumulative_function(x: sample)
13
+ d2 = ecdf_two.cumulative_function(x: sample)
14
+
15
+ (d1 - d2).abs
16
+ end.max
17
+
18
+ # TODO: Validate calculation of Common alpha.
19
+ common_alpha = Math.sqrt((-0.5 * Math.log(alpha)))
20
+ radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f
21
+
22
+ critical_d = common_alpha * Math.sqrt(radicand)
23
+ # critical_d = self.critical_d(alpha: alpha, n: samples.size)
24
+
25
+ # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
26
+ # defined. We reject the null hypothesis if Dmax is greater than Dcritical.
27
+ { d_max: d_max,
28
+ d_critical: critical_d,
29
+ total_samples: samples.size,
30
+ alpha: alpha,
31
+ null: d_max <= critical_d,
32
+ alternative: d_max > critical_d,
33
+ confidence_level: 1.0 - alpha }
34
+ end
35
+
36
+ # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper,
37
+ # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
38
+ # normality test".
39
+ # In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
40
+ # to find an approximation of the real critical value. This is based on the conclusions made by
41
+ # Dagnelie (1968), who indicates that critical values given by Lilliefors can be approximated
42
+ # numerically.
43
+ #
44
+ # In general, the formula found is:
45
+ # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha).
46
+ #
47
+ # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
48
+ # of Monte Carlo simulations.
49
+ #
50
+ # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
51
+ # def self.critical_d(alpha:, n:)
52
+ # confidence = 1.0 - alpha
53
+
54
+ # a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
55
+ # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
56
+ # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
57
+ # 18.48090137098585 * (confidence ** 6)
58
+
59
+ # b_alpha = 12.940399038404 - 53.458334259532 * confidence +
60
+ # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
61
+ # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
62
+ # 92.123451358715 * (confidence ** 6)
63
+
64
+ # Math.sqrt(1.0 / (a_alpha * n + b_alpha))
65
+ # end
66
+ end
67
+
68
+ KSTest = KolmogorovSmirnovTest # Alias
69
+ end
70
+ end
@@ -1,3 +1,3 @@
1
1
  module Statistics
2
- VERSION = "2.0.5"
2
+ VERSION = "2.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.5
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - esteban zapata
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-04 00:00:00.000000000 Z
11
+ date: 2018-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -126,6 +126,7 @@ files:
126
126
  - ".rspec"
127
127
  - ".travis.yml"
128
128
  - CODE_OF_CONDUCT.md
129
+ - CONTRIBUTING.md
129
130
  - Gemfile
130
131
  - LICENSE
131
132
  - LICENSE.txt
@@ -141,6 +142,7 @@ files:
141
142
  - lib/statistics/distribution/beta.rb
142
143
  - lib/statistics/distribution/binomial.rb
143
144
  - lib/statistics/distribution/chi_squared.rb
145
+ - lib/statistics/distribution/empirical.rb
144
146
  - lib/statistics/distribution/f.rb
145
147
  - lib/statistics/distribution/geometric.rb
146
148
  - lib/statistics/distribution/logseries.rb
@@ -150,9 +152,11 @@ files:
150
152
  - lib/statistics/distribution/t_student.rb
151
153
  - lib/statistics/distribution/uniform.rb
152
154
  - lib/statistics/distribution/weibull.rb
155
+ - lib/statistics/spearman_rank_coefficient.rb
153
156
  - lib/statistics/statistical_test.rb
154
157
  - lib/statistics/statistical_test/chi_squared_test.rb
155
158
  - lib/statistics/statistical_test/f_test.rb
159
+ - lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
156
160
  - lib/statistics/statistical_test/t_test.rb
157
161
  - lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
158
162
  - lib/statistics/version.rb
@@ -177,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
177
181
  version: '0'
178
182
  requirements: []
179
183
  rubyforge_project:
180
- rubygems_version: 2.7.3
184
+ rubygems_version: 2.7.7
181
185
  signing_key:
182
186
  specification_version: 4
183
187
  summary: A ruby gem for som specific statistics. Inspired by the jStat js library.