RubyGems - ruby-statistics - Versions diffs - 2.0.5 → 2.1.0 - Mend

ruby-statistics 2.0.5 → 2.1.0

Files changed (9) hide show

checksums.yaml +4 -4
data/CONTRIBUTING.md +1 -0
data/README.md +1 -1
data/lib/statistics/distribution/empirical.rb +26 -0
data/lib/statistics/distribution/weibull.rb +1 -1
data/lib/statistics/spearman_rank_coefficient.rb +71 -0
data/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb +70 -0
data/lib/statistics/version.rb +1 -1
metadata +7 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1e73ce22e1ad6da4f9d2925ed5884d6e72d915ebad14b56b9e81c64c422c14cf
-  data.tar.gz: 18fa7cafd8bbf457dd8963b2e666525964ad588257ffc8774cee2d1250f4b469
+  metadata.gz: d33cf13ba623ecbb23488499a13a52c75c906b3eb258e7b146b628d10a84de89
+  data.tar.gz: a075adb8960b0906cd276a138e70fd3bc76c738d9ed70afac08d57ccd50d86b3
 SHA512:
-  metadata.gz: d362a9a2a5ea950ccc37ca5754dca0644040f49563afb14bbd10792ac2ef1f13853724cb693107fa6eb571972432026c5b3cf363ea4932513360f33513147401
-  data.tar.gz: cd5dd0a2b386fcaa0190d369a1f9d40248d4ed315e165df43403884cb33438902dd080a3f1579ba498eb88a5f2936ba91f6cd4a9e31a948ac31e2d124ba232fe
+  metadata.gz: '0799701996d9c3496e35b9f2f73024c359bbc263c3e34995b047bbbda1c0acff8b1ae5bf323bd3f2fd1901bb9eb3331271ccbd2fc16d6911d179644a8ad1878f'
+  data.tar.gz: fe31571ab416c16b9832a4dff937e583c41a67b58a4676315f5cbde7720773cbb705b35973e16ef3e002cc542ea7b947e4a93157ae1ea09fecaa0950ecea7ab1

data/CONTRIBUTING.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).

data/README.md CHANGED Viewed

@@ -52,7 +52,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
 ```
 ## Documentation
-You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki/Documentation-Index)
+You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
 ## Development

data/lib/statistics/distribution/empirical.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Statistics
+  module Distribution
+    class Empirical
+      attr_accessor :samples
+      def initialize(samples:)
+        self.samples = samples
+      end
+      # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
+      def cumulative_function(x:)
+        cumulative_sum = samples.reduce(0) do |summation, sample|
+          summation += if sample <= x
+                         1
+                       else
+                         0
+                       end
+          summation
+        end
+        cumulative_sum / samples.size.to_f
+      end
+    end
+  end
+end

data/lib/statistics/distribution/weibull.rb CHANGED Viewed

@@ -45,7 +45,7 @@ module Statistics
       # Using the inverse CDF function, also called quantile, we can calculate
       # a random sample that follows a weibull distribution.
       #
-      # Formula extracted from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
+      # Formula extracted from https://www.taygeta.com/random/weibull.html
       def random(elements: 1, seed: Random.new_seed)
         results = []

data/lib/statistics/spearman_rank_coefficient.rb ADDED Viewed

@@ -0,0 +1,71 @@
+module Statistics
+  class SpearmanRankCoefficient
+    def self.rank(data:, return_ranks_only: true)
+      descending_order_data = data.sort { |a, b| b <=> a }
+      rankings = {}
+      data.each do |value|
+        # If we have ties, the find_index method will only retrieve the index of the
+        # first matching element in the list (i.e., the one closest to the left of the array),
+        # so when a tie is detected, we increase the temporal ranking by the number of
+        # elements counted so far for that value, and then we increase the counter.
+        temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index
+        if rankings.fetch(value, false)
+          rankings[value][:rank] += (temporal_ranking + rankings[value][:counter])
+          rankings[value][:counter] += 1
+          rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_f
+        else
+          rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking }
+        end
+      end
+      if return_ranks_only
+        data.map do |value|
+          rankings[value][:tie_rank]
+        end
+      else
+        rankings
+      end
+    end
+    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
+    def self.coefficient(set_one, set_two)
+      raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
+      return if set_one.size == 0 && set_two.size == 0
+      set_one_mean, set_two_mean = set_one.mean, set_two.mean
+      have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) }
+      if have_tie_ranks
+        numerator = 0
+        squared_differences_set_one = 0
+        squared_differences_set_two = 0
+        set_one.size.times do |idx|
+          local_diff_one = (set_one[idx] - set_one_mean)
+          local_diff_two = (set_two[idx] - set_two_mean)
+          squared_differences_set_one += local_diff_one ** 2
+          squared_differences_set_two += local_diff_two ** 2
+          numerator += local_diff_one * local_diff_two
+        end
+        denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)
+        numerator / denominator.to_f # This is rho or spearman's coefficient.
+      else
+        sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)|
+          memo += ((rank_one - set_two[index]) ** 2)
+          memo
+        end
+        numerator = 6 * sum_squared_differences
+        denominator = ((set_one.size ** 3) - set_one.size)
+        1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
+      end
+    end
+  end
+end

data/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb ADDED Viewed

@@ -0,0 +1,70 @@
+module Statistics
+  module StatisticalTest
+    class KolmogorovSmirnovTest
+      # Common alpha and critical D are calculated following the formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
+      def self.two_samples(group_one:, group_two:, alpha: 0.05)
+        samples = group_one + group_two # We can use unbalanced group samples
+        ecdf_one = Distribution::Empirical.new(samples: group_one)
+        ecdf_two = Distribution::Empirical.new(samples: group_two)
+        d_max = samples.sort.map do |sample|
+          d1 = ecdf_one.cumulative_function(x: sample)
+          d2 = ecdf_two.cumulative_function(x: sample)
+          (d1 - d2).abs
+        end.max
+        # TODO: Validate calculation of Common alpha.
+        common_alpha = Math.sqrt((-0.5 * Math.log(alpha)))
+        radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f
+        critical_d = common_alpha * Math.sqrt(radicand)
+        # critical_d = self.critical_d(alpha: alpha, n: samples.size)
+        # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
+        # defined. We reject the null hypothesis if Dmax is greater than Dcritical.
+        { d_max: d_max,
+          d_critical: critical_d,
+          total_samples: samples.size,
+          alpha: alpha,
+          null: d_max <= critical_d,
+          alternative: d_max > critical_d,
+          confidence_level: 1.0 - alpha }
+      end
+      # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper
+      # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
+      # normality test".
+      # In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
+      # to find an approximation of the real critical value. This is based on the conclusions made by
+      # Dagnelie (1968), who indicates that critical values given by Lilliefors can be approximated
+      # numerically.
+      #
+      # In general, the formula found is:
+      #  C(N, alpha) ^ -2  = A(alpha) * N + B(alpha).
+      #
+      # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
+      # of Monte Carlo simulations.
+      #
+      # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
+      # def self.critical_d(alpha:, n:)
+      #   confidence = 1.0 - alpha
+      #   a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
+      #     38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
+      #     7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
+      #     18.48090137098585 * (confidence ** 6)
+      #   b_alpha = 12.940399038404 - 53.458334259532 * confidence +
+      #     186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
+      #     517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
+      #     92.123451358715 * (confidence ** 6)
+      #   Math.sqrt(1.0 / (a_alpha * n + b_alpha))
+      # end
+    end
+    KSTest = KolmogorovSmirnovTest # Alias
+  end
+end

data/lib/statistics/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Statistics
-  VERSION = "2.0.5"
+  VERSION = "2.1.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-statistics
 version: !ruby/object:Gem::Version
-  version: 2.0.5
+  version: 2.1.0
 platform: ruby
 authors:
 - esteban zapata
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-07-04 00:00:00.000000000 Z
+date: 2018-12-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -126,6 +126,7 @@ files:
 - ".rspec"
 - ".travis.yml"
 - CODE_OF_CONDUCT.md
+- CONTRIBUTING.md
 - Gemfile
 - LICENSE
 - LICENSE.txt
@@ -141,6 +142,7 @@ files:
 - lib/statistics/distribution/beta.rb
 - lib/statistics/distribution/binomial.rb
 - lib/statistics/distribution/chi_squared.rb
+- lib/statistics/distribution/empirical.rb
 - lib/statistics/distribution/f.rb
 - lib/statistics/distribution/geometric.rb
 - lib/statistics/distribution/logseries.rb
@@ -150,9 +152,11 @@ files:
 - lib/statistics/distribution/t_student.rb
 - lib/statistics/distribution/uniform.rb
 - lib/statistics/distribution/weibull.rb
+- lib/statistics/spearman_rank_coefficient.rb
 - lib/statistics/statistical_test.rb
 - lib/statistics/statistical_test/chi_squared_test.rb
 - lib/statistics/statistical_test/f_test.rb
+- lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
 - lib/statistics/statistical_test/t_test.rb
 - lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
 - lib/statistics/version.rb
@@ -177,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.7.3
+rubygems_version: 2.7.7
 signing_key:
 specification_version: 4
 summary: A ruby gem for som specific statistics. Inspired by the jStat js library.