RubyGems - ruby-statistics - Versions diffs - 2.0.5 → 2.1.0 - Mend

ruby-statistics 2.0.5 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/CONTRIBUTING.md +1 -0
data/README.md +1 -1
data/lib/statistics/distribution/empirical.rb +26 -0
data/lib/statistics/distribution/weibull.rb +1 -1
data/lib/statistics/spearman_rank_coefficient.rb +71 -0
data/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb +70 -0
data/lib/statistics/version.rb +1 -1
metadata +7 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1e73ce22e1ad6da4f9d2925ed5884d6e72d915ebad14b56b9e81c64c422c14cf
-  data.tar.gz: 18fa7cafd8bbf457dd8963b2e666525964ad588257ffc8774cee2d1250f4b469
+  metadata.gz: d33cf13ba623ecbb23488499a13a52c75c906b3eb258e7b146b628d10a84de89
+  data.tar.gz: a075adb8960b0906cd276a138e70fd3bc76c738d9ed70afac08d57ccd50d86b3
 SHA512:
-  metadata.gz: d362a9a2a5ea950ccc37ca5754dca0644040f49563afb14bbd10792ac2ef1f13853724cb693107fa6eb571972432026c5b3cf363ea4932513360f33513147401
-  data.tar.gz: cd5dd0a2b386fcaa0190d369a1f9d40248d4ed315e165df43403884cb33438902dd080a3f1579ba498eb88a5f2936ba91f6cd4a9e31a948ac31e2d124ba232fe
+  metadata.gz: '0799701996d9c3496e35b9f2f73024c359bbc263c3e34995b047bbbda1c0acff8b1ae5bf323bd3f2fd1901bb9eb3331271ccbd2fc16d6911d179644a8ad1878f'
+  data.tar.gz: fe31571ab416c16b9832a4dff937e583c41a67b58a4676315f5cbde7720773cbb705b35973e16ef3e002cc542ea7b947e4a93157ae1ea09fecaa0950ecea7ab1

data/CONTRIBUTING.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/).

data/README.md CHANGED Viewed

@@ -52,7 +52,7 @@ normal = Statistics::Distribution::StandardNormal.new # Using all namespaces.
 ```
 ## Documentation
-You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki/Documentation-Index)
+You can find a bit more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki)
 ## Development

data/lib/statistics/distribution/empirical.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Statistics
+  module Distribution
+    # Empirical distribution backed directly by a collection of observed samples.
+    class Empirical
+      attr_accessor :samples
+      # samples - the observations; the collection must answer `count` and `size`.
+      def initialize(samples:)
+        self.samples = samples
+      end
+      # Empirical cumulative distribution function: the share of stored samples
+      # that are less than or equal to `x`, as a Float. With no samples the
+      # division is 0 / 0.0, so the result is NaN.
+      #
+      # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution
+      def cumulative_function(x:)
+        samples_at_or_below = samples.count { |sample| sample <= x }
+        samples_at_or_below / samples.size.to_f
+      end
+    end
+  end
+end

data/lib/statistics/distribution/weibull.rb CHANGED Viewed

@@ -45,7 +45,7 @@ module Statistics
       # Using the inverse CDF function, also called quantile, we can calculate
       # a random sample that follows a weibull distribution.
       #
-      # Formula extracted from http://www.stat.yale.edu/Courses/1997-98/101/chigf.htm
+      # Formula extracted from https://www.taygeta.com/random/weibull.html
       def random(elements: 1, seed: Random.new_seed)
         results = []

data/lib/statistics/spearman_rank_coefficient.rb ADDED Viewed

@@ -0,0 +1,71 @@
+module Statistics
+  # Helpers for Spearman's rank correlation: ranking raw data and computing
+  # rho from two sets of ranks.
+  class SpearmanRankCoefficient
+    # Ranks `data` in descending order, so the largest value receives rank 1.
+    # Repeated values share the average of the positions they occupy.
+    #
+    # data              - list of mutually comparable values.
+    # return_ranks_only - when true (default), returns the ranks in the same order
+    #                     as `data`; otherwise returns the bookkeeping hash keyed by
+    #                     value, holding :counter, :rank and :tie_rank.
+    def self.rank(data:, return_ranks_only: true)
+      sorted_desc = data.sort { |left, right| right <=> left }
+      rankings = data.each_with_object({}) do |value, memo|
+        # find_index only reports the left-most position of a repeated value, so
+        # every further occurrence adds that position shifted by the number of
+        # occurrences already seen; tie_rank is then the running average.
+        first_position = sorted_desc.find_index(value) + 1 # find_index is 0-based
+        entry = memo[value]
+        if entry
+          entry[:rank] += first_position + entry[:counter]
+          entry[:counter] += 1
+          entry[:tie_rank] = entry[:rank] / entry[:counter].to_f
+        else
+          memo[value] = { counter: 1, rank: first_position, tie_rank: first_position }
+        end
+      end
+      return rankings unless return_ranks_only
+      data.map { |value| rankings[value][:tie_rank] }
+    end
+    # Computes rho (Spearman's coefficient) from two equally sized sets of ranks.
+    # Returns nil when both sets are empty and raises when the sizes differ.
+    # Any Float rank is taken as a sign of ties and switches to the
+    # covariance-based formula; otherwise the simplified formula is used.
+    # NOTE(review): `mean` is not defined in this class - presumably it comes from
+    # an Enumerable/Array extension elsewhere in the gem; confirm.
+    #
+    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
+    def self.coefficient(set_one, set_two)
+      cases = set_one.size
+      raise 'Both group sets must have the same number of cases.' unless cases == set_two.size
+      return if cases.zero?
+      mean_one = set_one.mean
+      mean_two = set_two.mean
+      tied = (set_one + set_two).any? { |rank| rank.is_a?(Float) }
+      unless tied
+        squared_gap_total = set_one.each_with_index.reduce(0) do |total, (rank_one, position)|
+          total + ((rank_one - set_two[position]) ** 2)
+        end
+        # This is rho or spearman's coefficient.
+        return 1.0 - ((6 * squared_gap_total) / ((cases ** 3) - cases).to_f)
+      end
+      covariance = 0
+      spread_one = 0
+      spread_two = 0
+      set_one.zip(set_two) do |rank_one, rank_two|
+        deviation_one = rank_one - mean_one
+        deviation_two = rank_two - mean_two
+        spread_one += deviation_one ** 2
+        spread_two += deviation_two ** 2
+        covariance += deviation_one * deviation_two
+      end
+      covariance / Math.sqrt(spread_one * spread_two).to_f # This is rho or spearman's coefficient.
+    end
+  end
+end

data/lib/statistics/statistical_test/kolmogorov_smirnov_test.rb ADDED Viewed

@@ -0,0 +1,70 @@
+module Statistics
+  module StatisticalTest
+    # Two-sample Kolmogorov-Smirnov test built on the empirical CDF of each group.
+    class KolmogorovSmirnovTest
+      # Compares two groups of samples through the largest absolute difference
+      # between their empirical CDFs (Dmax) and a critical value (Dcritical).
+      #
+      # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
+      #
+      # group_one - non-empty list of numeric samples.
+      # group_two - non-empty list of numeric samples; its size may differ from group_one.
+      # alpha     - significance level of the two-sided test (default 0.05).
+      #
+      # Returns a Hash with :d_max, :d_critical, :total_samples, :alpha,
+      # :null (true when Dmax <= Dcritical), :alternative (true when
+      # Dmax > Dcritical) and :confidence_level (1.0 - alpha).
+      # Raises ArgumentError when either group is empty.
+      def self.two_samples(group_one:, group_two:, alpha: 0.05)
+        # An empty group has no empirical CDF (0 / 0.0) and makes the radicand below
+        # divide by zero, so fail loudly instead of returning NaN/Infinity results.
+        if group_one.empty? || group_two.empty?
+          raise ArgumentError, 'Both groups must contain at least one sample.'
+        end
+        samples = group_one + group_two # We can use unbalanced group samples
+        ecdf_one = Distribution::Empirical.new(samples: group_one)
+        ecdf_two = Distribution::Empirical.new(samples: group_two)
+        # The maximum does not depend on the order in which samples are visited,
+        # so the pooled samples do not need to be sorted first.
+        d_max = samples.map do |sample|
+          d1 = ecdf_one.cumulative_function(x: sample)
+          d2 = ecdf_two.cumulative_function(x: sample)
+          (d1 - d2).abs
+        end.max
+        # Two-sided critical coefficient: c(alpha) = sqrt(-ln(alpha / 2) / 2), which
+        # gives 1.358 for alpha = 0.05. Using ln(alpha) without halving yields the
+        # one-sided value (1.224) and rejects the null hypothesis too often.
+        common_alpha = Math.sqrt(-0.5 * Math.log(alpha / 2.0))
+        radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_f
+        critical_d = common_alpha * Math.sqrt(radicand)
+        # critical_d = self.critical_d(alpha: alpha, n: samples.size)
+        # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution
+        # defined. We reject the null hypothesis if Dmax is > than Dcritical.
+        { d_max: d_max,
+          d_critical: critical_d,
+          total_samples: samples.size,
+          alpha: alpha,
+          null: d_max <= critical_d,
+          alternative: d_max > critical_d,
+          confidence_level: 1.0 - alpha }
+      end
+      # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper
+      # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest
+      # normality test".
+      # In this paper, the authors define a couple of 6th-degree polynomial functions that allow us
+      # to find an approximation of the real critical value. This is based on the conclusions made by
+      # Dagnelie (1968), who indicates that the critical values given by Lilliefors can be approximated
+      # numerically.
+      #
+      # In general, the formula found is:
+      #  C(N, alpha) ^ -2  = A(alpha) * N + B(alpha).
+      #
+      # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle
+      # of Monte Carlo simulations.
+      #
+      # The implementation below is intentionally left commented out (not in use).
+      #
+      # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf
+      # def self.critical_d(alpha:, n:)
+      #   confidence = 1.0 - alpha
+      #   a_alpha = 6.32207539843126 -17.1398870006148 * confidence +
+      #     38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) +
+      #     7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) -
+      #     18.48090137098585 * (confidence ** 6)
+      #   b_alpha = 12.940399038404 - 53.458334259532 * confidence +
+      #     186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) +
+      #     517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) +
+      #     92.123451358715 * (confidence ** 6)
+      #   Math.sqrt(1.0 / (a_alpha * n + b_alpha))
+      # end
+    end
+    KSTest = KolmogorovSmirnovTest # Alias
+  end
+end

data/lib/statistics/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Statistics
-  VERSION = "2.0.5"
+  VERSION = "2.1.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-statistics
 version: !ruby/object:Gem::Version
-  version: 2.0.5
+  version: 2.1.0
 platform: ruby
 authors:
 - esteban zapata
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-07-04 00:00:00.000000000 Z
+date: 2018-12-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -126,6 +126,7 @@ files:
 - ".rspec"
 - ".travis.yml"
 - CODE_OF_CONDUCT.md
+- CONTRIBUTING.md
 - Gemfile
 - LICENSE
 - LICENSE.txt
@@ -141,6 +142,7 @@ files:
 - lib/statistics/distribution/beta.rb
 - lib/statistics/distribution/binomial.rb
 - lib/statistics/distribution/chi_squared.rb
+- lib/statistics/distribution/empirical.rb
 - lib/statistics/distribution/f.rb
 - lib/statistics/distribution/geometric.rb
 - lib/statistics/distribution/logseries.rb
@@ -150,9 +152,11 @@ files:
 - lib/statistics/distribution/t_student.rb
 - lib/statistics/distribution/uniform.rb
 - lib/statistics/distribution/weibull.rb
+- lib/statistics/spearman_rank_coefficient.rb
 - lib/statistics/statistical_test.rb
 - lib/statistics/statistical_test/chi_squared_test.rb
 - lib/statistics/statistical_test/f_test.rb
+- lib/statistics/statistical_test/kolmogorov_smirnov_test.rb
 - lib/statistics/statistical_test/t_test.rb
 - lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb
 - lib/statistics/version.rb
@@ -177,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.7.3
+rubygems_version: 2.7.7
 signing_key:
 specification_version: 4
 summary: A ruby gem for some specific statistics. Inspired by the jStat js library.