RubyGems - enumerable-stats - Versions diffs - 1.0.0 → 1.2.0 - Mend

enumerable-stats 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml +4 -4
data/lib/enumerable-stats.rb +1 -1
data/lib/enumerable_stats/enumerable_ext.rb +200 -10
metadata +6 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: bd06a4ad69a379469e16e45d5eca8debb2a786edc66c3208429adde1c6f80f20
-  data.tar.gz: 2a8823cdf6f5fe2793e9df99dbd71f4427a63146fdd165475204fe4493607355
+  metadata.gz: 8c04e21076f0a3ebbf61538159c1348107ca5ed371f4b1f135212dd8d932e184
+  data.tar.gz: 32ef265cbaac6a2801e01fb67b73a12313a65d900bc9e1ca0bbc0bfa0bc98f0d
 SHA512:
-  metadata.gz: 308025ee228b384520cff759f5664beace555bd66e3894c389e1547359e2eb90e7fad1bfd72c6774f5b1b327c222a5dd9e8a278b9e16155a9a7e48a693bc60bc
-  data.tar.gz: 224af711d1b3b24d8218cf15adb116216dcb57c81f677b5af8c02c238e2ea304e2ac3537aa9c221750055b68a61d15cce578ee5e7a7687283cce4fb0f917f5dd
+  metadata.gz: 2ebf8b1692788056fe5a6492cd4b986d1b88372e6877cffab441550980e1bf1dd775ebb7254c1bd6c2c91247c0890e6dadec67ab906ce75a3ba486a10a42815d
+  data.tar.gz: b2293b0fe996aaf31a4c245a54f1a6eb1b9f03b4e1cc5b2cc7ac2c57cfcb423f9299ac5a4518d5d1fa6846ebaff977abaafa0eb08f175faef5c456d6b7d222f5

data/lib/enumerable-stats.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-require_relative 'enumerable_stats/enumerable_ext'
+require_relative "enumerable_stats/enumerable_ext"
 module Enumerable
   include EnumerableStats::EnumerableExt

data/lib/enumerable_stats/enumerable_ext.rb CHANGED Viewed

@@ -1,6 +1,36 @@
 # frozen_string_literal: true
 module EnumerableStats
+  # Extension module that adds statistical methods to all Enumerable objects.
+  #
+  # This module provides essential statistical functions including measures of central tendency
+  # (mean, median), measures of dispersion (variance, standard deviation), percentile calculations,
+  # outlier detection using the IQR method, and statistical comparison methods.
+  #
+  # When included, these methods become available on all Ruby collections that include
+  # Enumerable (Arrays, Ranges, Sets, etc.), enabling seamless statistical analysis
+  # without external dependencies.
+  #
+  # @example Basic statistical calculations
+  #   [1, 2, 3, 4, 5].mean          #=> 3.0
+  #   [1, 2, 3, 4, 5].median        #=> 3
+  #   [1, 2, 3, 4, 5].percentile(75) #=> 4
+  #
+  # @example Outlier detection
+  #   data = [1, 2, 3, 4, 100]
+  #   data.remove_outliers           #=> [1, 2, 3, 4]
+  #   data.outlier_stats             #=> { outliers_removed: 1, percentage: 20.0, ... }
+  #
+  # @example Statistical testing
+  #   control = [10, 12, 14, 16, 18]
+  #   treatment = [15, 17, 19, 21, 23]
+  #   control.t_value(treatment)     #=> negative t-statistic
+  #   control.degrees_of_freedom(treatment) #=> degrees of freedom for Welch's t-test
+  #   treatment.greater_than?(control) #=> true (treatment mean significantly > control mean)
+  #   control.less_than?(treatment)    #=> true (control mean significantly < treatment mean)
+  #
+  # @see Enumerable
+  # @since 0.1.0
   module EnumerableExt
     # Calculates the percentage difference between this collection's mean and another value or collection's mean
     # Uses the symmetric percentage difference formula: |a - b| / ((a + b) / 2) * 100
@@ -13,7 +43,7 @@ module EnumerableStats
       b = other.respond_to?(:mean) ? other.mean.to_f : other.to_f
       return 0.0 if a == b
-      return Float::INFINITY if a + b == 0
+      return Float::INFINITY if (a + b).zero?
       ((a - b).abs / ((a + b) / 2.0).abs) * 100
     end
@@ -29,7 +59,7 @@ module EnumerableStats
       b = other.respond_to?(:mean) ? other.mean.to_f : other.to_f
       return 0.0 if a == b
-      return Float::INFINITY if a + b == 0
+      return Float::INFINITY if (a + b).zero?
       ((a - b) / ((a + b) / 2.0).abs) * 100
     end
@@ -70,12 +100,52 @@ module EnumerableStats
       n = (n1 + n2)**2
-      d1 = variance**2 / (count**2 * (count - 1))
-      d2 = other.variance**2 / (other.count**2 * (other.count - 1))
+      d1 = (variance**2) / ((count**2) * (count - 1))
+      d2 = (other.variance**2) / ((other.count**2) * (other.count - 1))
       n / (d1 + d2)
     end
+    # Tests if this collection's mean is significantly greater than another collection's mean
+    # using a one-tailed Welch's t-test. Returns true if the test indicates statistical
+    # significance at the specified alpha level.
+    #
+    # @param other [Enumerable] Another collection to compare against
+    # @param alpha [Float] Significance level (default: 0.05 for 95% confidence)
+    # @return [Boolean] True if this collection's mean is significantly greater
+    # @example
+    #   control = [10, 12, 11, 13, 12]     # mean ≈ 11.6
+    #   treatment = [15, 17, 16, 18, 14]   # mean = 16.0
+    #   treatment.greater_than?(control)   # => true (treatment significantly > control)
+    #   control.greater_than?(treatment)   # => false
+    def greater_than?(other, alpha: 0.05)
+      t_stat = t_value(other) # t-statistic of self against other
+      df = degrees_of_freedom(other) # may be fractional; critical_t_value rounds it
+      critical_value = critical_t_value(df, alpha) # positive one-tailed cutoff
+      t_stat > critical_value # significant only in the upper tail
+    end
+    # Tests if this collection's mean is significantly less than another collection's mean
+    # using a one-tailed Welch's t-test. Returns true if the test indicates statistical
+    # significance at the specified alpha level.
+    #
+    # @param other [Enumerable] Another collection to compare against
+    # @param alpha [Float] Significance level (default: 0.05 for 95% confidence)
+    # @return [Boolean] True if this collection's mean is significantly less
+    # @example
+    #   control = [10, 12, 11, 13, 12]     # mean ≈ 11.6
+    #   treatment = [15, 17, 16, 18, 14]   # mean = 16.0
+    #   control.less_than?(treatment)      # => true (control significantly < treatment)
+    #   treatment.less_than?(control)      # => false
+    def less_than?(other, alpha: 0.05)
+      t_stat = t_value(other) # t-statistic of self against other
+      df = degrees_of_freedom(other) # may be fractional; critical_t_value rounds it
+      critical_value = critical_t_value(df, alpha) # positive one-tailed cutoff
+      t_stat < -critical_value # mirror of greater_than?: significant only in the lower tail
+    end
     # Calculates the arithmetic mean (average) of the collection
     #
     # @return [Float] The arithmetic mean of all numeric values
@@ -96,7 +166,7 @@ module EnumerableStats
     #   [5, 1, 3, 2, 4].median        # => 3 (automatically sorts)
     #   [].median                     # => nil
     def median
-      return nil if size == 0
+      return nil if size.zero?
       sorted = sort
       midpoint = size / 2
@@ -108,6 +178,53 @@ module EnumerableStats
       end
     end
+    # Calculates the specified percentile of the collection
+    # Uses linear interpolation between data points when the exact percentile falls between values
+    # This is equivalent to the "linear" method used by many statistical software packages
+    #
+    # @param percentile [Numeric] The percentile to calculate (0-100)
+    # @return [Numeric, nil] The value at the specified percentile, or nil if the collection is empty
+    # @raise [ArgumentError] If percentile is not between 0 and 100
+    # @example
+    #   [1, 2, 3, 4, 5].percentile(50)    # => 3 (same as median)
+    #   [1, 2, 3, 4, 5].percentile(25)    # => 2 (25th percentile)
+    #   [1, 2, 3, 4, 5].percentile(75)    # => 4 (75th percentile)
+    #   [1, 2, 3, 4, 5].percentile(0)     # => 1 (minimum value)
+    #   [1, 2, 3, 4, 5].percentile(100)   # => 5 (maximum value)
+    #   [].percentile(50)                 # => nil (empty collection)
+    def percentile(percentile)
+      return nil if size.zero? # checked before validation, so an empty collection never raises
+      unless percentile.is_a?(Numeric) && percentile >= 0 && percentile <= 100
+        raise ArgumentError, "Percentile must be a number between 0 and 100, got #{percentile}"
+      end
+      sorted = sort
+      # Endpoints: 0 and 100 map straight to the smallest and largest element
+      return sorted.first if percentile.zero?
+      return sorted.last if percentile == 100
+      # Fractional zero-based rank in the sorted data, per the "linear" method (R-7/Excel method):
+      # rank = (size - 1) * percentile / 100
+      position = (size - 1) * (percentile / 100.0)
+      # A whole-number rank returns the stored element unchanged (an Integer stays an Integer)
+      if position == position.floor
+        sorted[position.to_i]
+      else
+        # Otherwise interpolate linearly between the two neighbouring elements
+        lower_index = position.floor
+        upper_index = position.ceil
+        weight = position - position.floor # fractional part of the rank
+        lower_value = sorted[lower_index]
+        upper_value = sorted[upper_index]
+        lower_value + (weight * (upper_value - lower_value))
+      end
+    end
     # Calculates the sample variance of the collection
     # Uses the unbiased formula with n-1 degrees of freedom (Bessel's correction)
     #
@@ -117,7 +234,7 @@ module EnumerableStats
     #   [5, 5, 5, 5].variance         # => 0.0 (no variation)
     def variance
       mean = self.mean
-      sum_of_squares = map { |r| (r - mean)**2 }.sum
+      sum_of_squares = sum { |r| (r - mean)**2 }
       sum_of_squares / (count - 1).to_f
     end
@@ -157,7 +274,7 @@ module EnumerableStats
         lower_index = q1_pos.floor
         upper_index = q1_pos.ceil
         weight = q1_pos - q1_pos.floor
-        q1 = sorted[lower_index] + weight * (sorted[upper_index] - sorted[lower_index])
+        q1 = sorted[lower_index] + (weight * (sorted[upper_index] - sorted[lower_index]))
       end
       # Calculate Q3
@@ -167,7 +284,7 @@ module EnumerableStats
         lower_index = q3_pos.floor
         upper_index = q3_pos.ceil
         weight = q3_pos - q3_pos.floor
-        q3 = sorted[lower_index] + weight * (sorted[upper_index] - sorted[lower_index])
+        q3 = sorted[lower_index] + (weight * (sorted[upper_index] - sorted[lower_index]))
       end
       iqr = q3 - q1
@@ -177,7 +294,7 @@ module EnumerableStats
       upper_bound = q3 + (multiplier * iqr)
       # Filter out outliers
-      select { |value| value >= lower_bound && value <= upper_bound }
+      select { |value| value.between?(lower_bound, upper_bound) }
     end
     # Returns statistics about outlier removal for debugging/logging
@@ -200,5 +317,78 @@ module EnumerableStats
         outlier_percentage: ((original_count - filtered.size).to_f / original_count * 100).round(2)
       }
     end
+    private
+    # Calculates the critical t-value for a one-tailed test given degrees of freedom and alpha level
+    # Uses a lookup table for common df values and approximations for others
+    #
+    # @param df [Float] Degrees of freedom
+    # @param alpha [Float] Significance level (e.g., 0.05 for 95% confidence)
+    # @return [Float] Critical t-value for one-tailed test
+    def critical_t_value(df, alpha)
+      # For large df (≥30), the normal critical value is used in place of the t-table
+      return normal_critical_value(alpha) if df >= 30
+      # One-tailed critical values for α = 0.05, keyed by integer df 1..29
+      # These are standard critical values from t-tables
+      t_table_05 = {
+        1 => 6.314, 2 => 2.920, 3 => 2.353, 4 => 2.132, 5 => 2.015,
+        6 => 1.943, 7 => 1.895, 8 => 1.860, 9 => 1.833, 10 => 1.812,
+        11 => 1.796, 12 => 1.782, 13 => 1.771, 14 => 1.761, 15 => 1.753,
+        16 => 1.746, 17 => 1.740, 18 => 1.734, 19 => 1.729, 20 => 1.725,
+        21 => 1.721, 22 => 1.717, 23 => 1.714, 24 => 1.711, 25 => 1.708,
+        26 => 1.706, 27 => 1.703, 28 => 1.701, 29 => 1.699
+      }
+      # One-tailed critical values for α = 0.01, keyed by integer df 1..29
+      t_table_01 = {
+        1 => 31.821, 2 => 6.965, 3 => 4.541, 4 => 3.747, 5 => 3.365,
+        6 => 3.143, 7 => 2.998, 8 => 2.896, 9 => 2.821, 10 => 2.764,
+        11 => 2.718, 12 => 2.681, 13 => 2.650, 14 => 2.624, 15 => 2.602,
+        16 => 2.583, 17 => 2.567, 18 => 2.552, 19 => 2.539, 20 => 2.528,
+        21 => 2.518, 22 => 2.508, 23 => 2.500, 24 => 2.492, 25 => 2.485,
+        26 => 2.479, 27 => 2.473, 28 => 2.467, 29 => 2.462
+      }
+      df_int = df.round # can be 0 (df < 0.5) or 30 (29.5 <= df < 30); neither is a table key
+      if alpha <= 0.01 # every alpha at or below 0.01 shares the α = 0.01 table
+        t_table_01[df_int] || t_table_01[29] # df=29 entry is the fallback when df_int is not a table key
+      elsif alpha <= 0.05 # every alpha in (0.01, 0.05] shares the α = 0.05 table
+        t_table_05[df_int] || t_table_05[29] # df=29 entry is the fallback when df_int is not a table key
+      else
+        # For alpha > 0.05 there is no table: scale the α = 0.05 value by sqrt(0.05 / alpha)
+        # This is a rough approximation for other alpha levels
+        base_t = t_table_05[df_int] || t_table_05[29]
+        base_t * ((0.05 / alpha)**0.5)
+      end
+    end
+    # Returns the critical value for standard normal distribution (z-score)
+    # Used when degrees of freedom is large (≥30)
+    #
+    # @param alpha [Float] Significance level
+    # @return [Float] Critical z-value for one-tailed test
+    def normal_critical_value(alpha)
+      # Common z-values for one-tailed tests
+      # Use approximate comparisons to avoid float equality issues
+      if (alpha - 0.10).abs < 1e-10
+        1.282
+      elsif (alpha - 0.05).abs < 1e-10
+        1.645
+      elsif (alpha - 0.025).abs < 1e-10
+        1.960
+      elsif (alpha - 0.01).abs < 1e-10
+        2.326
+      elsif (alpha - 0.005).abs < 1e-10
+        2.576
+      else
+        # Fallback for every other alpha: sqrt(-2 * ln(alpha))
+        # This is only a rough stand-in for the inverse normal CDF and overestimates it:
+        # α = 0.05 would give about 2.448 here, versus the tabulated 1.645 above
+        Math.sqrt(-2 * Math.log(alpha))
+      end
+    end
   end
-end
+end

metadata CHANGED Viewed

@@ -1,13 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: enumerable-stats
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.2.0
 platform: ruby
 authors:
 - Jon Daniel
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-07-31 00:00:00.000000000 Z
+date: 2025-08-02 00:00:00.000000000 Z
 dependencies: []
 description: |
   A Ruby gem that extends all Enumerable objects (Arrays, Ranges, Sets, etc.) with essential statistical methods.
@@ -28,6 +29,7 @@ metadata:
   source_code_uri: https://github.com/binarycleric/enumerable-stats
   github_repo: ssh://github.com/binarycleric/enumerable-stats
   rubygems_mfa_required: 'true'
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -42,7 +44,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.6.2
+rubygems_version: 3.5.22
+signing_key:
 specification_version: 4
 summary: Statistical Methods for Enumerable Collections
 test_files: []