RubyGems - more_math - Versions diffs - 1.5.0 → 1.6.0 - Mend

more_math 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/CHANGES.md +28 -1
data/README.md +25 -54
data/Rakefile +8 -2
data/lib/more_math/cantor_pairing_function.rb +59 -0
data/lib/more_math/constants/functions_constants.rb +37 -0
data/lib/more_math/continued_fraction.rb +170 -60
data/lib/more_math/distributions.rb +98 -9
data/lib/more_math/entropy.rb +74 -2
data/lib/more_math/exceptions.rb +26 -0
data/lib/more_math/functions.rb +140 -4
data/lib/more_math/histogram.rb +86 -3
data/lib/more_math/linear_regression.rb +108 -7
data/lib/more_math/newton_bisection.rb +71 -8
data/lib/more_math/numberify_string_function.rb +96 -20
data/lib/more_math/permutation.rb +132 -27
data/lib/more_math/ranking_common.rb +38 -10
data/lib/more_math/sequence/moving_average.rb +27 -0
data/lib/more_math/sequence/refinement.rb +26 -0
data/lib/more_math/sequence.rb +177 -66
data/lib/more_math/string_numeral.rb +172 -4
data/lib/more_math/subset.rb +49 -5
data/lib/more_math/version.rb +1 -1
data/lib/more_math.rb +1 -0
data/more_math.gemspec +4 -3
metadata +17 -3

data/lib/more_math/sequence.rb CHANGED Viewed

@@ -2,111 +2,179 @@ require 'more_math/sequence/moving_average'
 require 'more_math/sequence/refinement'
 module MoreMath
-  # This class is used to contain elements and compute various statistical
-  # values for them.
+  # A sequence class for statistical analysis and mathematical operations.
+  #
+  # This class provides comprehensive statistical functionality including:
+  # - Basic sequence operations (iteration, size, etc.)
+  # - Statistical measures (mean, variance, standard deviation)
+  # - Advanced statistical methods (percentiles, confidence intervals)
+  # - Time series analysis (moving averages, autocorrelation)
+  # - Hypothesis testing (t-tests, confidence intervals)
+  # - Data visualization tools (histograms)
+  #
+  # @example Basic usage
+  #   sequence = Sequence.new([1, 2, 3, 4, 5])
+  #   puts sequence.mean        # => 3.0
+  #   puts sequence.variance    # => 2.0
+  #   sequence.simple_moving_average(3) # => [2.0, 3.0, 4.0]
+  #
+  # @example Statistical analysis
+  #   data = Sequence.new([10, 15, 20, 25, 30])
+  #   puts data.percentile(90)      # => 28.0
+  #   puts data.confidence_interval(0.05) # => 17.0..23.0
   class Sequence
     include MoreMath::Sequence::MovingAverage
+    # Initializes a new Sequence instance with the given elements.
+    #
+    # @param elements [Array] The array of elements to store in this sequence
     def initialize(elements)
       # Keep a private, immutable snapshot so that later changes to the
       # caller's collection cannot leak into this sequence.
       snapshot = elements.dup
       @elements = snapshot.freeze
     end
     # Returns the array of elements.
+    #
+    # @return [Array] The frozen array of elements in this sequence
     attr_reader :elements
-    # Calls the +block+ for every element of this Sequence.
+    # Calls the block for every element of this Sequence.
+    #
+    # @yield [element] Yields each element to the block
+    # @yieldparam element [Object] Each element in the sequence
+    # @return [Array, Enumerator] The backing elements array when a block is given, otherwise an Enumerator
     def each(&block)
       # Delegates to the backing collection's #each, forwarding the caller's
       # block unchanged; the return value is whatever that call returns.
       @elements.each(&block)
     end
     include Enumerable
     # Returns true if this sequence is empty, otherwise false.
+    #
+    # @return [Boolean] true if sequence has no elements, false otherwise
     def empty?
       # Answered directly by the backing collection.
       @elements.empty?
     end
-    # Returns the number of elements, on which the analysis is based.
+    # Returns the number of elements in this sequence.
+    #
+    # @return [Integer] The count of elements in the sequence
     def size
       # Element count as reported by the backing collection.
       @elements.size
     end
     # Reset all memoized values of this sequence.
+    #
+    # @return [self] Returns self after clearing memoization cache
     def reset
       # The memoization cache is cleared through a class-level call; the
       # receiver itself is handed back so calls can be chained.
       tap { self.class.mize_cache_clear }
     end
+    # Converts the sequence to an array.
+    #
+    # @return [Array] A duplicate of the internal elements array
     def to_ary
       # Hand out a copy rather than the internal collection itself.
       @elements.dup
     end
     # to_a is the same method under the explicit-conversion name.
     alias to_a to_ary
-    # Push +element+ on this Sequence and return a new Sequence instance with
-    # +element+ as its last element.
+    # Pushes an element onto this Sequence and returns a new Sequence instance.
+    #
+    # @param element [Object] The element to add to the sequence
+    # @return [Sequence] A new Sequence instance with the element added
     def push(element)
       # Work on a copy: the receiver keeps its elements and the caller gets
       # a brand-new Sequence that ends with +element+.
       extended = @elements.dup
       extended.push(element)
       Sequence.new(extended)
     end
     alias << push
     # Returns the variance of the elements.
+    #
+    # Variance measures how far each number in the set is from the mean.
+    #
+    # @return [Float] The population variance of the elements
+    # @note Uses the formula: Σ(xi - μ)² / n
     memoize method:
     def variance
       # Population form: the sum of squared deviations divided by the
       # element count (not count - 1).
       sum_of_squares / size
     end
-    # Returns the sample_variance of the elements.
+    # Returns the sample variance of the elements.
+    #
+    # Sample variance is used when the data represents a sample rather than a population.
+    #
+    # @return [Float] The sample variance of the elements
+    # @note Uses the formula: Σ(xi - μ)² / (n-1)
     memoize method:
     def sample_variance
       # The (n - 1) divisor needs at least two observations; smaller
       # sequences are defined to have a sample variance of 0.0.
       return 0.0 unless size > 1

       sum_of_squares / (size - 1.0)
     end
-    # Returns the sum of squares (the sum of the squared deviations) of the
-    # elements.
+    # Returns the sum of squares of the elements.
+    #
+    # Sum of squares is used in variance and standard deviation calculations.
+    #
+    # @return [Float] The sum of squared deviations from the mean
     memoize method:
     def sum_of_squares
       # Accumulate in a Float so integer elements still give a Float result.
       total = 0.0
       @elements.each do |value|
         deviation = value - arithmetic_mean
         total += deviation ** 2
       end
       total
     end
     # Returns the standard deviation of the elements.
+    #
+    # Standard deviation measures the amount of variation or dispersion in a set of values.
+    #
+    # @return [Float] The population standard deviation
     memoize method:
     def standard_deviation
       # Square root of the population variance (#variance).
       Math.sqrt(variance)
     end
     # Returns the Z-score sequence derived from the current sequence.
+    #
+    # Z-scores standardize data by transforming it to have a mean of 0 and standard deviation of 1.
+    #
+    # @return [Sequence] A new Sequence with z-score values
     memoize method:
     def z_score
       # Standardize each element as (x - mean) / standard_deviation. The
       # parentheses are essential: division binds tighter than subtraction,
       # so without them only the mean would be divided.
       # The division is done in floating point, so a sequence whose standard
       # deviation is 0 yields non-finite values instead of raising.
       self.class.new(
         elements.map { |t| (t.to_f - mean) / standard_deviation }
       )
     end
-    # Returns the standard deviation of the elements in percentage of the
-    # arithmetic mean.
+    # Returns the standard deviation as a percentage of the arithmetic mean.
+    #
+    # @return [Float] Standard deviation expressed as a percentage of the mean
     memoize method:
     def standard_deviation_percentage
       # Scale first, then divide: (100 * standard deviation) / mean.
       scaled_deviation = 100.0 * standard_deviation
       scaled_deviation / arithmetic_mean
     end
     # Returns the sample standard deviation of the elements.
+    #
+    # @return [Float] The sample standard deviation
     memoize method:
     def sample_standard_deviation
       # Square root of #sample_variance.
       Math.sqrt(sample_variance)
     end
-    # Returns the sample standard deviation of the elements in percentage
-    # of the arithmetic mean.
+    # Returns the sample standard deviation as a percentage of the arithmetic mean.
+    #
+    # @return [Float] Sample standard deviation expressed as a percentage of the mean
     memoize method:
     def sample_standard_deviation_percentage
       # Scale first, then divide: (100 * sample standard deviation) / mean.
       scaled_deviation = 100.0 * sample_standard_deviation
       scaled_deviation / arithmetic_mean
     end
     # Returns the sum of all elements.
+    #
+    # @return [Float] The sum of all elements in the sequence
     memoize method:
     def sum
       # Start from 0.0 so the result is a Float even for integer elements.
       total = 0.0
       @elements.each { |value| total += value }
       total
     end
     # Returns the arithmetic mean of the elements.
+    #
+    # @return [Float] The arithmetic mean (average) of the elements
     memoize method:
     def arithmetic_mean
       sum / size
@@ -114,8 +182,11 @@ module MoreMath
     alias mean arithmetic_mean
-    # Returns the harmonic mean of the elements. If any of the elements
-    # is less than or equal to 0.0, this method returns NaN.
+    # Returns the harmonic mean of the elements.
+    #
+    # The harmonic mean is useful for rates and ratios. Returns NaN if any element is <= 0.
+    #
+    # @return [Float] The harmonic mean, or NaN if invalid input
     memoize method:
     def harmonic_mean
       sum = @elements.inject(0.0) { |s, t|
@@ -128,8 +199,12 @@ module MoreMath
       sum ? size / sum : 0 / 0.0
     end
-    # Returns the geometric mean of the elements. If any of the
-    # elements is less than 0.0, this method returns NaN.
+    # Returns the geometric mean of the elements.
+    #
+    # The geometric mean is useful for sets of positive numbers that are to be multiplied together.
+    # Returns NaN if any element is negative, 0 if any element is zero.
+    #
+    # @return [Float] The geometric mean, or NaN if invalid input
     memoize method:
     def geometric_mean
       sum = @elements.inject(0.0) { |s, t|
@@ -153,27 +228,36 @@ module MoreMath
     end
     # Returns the minimum of the elements.
+    #
+    # @return [Object] The minimum element in the sequence
     memoize method:
     def min
       # Delegates to the backing collection's #min.
       @elements.min
     end
     # Returns the maximum of the elements.
+    #
+    # @return [Object] The maximum element in the sequence
     memoize method:
     def max
       # Delegates to the backing collection's #max.
       @elements.max
     end
-    # Return a sorted array of the elements.
+    # Returns a sorted array of the elements.
+    #
+    # @return [Array] A new array containing elements sorted in ascending order
     memoize method:
     def sorted
       # Delegates to the backing collection's #sort.
       @elements.sort
     end
-    # Returns the +p+-percentile of the elements.
-    # There are many methods to compute the percentile, this method uses the
-    # the weighted average at x_(n + 1)p, which allows p to be in 0...100
-    # (excluding the 100).
+    # Returns the p-percentile of the elements.
+    #
+    # Uses weighted average at x_(n + 1)p for interpolation between percentiles.
+    #
+    # @param p [Integer, Float] The percentile to calculate (0 <= p < 100)
+    # @return [Float] The p-th percentile value
+    # @raise [ArgumentError] If p is not in the range (0...100)
     def percentile(p = 50)
       (0...100).include?(p) or
         raise ArgumentError, "p = #{p}, but has to be in (0...100)"
@@ -195,16 +279,20 @@ module MoreMath
     alias median percentile
-    # Use an approximation of the Welch-Satterthwaite equation to compute the
-    # degrees of freedom for Welch's t-test.
+    # Computes the degrees of freedom for Welch's t-test.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @return [Float] The degrees of freedom for Welch's t-test
     def compute_welch_df(other)
       # Numerator: square of the summed per-sequence (variance / size) terms.
       own_term = sample_variance / size
       other_term = other.sample_variance / other.size
       numerator = (own_term + other_term) ** 2

       # Denominator: one variance^2 / (size^2 * (size - 1)) term per sequence.
       own_weight = sample_variance ** 2 / (size ** 2 * (size - 1))
       other_weight =
         other.sample_variance ** 2 / (other.size ** 2 * (other.size - 1))

       numerator / (own_weight + other_weight)
     end
-    # Returns the t value of the Welch's t-test between this Sequence
-    # instance and the +other+.
+    # Returns the t value of the Welch's t-test between this sequence and another.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @return [Float] The t-statistic value
     def t_welch(other)
       signal = arithmetic_mean - other.arithmetic_mean
       noise = Math.sqrt(sample_variance / size +
@@ -214,26 +302,35 @@ module MoreMath
       0.0
     end
-    # Returns an estimation of the common standard deviation of the
-    # elements of this and +other+.
+    # Returns an estimation of the common standard deviation of this and another sequence.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @return [Float] The pooled standard deviation estimate
     def common_standard_deviation(other)
       # Square root of the value returned by #common_variance for +other+.
       Math.sqrt(common_variance(other))
     end
-    # Returns an estimation of the common variance of the elements of this
-    # and +other+.
+    # Returns an estimation of the common variance of this and another sequence.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @return [Float] The pooled variance estimate
     def common_variance(other)
       # Pooled variance: each sample variance is weighted by (size - 1) and
       # the weighted sum is divided by (size + other.size - 2). The whole
       # weighted sum has to be grouped before dividing; otherwise operator
       # precedence divides only the second term.
       weighted_sum = (size - 1) * sample_variance +
         (other.size - 1) * other.sample_variance
       weighted_sum / (size + other.size - 2)
     end
-    # Compute the # degrees of freedom for Student's t-test.
+    # Computes the degrees of freedom for Student's t-test.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @return [Integer] The degrees of freedom for Student's t-test
     def compute_student_df(other)
       # Combined element count of both sequences, minus two.
       size + other.size - 2
     end
-    # Returns the t value of the Student's t-test between this Sequence
-    # instance and the +other+.
+    # Returns the t value of the Student's t-test between this sequence and another.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @return [Float] The t-statistic value
     def t_student(other)
       signal = arithmetic_mean - other.arithmetic_mean
       noise = common_standard_deviation(other) *
@@ -243,9 +340,12 @@ module MoreMath
       0.0
     end
-    # Compute a sample size, that will more likely yield a mean difference
-    # between this instance's elements and those of +other+. Use +alpha+
-    # and +beta+ as levels for the first- and second-order errors.
+    # Computes the suggested sample size for detecting a mean difference.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @param alpha [Float] The significance level (default: 0.05)
+    # @param beta [Float] The Type II error probability (default: 0.05)
+    # @return [Float] The suggested sample size
     def suggested_sample_size(other, alpha = 0.05, beta = 0.05)
       alpha, beta = alpha.abs, beta.abs
       signal = arithmetic_mean - other.arithmetic_mean
@@ -256,17 +356,21 @@ module MoreMath
         Math.sqrt(pooled_variance_estimate)) / signal) ** 2
     end
-    # Return true, if the Sequence instance covers the +other+, that is their
-    # arithmetic mean value is most likely to be equal for the +alpha+ error
-    # level.
+    # Determines if this sequence covers another sequence at the given alpha level.
+    #
+    # @param other [Sequence] The other sequence to compare against
+    # @param alpha [Float] The significance level (default: 0.05)
+    # @return [Boolean] true if sequences are statistically equivalent
     def cover?(other, alpha = 0.05)
       # Compare the magnitude of the Welch t value with the distribution's
       # inverse probability at 1 - |alpha| / 2, using the Welch degrees of
       # freedom for the pair of sequences.
       statistic = t_welch(other)
       distribution = TDistribution.new(compute_welch_df(other))
       critical_value = distribution.inverse_probability(1 - alpha.abs / 2.0)
       statistic.abs < critical_value
     end
-    # Return the confidence interval for the arithmetic mean with alpha level +alpha+ of
-    # the elements of this Sequence instance as a Range object.
+    # Returns the confidence interval for the arithmetic mean.
+    #
+    # @param alpha [Float] The significance level (default: 0.05)
+    # @return [Range] The confidence interval as a range object
     def confidence_interval(alpha = 0.05)
       td = TDistribution.new(size - 1)
       t = td.inverse_probability(alpha / 2).abs
@@ -274,7 +378,9 @@ module MoreMath
       (arithmetic_mean - delta)..(arithmetic_mean + delta)
     end
-    # Returns the array of autovariances (of length size - 1).
+    # Returns the array of autovariances.
+    #
+    # @return [Array<Float>] Array of autovariance values
     def autovariance
       Array.new(size - 1) do |k|
         s = 0.0
@@ -285,15 +391,17 @@ module MoreMath
       end
     end
-    # Returns the array of autocorrelation values c_k / c_0 (of length size -
-    # 1).
+    # Returns the array of autocorrelation values.
+    #
+    # @return [Array<Float>] Array of autocorrelation values (normalized by first variance)
     def autocorrelation
       # Normalize every autovariance by the first entry, so the leading
       # value of the result is c_0 / c_0.
       covariances = autovariance
       covariances.map { |value| value / covariances[0] }
     end
-    # Returns the d-value for the Durbin-Watson statistic. The value is d << 2
-    # for positive, d >> 2 for negative and d around 2 for no autocorrelation.
+    # Returns the d-value for the Durbin-Watson statistic.
+    #
+    # @return [Float] The Durbin-Watson statistic value (close to 2 indicates no autocorrelation)
     def durbin_watson_statistic
       e = linear_regression.residuals
       e.size <= 1 and return 2.0
@@ -301,10 +409,10 @@ module MoreMath
         e.inject(0.0) { |s, x| s + x ** 2 }
     end
-    # Returns the q value of the Ljung-Box statistic for the number of lags
-    # +lags+. A higher value might indicate autocorrelation in the elements of
-    # this Sequence instance. This method returns nil if there weren't enough
-    # (at least lags) lags available.
+    # Returns the q value of the Ljung-Box statistic.
+    #
+    # @param lags [Integer] The number of lags to consider (default: 20)
+    # @return [Float, nil] The Ljung-Box statistic value or nil if insufficient data
     def ljung_box_statistic(lags = 20)
       r = autocorrelation
       lags >= r.size and return
@@ -312,14 +420,11 @@ module MoreMath
       n * (n + 2) * (1..lags).inject(0.0) { |s, i| s + r[i] ** 2 / (n - i) }
     end
-    # This method tries to detect autocorrelation with the Ljung-Box
-    # statistic. If enough lags can be considered it returns a hash with
-    # results, otherwise nil is returned. The keys are
-    # :lags:: the number of lags,
-    # :alpha_level:: the alpha level for the test,
-    # :q:: the value of the ljung_box_statistic,
-    # :p:: the p-value computed, if p is higher than alpha no correlation was detected,
-    # :detected:: true if a correlation was found.
+    # Detects autocorrelation using the Ljung-Box statistic.
+    #
+    # @param lags [Integer] The number of lags to consider (default: 20)
+    # @param alpha_level [Float] The significance level (default: 0.05)
+    # @return [Hash, nil] Results hash or nil if insufficient data
     def detect_autocorrelation(lags = 20, alpha_level = 0.05)
       if q = ljung_box_statistic(lags)
         p = ChiSquareDistribution.new(lags).probability(q)
@@ -334,16 +439,19 @@ module MoreMath
     end
     # Returns the interquartile range for this sequence.
+    #
+    # @return [Float] The difference between 75th and 25th percentiles
     def interquartile_range
       # Difference between the 75th and the 25th percentile.
       lower, upper = percentile(25), percentile(75)
       upper - lower
     end
-    # Return a result hash with the number of :very_low, :low, :high, and
-    # :very_high outliers, determined by the box plotting algorithm run with
-    # :median and :iqr parameters. If no outliers were found or the iqr is
-    # less than epsilon, nil is returned.
+    # Detects outliers using the boxplot algorithm.
+    #
+    # @param factor [Float] The multiplier for IQR to define outlier boundaries (default: 3.0)
+    # @param epsilon [Float] Small value for numerical stability (default: 1E-5)
+    # @return [Hash, nil] Outlier statistics or nil if no outliers or insufficient data
     def detect_outliers(factor = 3.0, epsilon = 1E-5)
       half_factor = factor / 2.0
       quartile1 = percentile(25)
@@ -372,15 +480,18 @@ module MoreMath
       end
     end
-    # Returns the LinearRegression object for the equation a * x + b which
-    # represents the line computed by the linear regression algorithm.
+    # Returns the LinearRegression object for this sequence.
+    #
+    # @return [LinearRegression] The linear regression model for this data
     memoize method:
     def linear_regression
       # Built from the raw elements; LinearRegression is not defined in this
       # file.
       LinearRegression.new @elements
     end
-    # Returns a Histogram instance with +bins+ as the number of bins for this
-    # analysis' elements.
+    # Creates a Histogram instance from this sequence.
+    #
+    # @param bins [Integer] The number of bins for the histogram
+    # @return [Histogram] A new Histogram instance
     def histogram(bins)
       # Passes the sequence itself (not the raw elements) together with the
       # requested bin count; Histogram is not defined in this file.
       Histogram.new(self, bins)
     end