more_math 1.9.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +44 -0
- data/lib/more_math/entropy.rb +148 -66
- data/lib/more_math/version.rb +1 -1
- data/more_math.gemspec +2 -2
- data/tests/entropy_test.rb +93 -36
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9cec08e9525ca81297ec96ef655884adc76c51dd751aa77f3c2d9092796853b9
|
|
4
|
+
data.tar.gz: cbdd8862d8150d5bcc8f00d9aa6c2f0629601aa83a5ec992876facd6c9d208c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a55392037367f21c5af96ce557f85470ee9a75a58047580f944c5822af87240c754be75fe8d4a9fd93c070c1c01a793fbbb161b57889e92b9d7f9bad7e3e07cf
|
|
7
|
+
data.tar.gz: 6349577e9373872abdb696cf8abe2b2f0bf8ed0c7f318bb5fc103256f0d6042744aae2fb04ab91233a090ff2112dee8cb96c89cb0786ae95e68bf2c506f1aeb7
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,49 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-01-22 v1.11.0
|
|
4
|
+
|
|
5
|
+
- Added new entropy helper methods: `entropy_probabilities`,
|
|
6
|
+
`entropy_per_symbol`, `minimum_entropy_per_symbol`,
|
|
7
|
+
`collision_entropy_per_symbol`, `entropy_total`, `minimum_entropy_total`, and
|
|
8
|
+
`collision_entropy_total` to `lib/more_math/entropy.rb`
|
|
9
|
+
- Replaced the old `entropy` method with an alias to `entropy_per_symbol`
|
|
10
|
+
- Updated documentation for `entropy_ratio` to clarify the `size:` keyword
|
|
11
|
+
argument
|
|
12
|
+
- Removed the `entropy_ratio_minimum` method and its corresponding test cases
|
|
13
|
+
from the codebase
|
|
14
|
+
- Enhanced test coverage in `tests/entropy_test.rb` with new unit tests for the
|
|
15
|
+
added helper methods
|
|
16
|
+
- Updated existing test assertions to use `assert_in_delta` for floating-point
|
|
17
|
+
comparisons
|
|
18
|
+
- Added descriptive comments to the test `setup` method
|
|
19
|
+
- Ensured all new methods return correct values for empty, uniform, and
|
|
20
|
+
varied symbol strings
|
|
21
|
+
- Corrected example values in `entropy_ratio` documentation
|
|
22
|
+
|
|
23
|
+
## 2026-01-19 v1.10.0
|
|
24
|
+
|
|
25
|
+
- Added new `entropy_maximum` method to calculate theoretical maximum entropy
|
|
26
|
+
for a text given an alphabet size
|
|
27
|
+
- Made `size` parameter required in `entropy_ratio` and `entropy_ratio_minimum`
|
|
28
|
+
methods instead of defaulting to `text.size`
|
|
29
|
+
- Updated YARD documentation to clarify that `size` parameter represents
|
|
30
|
+
alphabet size
|
|
31
|
+
- Modified examples to use explicit alphabet sizes for better clarity
|
|
32
|
+
- All entropy methods now consistently return values in bits as expected for
|
|
33
|
+
Shannon entropy
|
|
34
|
+
- Updated documentation examples to use simplified method calls without
|
|
35
|
+
`MoreMath::Entropy` prefix
|
|
36
|
+
- Enhanced `entropy_maximum` method documentation to explain its use in
|
|
37
|
+
determining security strength for tokens
|
|
38
|
+
- Added comprehensive tests for `entropy_maximum` function covering edge cases
|
|
39
|
+
and various alphabet sizes
|
|
40
|
+
- Improved `entropy_maximum` method signature to return `0` for invalid
|
|
41
|
+
alphabet sizes (≤ 1) and use `Math.log2` for calculation
|
|
42
|
+
- Updated existing entropy method documentation to clarify it calculates
|
|
43
|
+
entropy in bits
|
|
44
|
+
- Simplified example code in documentation to use direct method calls instead
|
|
45
|
+
of module prefixes
|
|
46
|
+
|
|
3
47
|
## 2026-01-16 v1.9.0
|
|
4
48
|
|
|
5
49
|
- Added support for array inputs in entropy calculation methods by checking
|
data/lib/more_math/entropy.rb
CHANGED
|
@@ -15,41 +15,138 @@ module MoreMath
|
|
|
15
15
|
#
|
|
16
16
|
# @example Basic usage
|
|
17
17
|
# require 'more_math'
|
|
18
|
-
# include MoreMath
|
|
18
|
+
# include MoreMath::Functions
|
|
19
19
|
#
|
|
20
20
|
# text = "hello world"
|
|
21
21
|
# puts entropy(text) # => 2.3219280948873626
|
|
22
22
|
# puts entropy_ratio(text) # => 0.7428571428571429
|
|
23
23
|
#
|
|
24
24
|
# @example Using with different text samples
|
|
25
|
-
#
|
|
26
|
-
#
|
|
25
|
+
# entropy("aaaa") # => 0.0 (no entropy)
|
|
26
|
+
# entropy("abcd") # => 2.0 (actual entropy)
|
|
27
27
|
module Entropy
|
|
28
|
-
# Calculates the
|
|
28
|
+
# Calculates the probability distribution of symbols in the given input.
|
|
29
29
|
#
|
|
30
|
-
#
|
|
31
|
-
#
|
|
30
|
+
# This method computes the frequency of each symbol in the input and
|
|
31
|
+
# converts these frequencies into probabilities by dividing by the total
|
|
32
|
+
# number of symbols.
|
|
32
33
|
#
|
|
33
|
-
# @
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
34
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
35
|
+
# calculate probabilities for
|
|
36
|
+
# @return [Hash<String, Float>] A hash mapping each symbol to its
|
|
37
|
+
# probability value
|
|
38
|
+
def entropy_probabilities(symbols)
|
|
39
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
40
|
+
|
|
41
|
+
freq = symbols.tally
|
|
42
|
+
total = symbols.size
|
|
43
|
+
|
|
44
|
+
freq.transform_values { |c| c.to_f / total }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Calculates the Shannon entropy per symbol in the given symbols.
|
|
48
|
+
#
|
|
49
|
+
# This method computes the entropy of a sequence of symbols, measuring the
|
|
50
|
+
# average information content or unpredictability of the symbols.
|
|
51
|
+
#
|
|
52
|
+
# @param symbols [String, Array<String>] The sequence of symbols to calculate entropy for
|
|
53
|
+
# @return [Float] The entropy value in bits per symbol
|
|
54
|
+
def entropy_per_symbol(symbols)
|
|
55
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
56
|
+
|
|
57
|
+
symbols.empty? and return 0.0
|
|
58
|
+
|
|
59
|
+
probs = entropy_probabilities(symbols)
|
|
60
|
+
|
|
61
|
+
-probs.values.sum { |p| p * Math.log2(p) }
|
|
62
|
+
end
|
|
63
|
+
alias entropy entropy_per_symbol
|
|
64
|
+
|
|
65
|
+
# Calculates the minimum entropy per symbol in the given symbols.
|
|
66
|
+
#
|
|
67
|
+
# This method determines the min-entropy of a sequence of symbols, i.e.
|
|
68
|
+
# the negative base-2 logarithm of the probability of the most likely
|
|
69
|
+
# symbol.
|
|
70
|
+
#
|
|
71
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
72
|
+
# calculate minimum entropy for
|
|
73
|
+
# @return [Float] The minimum entropy value in bits per symbol
|
|
74
|
+
def minimum_entropy_per_symbol(symbols)
|
|
75
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
76
|
+
|
|
77
|
+
symbols.empty? and return 0.0
|
|
78
|
+
|
|
79
|
+
probs = entropy_probabilities(symbols)
|
|
80
|
+
|
|
81
|
+
-Math.log2(probs.values.max)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Calculates the collision entropy per symbol in the given symbols.
|
|
85
|
+
#
|
|
86
|
+
# This method computes the collision entropy (Rényi entropy of order 2),
|
|
87
|
+
# which measures the likelihood that two independently drawn symbols
|
|
88
|
+
# coincide. It's based on the sum of squared probabilities of each symbol.
|
|
89
|
+
#
|
|
90
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
91
|
+
# calculate collision entropy for
|
|
92
|
+
# @return [Float] The collision entropy value in bits per symbol
|
|
93
|
+
def collision_entropy_per_symbol(symbols)
|
|
94
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
95
|
+
|
|
96
|
+
symbols.empty? and return 0.0
|
|
97
|
+
|
|
98
|
+
probs = entropy_probabilities(symbols)
|
|
99
|
+
|
|
100
|
+
-Math.log2(probs.values.sum { |p| p * p })
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Calculates the total entropy for a sequence of symbols.
|
|
104
|
+
#
|
|
105
|
+
# This method computes the total information content of a symbol sequence
|
|
106
|
+
# by multiplying the entropy per symbol by the total number of symbols.
|
|
107
|
+
#
|
|
108
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
109
|
+
# calculate total entropy for
|
|
110
|
+
# @return [Float] The total entropy value in bits for the entire symbol
|
|
111
|
+
# sequence
|
|
112
|
+
def entropy_total(symbols)
|
|
113
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
114
|
+
|
|
115
|
+
entropy_per_symbol(symbols) * symbols.size
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Calculates the total minimum entropy for a sequence of symbols.
|
|
119
|
+
#
|
|
120
|
+
# This method computes the total information content of a symbol sequence
|
|
121
|
+
# using the minimum entropy per symbol, multiplied by the total number of
|
|
122
|
+
# symbols. It represents the theoretical minimum possible entropy for the
|
|
123
|
+
# given sequence.
|
|
124
|
+
#
|
|
125
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
126
|
+
# calculate total minimum entropy for
|
|
127
|
+
# @return [Float] The total minimum entropy value in bits for the entire
|
|
128
|
+
# symbol sequence
|
|
129
|
+
def minimum_entropy_total(symbols)
|
|
130
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
131
|
+
|
|
132
|
+
minimum_entropy_per_symbol(symbols) * symbols.size
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Calculates the total collision entropy for a sequence of symbols.
|
|
136
|
+
#
|
|
137
|
+
# This method computes the total information content of a symbol sequence
|
|
138
|
+
# using the collision entropy per symbol, multiplied by the total number of
|
|
139
|
+
# symbols. Collision entropy measures the likelihood that two
|
|
140
|
+
# independently drawn symbols in a sequence coincide.
|
|
141
|
+
#
|
|
142
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
143
|
+
# calculate total collision entropy for
|
|
144
|
+
# @return [Float] The total collision entropy value in bits for the entire
|
|
145
|
+
# symbol sequence
|
|
146
|
+
def collision_entropy_total(symbols)
|
|
147
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
148
|
+
|
|
149
|
+
collision_entropy_per_symbol(symbols) * symbols.size
|
|
53
150
|
end
|
|
54
151
|
|
|
55
152
|
# Calculates the ideal (maximum) entropy for a given character set size.
|
|
@@ -58,8 +155,8 @@ module MoreMath
|
|
|
58
155
|
# alphabet have equal probability of occurrence.
|
|
59
156
|
#
|
|
60
157
|
# @example
|
|
61
|
-
#
|
|
62
|
-
#
|
|
158
|
+
# entropy_ideal(2) # => 1.0
|
|
159
|
+
# entropy_ideal(256) # => 8.0
|
|
63
160
|
#
|
|
64
161
|
# @param size [Integer] The number of unique characters in the alphabet
|
|
65
162
|
# @return [Float] The maximum possible entropy in bits
|
|
@@ -80,54 +177,39 @@ module MoreMath
|
|
|
80
177
|
# theoretical maximum entropy for that character set.
|
|
81
178
|
#
|
|
82
179
|
# @example
|
|
83
|
-
#
|
|
84
|
-
#
|
|
85
|
-
#
|
|
180
|
+
# entropy_ratio("hello", size: 26) # => 0.4088
|
|
181
|
+
# entropy_ratio("aaaaa", size: 26) # => 0.0
|
|
182
|
+
# entropy_ratio("abcde", size: 5) # => 1.0
|
|
183
|
+
# entropy_ratio("abcde", size: 26) # => 0.4939
|
|
86
184
|
#
|
|
87
|
-
# @example With custom alphabet size
|
|
88
|
-
# # Normalizing against a 26-letter alphabet (English)
|
|
89
|
-
# MoreMath::Entropy.entropy_ratio("hello", size: 26) # => 0.394...
|
|
90
185
|
#
|
|
91
186
|
# @param text [String] The input text to calculate entropy ratio for
|
|
92
|
-
# @param size [Integer] The size of the character set to normalize against
|
|
93
|
-
#
|
|
94
|
-
# normalizes the entropy relative to the text's own character space.
|
|
95
|
-
# This allows comparison of texts with different lengths on the same scale.
|
|
187
|
+
# @param size [Integer] The size of the character set to normalize against
|
|
188
|
+
# (alphabet size).
|
|
96
189
|
# @return [Float] Normalized entropy ratio between 0 and 1
|
|
97
|
-
def entropy_ratio(text, size:
|
|
190
|
+
def entropy_ratio(text, size:)
|
|
98
191
|
size <= 1 and return 0.0
|
|
99
192
|
entropy(text) / entropy_ideal(size)
|
|
100
193
|
end
|
|
101
194
|
|
|
102
|
-
# Calculates the
|
|
195
|
+
# Calculates the maximum possible entropy for a given text and alphabet
|
|
196
|
+
# size.
|
|
103
197
|
#
|
|
104
|
-
# This
|
|
105
|
-
#
|
|
106
|
-
#
|
|
198
|
+
# This represents the theoretical maximum entropy that could be achieved if
|
|
199
|
+
# all characters in the text were chosen uniformly at random from the
|
|
200
|
+
# alphabet. It's used to determine the upper bound of security strength for
|
|
201
|
+
# tokens.
|
|
107
202
|
#
|
|
108
|
-
# @
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
112
|
-
# @
|
|
113
|
-
# @
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
n = text.size
|
|
119
|
-
k = size
|
|
120
|
-
|
|
121
|
-
ratio = MoreMath::Functions.entropy_ratio(text, size: k)
|
|
122
|
-
|
|
123
|
-
logk = Math.log2(k)
|
|
124
|
-
diff = logk - 1.0 / Math.log(2)
|
|
125
|
-
var = (diff ** 2) / (logk ** 2) * (1.0 - 1.0 / k) / n
|
|
126
|
-
se = Math.sqrt(var) # standard error
|
|
127
|
-
|
|
128
|
-
z = STD_NORMAL_DISTRIBUTION.inverse_probability(1.0 - alpha / 2.0)
|
|
129
|
-
|
|
130
|
-
(ratio - z * se).clamp(0, 1)
|
|
203
|
+
# @example
|
|
204
|
+
# entropy_maximum("hello", size: 26) # => 23
|
|
205
|
+
# entropy_maximum("abc123", size: 64) # => 36
|
|
206
|
+
#
|
|
207
|
+
# @param text [String] The input text to calculate maximum entropy for
|
|
208
|
+
# @param size [Integer] The size of the character set (alphabet size)
|
|
209
|
+
# @return [Integer] The maximum possible entropy in bits, or 0 if size <= 1
|
|
210
|
+
def entropy_maximum(text, size:)
|
|
211
|
+
size > 1 or return 0
|
|
212
|
+
(text.size * Math.log2(size)).floor
|
|
131
213
|
end
|
|
132
214
|
end
|
|
133
215
|
end
|
data/lib/more_math/version.rb
CHANGED
data/more_math.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: more_math 1.
|
|
2
|
+
# stub: more_math 1.11.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "more_math".freeze
|
|
6
|
-
s.version = "1.
|
|
6
|
+
s.version = "1.11.0".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
data/tests/entropy_test.rb
CHANGED
|
@@ -6,6 +6,12 @@ require 'tins'
|
|
|
6
6
|
class EntropyTest < Test::Unit::TestCase
|
|
7
7
|
include MoreMath::Functions
|
|
8
8
|
|
|
9
|
+
# The setup method initializes instance variables with various string values.
|
|
10
|
+
#
|
|
11
|
+
# This method prepares the object with predefined string constants for
|
|
12
|
+
# testing and demonstration purposes. It sets up empty strings, strings of
|
|
13
|
+
# specific lengths, and strings containing various character sets
|
|
14
|
+
# including ASCII, Unicode, and Japanese characters.
|
|
9
15
|
def setup
|
|
10
16
|
@empty = ''
|
|
11
17
|
@low = ?A * 42
|
|
@@ -16,55 +22,106 @@ class EntropyTest < Test::Unit::TestCase
|
|
|
16
22
|
end
|
|
17
23
|
|
|
18
24
|
def test_entropy
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
assert_in_delta 3.951, entropy(@string),
|
|
22
|
-
assert_in_delta 4.431, entropy(@high),
|
|
23
|
-
assert_in_delta 3.700, entropy(@random),
|
|
24
|
-
assert_in_delta 2.807, entropy(@hi),
|
|
25
|
+
assert_in_delta 0.0, entropy(@empty), 1e-12
|
|
26
|
+
assert_in_delta 0.0, entropy(@low), 1e-12
|
|
27
|
+
assert_in_delta 3.951, entropy(@string), 1e-3
|
|
28
|
+
assert_in_delta 4.431, entropy(@high), 1e-3
|
|
29
|
+
assert_in_delta 3.700, entropy(@random), 1e-3
|
|
30
|
+
assert_in_delta 2.807, entropy(@hi), 1e-3
|
|
25
31
|
end
|
|
26
32
|
|
|
27
33
|
def test_entropy_ideal
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
assert_in_delta 1,
|
|
33
|
-
assert_in_delta 1.584, entropy_ideal(3),
|
|
34
|
-
assert_in_delta 3,
|
|
35
|
-
assert_in_delta 3.321, entropy_ideal(10),
|
|
36
|
-
assert_in_delta 4,
|
|
34
|
+
assert_in_delta 0.0, entropy_ideal(-1), 1e-12
|
|
35
|
+
assert_in_delta 0.0, entropy_ideal(0), 1e-12
|
|
36
|
+
assert_in_delta 0.0, entropy_ideal(0.5), 1e-12
|
|
37
|
+
assert_in_delta 0.0, entropy_ideal(1), 1e-12
|
|
38
|
+
assert_in_delta 1.0, entropy_ideal(2), 1e-3
|
|
39
|
+
assert_in_delta 1.584, entropy_ideal(3), 1e-3
|
|
40
|
+
assert_in_delta 3.0, entropy_ideal(8), 1e-3
|
|
41
|
+
assert_in_delta 3.321, entropy_ideal(10), 1e-3
|
|
42
|
+
assert_in_delta 4.0, entropy_ideal(16), 1e-3
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def test_entropy_maximum
|
|
46
|
+
text = 'A' * 64
|
|
47
|
+
assert_equal 0, entropy_maximum(text, size: -1)
|
|
48
|
+
assert_equal 0, entropy_maximum(text, size: 0)
|
|
49
|
+
assert_equal 0, entropy_maximum(text, size: 1)
|
|
50
|
+
assert_equal 64, entropy_maximum(text, size: 2)
|
|
51
|
+
assert_equal 256, entropy_maximum(text, size: 16)
|
|
52
|
+
assert_equal 128, entropy_maximum(text[0, 32], size: 16)
|
|
37
53
|
end
|
|
38
54
|
|
|
39
55
|
def test_entropy_ratio
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
assert_in_delta 0.564, entropy_ratio(@string, size: 128),
|
|
43
|
-
assert_in_delta 0.633, entropy_ratio(@high, size: 128),
|
|
44
|
-
assert_in_delta 1.0, entropy_ratio(@random),
|
|
45
|
-
assert_in_delta 0.462, entropy_ratio(@random, size: 256),
|
|
46
|
-
assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136),
|
|
56
|
+
assert_in_delta 0.0, entropy_ratio(@empty, size: 128), 1e-12
|
|
57
|
+
assert_in_delta 0.0, entropy_ratio(@low, size: 128), 1e-12
|
|
58
|
+
assert_in_delta 0.564, entropy_ratio(@string, size: 128), 1e-3
|
|
59
|
+
assert_in_delta 0.633, entropy_ratio(@high, size: 128), 1e-3
|
|
60
|
+
assert_in_delta 1.0, entropy_ratio(@random, size: @random.size), 1e-3
|
|
61
|
+
assert_in_delta 0.462, entropy_ratio(@random, size: 256), 1e-3
|
|
62
|
+
assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136), 1e-3
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def test_entropy_probabilities
|
|
66
|
+
probs = entropy_probabilities('ABAB')
|
|
67
|
+
assert_equal 0.5, probs['A']
|
|
68
|
+
assert_equal 0.5, probs['B']
|
|
69
|
+
|
|
70
|
+
probs = entropy_probabilities('AAAA')
|
|
71
|
+
assert_equal 1.0, probs['A']
|
|
72
|
+
|
|
73
|
+
probs = entropy_probabilities([])
|
|
74
|
+
assert_equal({}, probs)
|
|
75
|
+
|
|
76
|
+
# Ensure the method accepts an Array of symbols
|
|
77
|
+
probs = entropy_probabilities(['x', 'y', 'x'])
|
|
78
|
+
assert_equal 2.0 / 3.0, probs['x']
|
|
79
|
+
assert_equal 1.0 / 3.0, probs['y']
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def test_minimum_entropy_per_symbol
|
|
83
|
+
# Uniform distribution → entropy equals log2(size)
|
|
84
|
+
assert_in_delta 2.0, minimum_entropy_per_symbol('ABCD'), 1e-12
|
|
85
|
+
|
|
86
|
+
# Single symbol → 0
|
|
87
|
+
assert_in_delta 0.0, minimum_entropy_per_symbol('AAAA'), 1e-12
|
|
88
|
+
|
|
89
|
+
# Empty string → 0
|
|
90
|
+
assert_in_delta 0.0, minimum_entropy_per_symbol(''), 1e-12
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def test_collision_entropy_per_symbol
|
|
94
|
+
# For a uniform distribution, collision entropy = log2(size)
|
|
95
|
+
assert_in_delta 2.0, collision_entropy_per_symbol('ABCD'), 1e-12
|
|
96
|
+
|
|
97
|
+
# All symbols the same → 0
|
|
98
|
+
assert_in_delta 0.0, collision_entropy_per_symbol('AAAA'), 1e-12
|
|
99
|
+
|
|
100
|
+
# Empty string → 0
|
|
101
|
+
assert_in_delta 0.0, collision_entropy_per_symbol(''), 1e-12
|
|
47
102
|
end
|
|
48
103
|
|
|
49
|
-
def
|
|
50
|
-
|
|
51
|
-
|
|
104
|
+
def test_entropy_total
|
|
105
|
+
text = 'ABCD'
|
|
106
|
+
per = entropy_per_symbol(text)
|
|
107
|
+
assert_in_delta per * text.size, entropy_total(text), 1e-12
|
|
52
108
|
|
|
53
|
-
|
|
109
|
+
assert_in_delta 0.0, entropy_total(''), 1e-12
|
|
110
|
+
end
|
|
54
111
|
|
|
55
|
-
|
|
56
|
-
|
|
112
|
+
def test_minimum_entropy_total
|
|
113
|
+
text = 'ABCD'
|
|
114
|
+
per = minimum_entropy_per_symbol(text)
|
|
115
|
+
assert_in_delta per * text.size, minimum_entropy_total(text), 1e-12
|
|
57
116
|
|
|
58
|
-
|
|
59
|
-
ratio = entropy_ratio(token, size: 16)
|
|
60
|
-
assert_operator ratio, :>=, limit
|
|
117
|
+
assert_in_delta 0.0, minimum_entropy_total(''), 1e-12
|
|
61
118
|
end
|
|
62
119
|
|
|
63
|
-
def
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
120
|
+
def test_collision_entropy_total
|
|
121
|
+
text = 'ABCD'
|
|
122
|
+
per = collision_entropy_per_symbol(text)
|
|
123
|
+
assert_in_delta per * text.size, collision_entropy_total(text), 1e-12
|
|
67
124
|
|
|
68
|
-
|
|
125
|
+
assert_in_delta 0.0, collision_entropy_total(''), 1e-12
|
|
69
126
|
end
|
|
70
127
|
end
|