more_math 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b375aa122041b022a2c6b6457cbd6ba9a87691e75a816696c004b57824a5c50e
4
- data.tar.gz: ea3ae210dc593238418ea80b9d8c86b156b7fd53be2971349cbda9985e59aee1
3
+ metadata.gz: 9cec08e9525ca81297ec96ef655884adc76c51dd751aa77f3c2d9092796853b9
4
+ data.tar.gz: cbdd8862d8150d5bcc8f00d9aa6c2f0629601aa83a5ec992876facd6c9d208c3
5
5
  SHA512:
6
- metadata.gz: e28aaa3aaf79ab42eac1fbf4c4a678d7676279ebf326fed10e8c094d0040ad1d664b8a5e9b137a68c25077496886036c67fc377cfb0e1f4027d2c3abaf2ccbc8
7
- data.tar.gz: 8f8f81bd9beb6c9b7f040d58ac6a90acb3fe4a9394896cd4822c148f7ee362e9c0ee5004785953a9f292b7fac2cf23177676cd46ed17f344e19337399b473cd8
6
+ metadata.gz: a55392037367f21c5af96ce557f85470ee9a75a58047580f944c5822af87240c754be75fe8d4a9fd93c070c1c01a793fbbb161b57889e92b9d7f9bad7e3e07cf
7
+ data.tar.gz: 6349577e9373872abdb696cf8abe2b2f0bf8ed0c7f318bb5fc103256f0d6042744aae2fb04ab91233a090ff2112dee8cb96c89cb0786ae95e68bf2c506f1aeb7
data/CHANGES.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # Changes
2
2
 
3
+ ## 2026-01-22 v1.11.0
4
+
5
+ - Added new entropy helper methods: `entropy_probabilities`,
6
+ `entropy_per_symbol`, `minimum_entropy_per_symbol`,
7
+ `collision_entropy_per_symbol`, `entropy_total`, `minimum_entropy_total`, and
8
+ `collision_entropy_total` to `lib/more_math/entropy.rb`
9
+ - Replaced the old `entropy` method with an alias to `entropy_per_symbol`
10
+ - Updated documentation for `entropy_ratio` to clarify the `size:` keyword
11
+ argument
12
+ - Removed the `entropy_ratio_minimum` method and its corresponding test cases
13
+ from the codebase
14
+ - Enhanced test coverage in `tests/entropy_test.rb` with new unit tests for the
15
+ added helper methods
16
+ - Updated existing test assertions to use `assert_in_delta` for floating-point
17
+ comparisons
18
+ - Added descriptive comments to the test `setup` method
19
+ - Ensured all new methods return correct values for empty, uniform, and
20
+ varied symbol strings
21
+ - Corrected example values in `entropy_ratio` documentation
22
+
3
23
  ## 2026-01-19 v1.10.0
4
24
 
5
25
  - Added new `entropy_maximum` method to calculate theoretical maximum entropy
@@ -25,31 +25,128 @@ module MoreMath
25
25
  # entropy("aaaa") # => 0.0 (no entropy)
26
26
  # entropy("abcd") # => 2.0 (actual entropy)
27
27
  module Entropy
28
- # Calculates the Shannon entropy in bits of a text string.
28
+ # Calculates the probability distribution of symbols in the given input.
29
+ #
30
+ # This method computes the frequency of each symbol in the input and
31
+ # converts these frequencies into probabilities by dividing by the total
32
+ # number of symbols.
33
+ #
34
+ # @param symbols [String, Array<String>] The sequence of symbols to
35
+ # calculate probabilities for
36
+ # @return [Hash<String, Float>] A hash mapping each symbol to its
37
+ # probability value
38
+ def entropy_probabilities(symbols)
39
+ symbols = symbols.chars if symbols.respond_to?(:chars)
40
+
41
+ freq = symbols.tally
42
+ total = symbols.size
43
+
44
+ freq.transform_values { |c| c.to_f / total }
45
+ end
46
+
47
+ # Calculates the Shannon entropy per symbol in the given symbols.
29
48
  #
30
- # Shannon entropy measures the average amount of information (in bits) needed
31
- # to encode characters in the text based on their frequencies.
49
+ # This method computes the entropy of a sequence of symbols, measuring the
50
+ # average information content or unpredictability of the symbols.
32
51
  #
33
- # @example
34
- # entropy("hello") # => 2.3219280948873626
35
- # entropy("aaaa") # => 0.0
36
- #
37
- # @param text [String] The input text to calculate entropy for
38
- # @return [Float] The Shannon entropy in bits
39
- def entropy(text)
40
- chars = nil
41
- if text.respond_to?(:chars)
42
- chars = text.chars
43
- else
44
- chars = text
45
- end
46
- size = chars.size
47
-
48
- chars.each_with_object(Hash.new(0.0)) { |c, h| h[c] += 1 }.
49
- each_value.reduce(0.0) do |entropy, count|
50
- frequency = count / size
51
- entropy + frequency * Math.log2(frequency)
52
- end.abs
52
+ # @param symbols [String, Array<String>] The sequence of symbols to calculate entropy for
53
+ # @return [Float] The entropy value in bits per symbol
54
+ def entropy_per_symbol(symbols)
55
+ symbols = symbols.chars if symbols.respond_to?(:chars)
56
+
57
+ symbols.empty? and return 0.0
58
+
59
+ probs = entropy_probabilities(symbols)
60
+
61
+ -probs.values.sum { |p| p * Math.log2(p) }
62
+ end
63
+ alias entropy entropy_per_symbol
64
+
65
+ # Calculates the minimum entropy per symbol in the given symbols.
66
+ #
67
+ # This method computes the min-entropy (Rényi entropy of order
68
+ # infinity), defined as -log2 of the probability of the most likely
69
+ # symbol in the sequence.
70
+ #
71
+ # @param symbols [String, Array<String>] The sequence of symbols to
72
+ # calculate minimum entropy for
73
+ # @return [Float] The minimum entropy value in bits per symbol
74
+ def minimum_entropy_per_symbol(symbols)
75
+ symbols = symbols.chars if symbols.respond_to?(:chars)
76
+
77
+ symbols.empty? and return 0.0
78
+
79
+ probs = entropy_probabilities(symbols)
80
+
81
+ -Math.log2(probs.values.max)
82
+ end
83
+
84
+ # Calculates the collision entropy per symbol in the given symbols.
85
+ #
86
+ # This method computes the collision entropy (Rényi entropy of order 2),
88
+ # the negative log2 of the probability that two independently drawn
89
+ # symbols are identical, based on the sum of squared probabilities.
89
+ #
90
+ # @param symbols [String, Array<String>] The sequence of symbols to
91
+ # calculate collision entropy for
92
+ # @return [Float] The collision entropy value in bits per symbol
93
+ def collision_entropy_per_symbol(symbols)
94
+ symbols = symbols.chars if symbols.respond_to?(:chars)
95
+
96
+ symbols.empty? and return 0.0
97
+
98
+ probs = entropy_probabilities(symbols)
99
+
100
+ -Math.log2(probs.values.sum { |p| p * p })
101
+ end
102
+
103
+ # Calculates the total entropy for a sequence of symbols.
104
+ #
105
+ # This method computes the total information content of a symbol sequence
106
+ # by multiplying the entropy per symbol by the total number of symbols.
107
+ #
108
+ # @param symbols [String, Array<String>] The sequence of symbols to
109
+ # calculate total entropy for
110
+ # @return [Float] The total entropy value in bits for the entire symbol
111
+ # sequence
112
+ def entropy_total(symbols)
113
+ symbols = symbols.chars if symbols.respond_to?(:chars)
114
+
115
+ entropy_per_symbol(symbols) * symbols.size
116
+ end
117
+
118
+ # Calculates the total minimum entropy for a sequence of symbols.
119
+ #
120
+ # This method computes the total information content of a symbol sequence
121
+ # using the minimum entropy per symbol, multiplied by the total number of
122
+ # symbols. Min-entropy is the most conservative (smallest) of the Rényi
123
+ # entropy measures for the given sequence.
124
+ #
125
+ # @param symbols [String, Array<String>] The sequence of symbols to
126
+ # calculate total minimum entropy for
127
+ # @return [Float] The total minimum entropy value in bits for the entire
128
+ # symbol sequence
129
+ def minimum_entropy_total(symbols)
130
+ symbols = symbols.chars if symbols.respond_to?(:chars)
131
+
132
+ minimum_entropy_per_symbol(symbols) * symbols.size
133
+ end
134
+
135
+ # Calculates the total collision entropy for a sequence of symbols.
136
+ #
137
+ # This method computes the total information content of a symbol sequence
138
+ # using the collision entropy per symbol, multiplied by the total number of
139
+ # symbols. Collision entropy measures the likelihood that two
140
+ # independently drawn symbols coincide.
141
+ #
142
+ # @param symbols [String, Array<String>] The sequence of symbols to
143
+ # calculate total collision entropy for
144
+ # @return [Float] The total collision entropy value in bits for the entire
145
+ # symbol sequence
146
+ def collision_entropy_total(symbols)
147
+ symbols = symbols.chars if symbols.respond_to?(:chars)
148
+
149
+ collision_entropy_per_symbol(symbols) * symbols.size
53
150
  end
54
151
 
55
152
  # Calculates the ideal (maximum) entropy for a given character set size.
@@ -80,54 +177,21 @@ module MoreMath
80
177
  # theoretical maximum entropy for that character set.
81
178
  #
82
179
  # @example
83
- # entropy_ratio("hello") # => 0.6834
84
- # entropy_ratio("aaaaa") # => 0.0
85
- # entropy_ratio("abcde") # => 1.0
180
+ # entropy_ratio("hello", size: 26) # => 0.4088
181
+ # entropy_ratio("aaaaa", size: 26) # => 0.0
182
+ # entropy_ratio("abcde", size: 5) # => 1.0
183
+ # entropy_ratio("abcde", size: 26) # => 0.4939
86
184
  #
87
- # @example With custom alphabet size
88
- # # Normalizing against a 26-letter alphabet (English)
89
- # entropy_ratio("hello", size: 26) # => 0.394...
90
185
  #
91
186
  # @param text [String] The input text to calculate entropy ratio for
92
- # @param size [Integer] The size of the character set to normalize against (alphabet size).
187
+ # @param size [Integer] The size of the character set to normalize against
188
+ # (alphabet size).
93
189
  # @return [Float] Normalized entropy ratio between 0 and 1
94
190
  def entropy_ratio(text, size:)
95
191
  size <= 1 and return 0.0
96
192
  entropy(text) / entropy_ideal(size)
97
193
  end
98
194
 
99
- # Calculates the minimum entropy ratio with confidence interval adjustment
100
- #
101
- # This method computes a adjusted entropy ratio that accounts for
102
- # statistical uncertainty by incorporating the standard error and a
103
- # confidence level.
104
- #
105
- # @param text [String] The input text to calculate entropy ratio for
106
- # @param size [Integer] The size of the character set to normalize against
107
- # (alphabet size).
108
- # @param alpha [Float] The significance level for the confidence interval (default: 0.05)
109
- # @return [Float] The adjusted entropy ratio within the confidence interval
110
- # @raise [ArgumentError] When alphabet size is less than 2
111
- # @raise [ArgumentError] When text is empty
112
- def entropy_ratio_minimum(text, size:, alpha: 0.05)
113
- raise ArgumentError, 'alphabet size must be ≥ 2' if size < 2
114
- raise ArgumentError, 'text must not be empty' if text.empty?
115
-
116
- n = text.size
117
- k = size
118
-
119
- ratio = MoreMath::Functions.entropy_ratio(text, size: k)
120
-
121
- logk = Math.log2(k)
122
- diff = logk - 1.0 / Math.log(2)
123
- var = (diff ** 2) / (logk ** 2) * (1.0 - 1.0 / k) / n
124
- se = Math.sqrt(var) # standard error
125
-
126
- z = STD_NORMAL_DISTRIBUTION.inverse_probability(1.0 - alpha / 2.0)
127
-
128
- (ratio - z * se).clamp(0, 1)
129
- end
130
-
131
195
  # Calculates the maximum possible entropy for a given text and alphabet
132
196
  # size.
133
197
  #
@@ -1,6 +1,6 @@
1
1
  module MoreMath
2
2
  # MoreMath version
3
- VERSION = '1.10.0'
3
+ VERSION = '1.11.0'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
data/more_math.gemspec CHANGED
@@ -1,9 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: more_math 1.10.0 ruby lib
2
+ # stub: more_math 1.11.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "more_math".freeze
6
- s.version = "1.10.0".freeze
6
+ s.version = "1.11.0".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
@@ -6,6 +6,12 @@ require 'tins'
6
6
  class EntropyTest < Test::Unit::TestCase
7
7
  include MoreMath::Functions
8
8
 
9
+ # The setup method initializes instance variables with various string values.
10
+ #
11
+ # This method prepares the object with predefined string constants for
12
+ # testing and demonstration purposes. It sets up empty strings, strings of
13
+ # specific lengths, and strings containing various character sets
14
+ # including ASCII, Unicode, and Japanese characters.
9
15
  def setup
10
16
  @empty = ''
11
17
  @low = ?A * 42
@@ -16,27 +22,27 @@ class EntropyTest < Test::Unit::TestCase
16
22
  end
17
23
 
18
24
  def test_entropy
19
- assert_equal 0, entropy(@empty)
20
- assert_equal 0, entropy(@low)
21
- assert_in_delta 3.951, entropy(@string), 1E-3
22
- assert_in_delta 4.431, entropy(@high), 1E-3
23
- assert_in_delta 3.700, entropy(@random), 1E-3
24
- assert_in_delta 2.807, entropy(@hi), 1E-3
25
+ assert_in_delta 0.0, entropy(@empty), 1e-12
26
+ assert_in_delta 0.0, entropy(@low), 1e-12
27
+ assert_in_delta 3.951, entropy(@string), 1e-3
28
+ assert_in_delta 4.431, entropy(@high), 1e-3
29
+ assert_in_delta 3.700, entropy(@random), 1e-3
30
+ assert_in_delta 2.807, entropy(@hi), 1e-3
25
31
  end
26
32
 
27
33
  def test_entropy_ideal
28
- assert_equal 0, entropy_ideal(-1)
29
- assert_equal 0, entropy_ideal(0)
30
- assert_equal 0, entropy_ideal(0.5)
31
- assert_equal 0, entropy_ideal(1)
32
- assert_in_delta 1, entropy_ideal(2), 1E-3
33
- assert_in_delta 1.584, entropy_ideal(3), 1E-3
34
- assert_in_delta 3, entropy_ideal(8), 1E-3
35
- assert_in_delta 3.321, entropy_ideal(10), 1E-3
36
- assert_in_delta 4, entropy_ideal(16), 1E-3
34
+ assert_in_delta 0.0, entropy_ideal(-1), 1e-12
35
+ assert_in_delta 0.0, entropy_ideal(0), 1e-12
36
+ assert_in_delta 0.0, entropy_ideal(0.5), 1e-12
37
+ assert_in_delta 0.0, entropy_ideal(1), 1e-12
38
+ assert_in_delta 1.0, entropy_ideal(2), 1e-3
39
+ assert_in_delta 1.584, entropy_ideal(3), 1e-3
40
+ assert_in_delta 3.0, entropy_ideal(8), 1e-3
41
+ assert_in_delta 3.321, entropy_ideal(10), 1e-3
42
+ assert_in_delta 4.0, entropy_ideal(16), 1e-3
37
43
  end
38
44
 
39
- def test_entropy_mamxium
45
+ def test_entropy_maximum
40
46
  text = 'A' * 64
41
47
  assert_equal 0, entropy_maximum(text, size: -1)
42
48
  assert_equal 0, entropy_maximum(text, size: 0)
@@ -47,34 +53,75 @@ class EntropyTest < Test::Unit::TestCase
47
53
  end
48
54
 
49
55
  def test_entropy_ratio
50
- assert_equal 0, entropy_ratio(@empty, size: 128)
51
- assert_equal 0, entropy_ratio(@low, size: 128)
52
- assert_in_delta 0.564, entropy_ratio(@string, size: 128), 1E-3
53
- assert_in_delta 0.633, entropy_ratio(@high, size: 128), 1E-3
54
- assert_in_delta 1.0, entropy_ratio(@random, size: @random.size), 1E-3
55
- assert_in_delta 0.462, entropy_ratio(@random, size: 256), 1E-3
56
- assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136), 1E-3
56
+ assert_in_delta 0.0, entropy_ratio(@empty, size: 128), 1e-12
57
+ assert_in_delta 0.0, entropy_ratio(@low, size: 128), 1e-12
58
+ assert_in_delta 0.564, entropy_ratio(@string, size: 128), 1e-3
59
+ assert_in_delta 0.633, entropy_ratio(@high, size: 128), 1e-3
60
+ assert_in_delta 1.0, entropy_ratio(@random, size: @random.size), 1e-3
61
+ assert_in_delta 0.462, entropy_ratio(@random, size: 256), 1e-3
62
+ assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136), 1e-3
57
63
  end
58
64
 
59
- def test_entropy_ratio_minimum_basic
60
- # A fairly long random token over a 16‑symbol alphabet
61
- token = Tins::Token.new(length: 128, alphabet: Tins::Token::BASE16_LOWERCASE_ALPHABET)
65
+ def test_entropy_probabilities
66
+ probs = entropy_probabilities('ABAB')
67
+ assert_equal 0.5, probs['A']
68
+ assert_equal 0.5, probs['B']
62
69
 
63
- limit = entropy_ratio_minimum(token, size: 16)
70
+ probs = entropy_probabilities('AAAA')
71
+ assert_equal 1.0, probs['A']
64
72
 
65
- # Bounds must be ≧ 0
66
- assert_operator limit, :>=, 0.0
73
+ probs = entropy_probabilities([])
74
+ assert_equal({}, probs)
67
75
 
68
- # The observed ratio should be limit
69
- ratio = entropy_ratio(token, size: 16)
70
- assert_operator ratio, :>=, limit
76
+ # Ensure the method accepts an Array of symbols
77
+ probs = entropy_probabilities(['x', 'y', 'x'])
78
+ assert_equal 2.0 / 3.0, probs['x']
79
+ assert_equal 1.0 / 3.0, probs['y']
71
80
  end
72
81
 
73
- def test_entropy_ratio_minimum_small
74
- # Very short string the interval will stay below 1.0
75
- str = 'a' # alphabet size 2 (binary)
76
- limit = entropy_ratio_minimum(str, size: 2)
82
+ def test_minimum_entropy_per_symbol
83
+ # Uniform distribution entropy equals log2(size)
84
+ assert_in_delta 2.0, minimum_entropy_per_symbol('ABCD'), 1e-12
77
85
 
78
- assert_equal 0.0, limit
86
+ # Single symbol → 0
87
+ assert_in_delta 0.0, minimum_entropy_per_symbol('AAAA'), 1e-12
88
+
89
+ # Empty string → 0
90
+ assert_in_delta 0.0, minimum_entropy_per_symbol(''), 1e-12
91
+ end
92
+
93
+ def test_collision_entropy_per_symbol
94
+ # For a uniform distribution, collision entropy = log2(size)
95
+ assert_in_delta 2.0, collision_entropy_per_symbol('ABCD'), 1e-12
96
+
97
+ # All symbols the same → 0
98
+ assert_in_delta 0.0, collision_entropy_per_symbol('AAAA'), 1e-12
99
+
100
+ # Empty string → 0
101
+ assert_in_delta 0.0, collision_entropy_per_symbol(''), 1e-12
102
+ end
103
+
104
+ def test_entropy_total
105
+ text = 'ABCD'
106
+ per = entropy_per_symbol(text)
107
+ assert_in_delta per * text.size, entropy_total(text), 1e-12
108
+
109
+ assert_in_delta 0.0, entropy_total(''), 1e-12
110
+ end
111
+
112
+ def test_minimum_entropy_total
113
+ text = 'ABCD'
114
+ per = minimum_entropy_per_symbol(text)
115
+ assert_in_delta per * text.size, minimum_entropy_total(text), 1e-12
116
+
117
+ assert_in_delta 0.0, minimum_entropy_total(''), 1e-12
118
+ end
119
+
120
+ def test_collision_entropy_total
121
+ text = 'ABCD'
122
+ per = collision_entropy_per_symbol(text)
123
+ assert_in_delta per * text.size, collision_entropy_total(text), 1e-12
124
+
125
+ assert_in_delta 0.0, collision_entropy_total(''), 1e-12
79
126
  end
80
127
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: more_math
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.0
4
+ version: 1.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank