more_math 1.9.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +44 -0
- data/lib/more_math/entropy.rb +148 -66
- data/lib/more_math/version.rb +1 -1
- data/more_math.gemspec +2 -2
- data/tests/entropy_test.rb +93 -36
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9cec08e9525ca81297ec96ef655884adc76c51dd751aa77f3c2d9092796853b9
|
|
4
|
+
data.tar.gz: cbdd8862d8150d5bcc8f00d9aa6c2f0629601aa83a5ec992876facd6c9d208c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a55392037367f21c5af96ce557f85470ee9a75a58047580f944c5822af87240c754be75fe8d4a9fd93c070c1c01a793fbbb161b57889e92b9d7f9bad7e3e07cf
|
|
7
|
+
data.tar.gz: 6349577e9373872abdb696cf8abe2b2f0bf8ed0c7f318bb5fc103256f0d6042744aae2fb04ab91233a090ff2112dee8cb96c89cb0786ae95e68bf2c506f1aeb7
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,49 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-01-22 v1.11.0
|
|
4
|
+
|
|
5
|
+
- Added new entropy helper methods: `entropy_probabilities`,
|
|
6
|
+
`entropy_per_symbol`, `minimum_entropy_per_symbol`,
|
|
7
|
+
`collision_entropy_per_symbol`, `entropy_total`, `minimum_entropy_total`, and
|
|
8
|
+
`collision_entropy_total` to `lib/more_math/entropy.rb`
|
|
9
|
+
- Replaced the old `entropy` method with an alias to `entropy_per_symbol`
|
|
10
|
+
- Updated documentation for `entropy_ratio` to clarify the `size:` keyword
|
|
11
|
+
argument
|
|
12
|
+
- Removed the `entropy_ratio_minimum` method and its corresponding test cases
|
|
13
|
+
from the codebase
|
|
14
|
+
- Enhanced test coverage in `tests/entropy_test.rb` with new unit tests for the
|
|
15
|
+
added helper methods
|
|
16
|
+
- Updated existing test assertions to use `assert_in_delta` for floating-point
|
|
17
|
+
comparisons
|
|
18
|
+
- Added descriptive comments to the test `setup` method
|
|
19
|
+
- Ensured all new methods return correct values for empty, uniform, and
|
|
20
|
+
varied symbol strings
|
|
21
|
+
- Corrected example values in `entropy_ratio` documentation
|
|
22
|
+
|
|
23
|
+
## 2026-01-19 v1.10.0
|
|
24
|
+
|
|
25
|
+
- Added new `entropy_maximum` method to calculate theoretical maximum entropy
|
|
26
|
+
for a text given an alphabet size
|
|
27
|
+
- Made `size` parameter required in `entropy_ratio` and `entropy_ratio_minimum`
|
|
28
|
+
methods instead of defaulting to `text.size`
|
|
29
|
+
- Updated YARD documentation to clarify that `size` parameter represents
|
|
30
|
+
alphabet size
|
|
31
|
+
- Modified examples to use explicit alphabet sizes for better clarity
|
|
32
|
+
- All entropy methods now consistently return values in bits as expected for
|
|
33
|
+
Shannon entropy
|
|
34
|
+
- Updated documentation examples to use simplified method calls without
|
|
35
|
+
`MoreMath::Entropy` prefix
|
|
36
|
+
- Enhanced `entropy_maximum` method documentation to explain its use in
|
|
37
|
+
determining security strength for tokens
|
|
38
|
+
- Added comprehensive tests for `entropy_maximum` function covering edge cases
|
|
39
|
+
and various alphabet sizes
|
|
40
|
+
- Improved `entropy_maximum` method signature to return `0` for invalid
|
|
41
|
+
alphabet sizes (≤ 1) and use `Math.log2` for calculation
|
|
42
|
+
- Updated existing entropy method documentation to clarify it calculates
|
|
43
|
+
entropy in bits
|
|
44
|
+
- Simplified example code in documentation to use direct method calls instead
|
|
45
|
+
of module prefixes
|
|
46
|
+
|
|
3
47
|
## 2026-01-16 v1.9.0
|
|
4
48
|
|
|
5
49
|
- Added support for array inputs in entropy calculation methods by checking
|
data/lib/more_math/entropy.rb
CHANGED
|
@@ -15,41 +15,138 @@ module MoreMath
|
|
|
15
15
|
#
|
|
16
16
|
# @example Basic usage
|
|
17
17
|
# require 'more_math'
|
|
18
|
-
# include MoreMath
|
|
18
|
+
# include MoreMath::Functions
|
|
19
19
|
#
|
|
20
20
|
# text = "hello world"
|
|
21
21
|
# puts entropy(text) # => 2.3219280948873626
|
|
22
22
|
# puts entropy_ratio(text) # => 0.7428571428571429
|
|
23
23
|
#
|
|
24
24
|
# @example Using with different text samples
|
|
25
|
-
#
|
|
26
|
-
#
|
|
25
|
+
# entropy("aaaa") # => 0.0 (no entropy)
|
|
26
|
+
# entropy("abcd") # => 2.0 (actual entropy)
|
|
27
27
|
module Entropy
|
|
28
|
-
# Calculates the
|
|
28
|
+
# Calculates the probability distribution of symbols in the given input.
|
|
29
29
|
#
|
|
30
|
-
#
|
|
31
|
-
#
|
|
30
|
+
# This method computes the frequency of each symbol in the input and
|
|
31
|
+
# converts these frequencies into probabilities by dividing by the total
|
|
32
|
+
# number of symbols.
|
|
32
33
|
#
|
|
33
|
-
# @
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
34
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
35
|
+
# calculate probabilities for
|
|
36
|
+
# @return [Hash<String, Float>] A hash mapping each symbol to its
|
|
37
|
+
# probability value
|
|
38
|
+
def entropy_probabilities(symbols)
|
|
39
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
40
|
+
|
|
41
|
+
freq = symbols.tally
|
|
42
|
+
total = symbols.size
|
|
43
|
+
|
|
44
|
+
freq.transform_values { |c| c.to_f / total }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Calculates the Shannon entropy per symbol in the given symbols.
|
|
48
|
+
#
|
|
49
|
+
# This method computes the entropy of a sequence of symbols, measuring the
|
|
50
|
+
# average information content or unpredictability of the symbols.
|
|
51
|
+
#
|
|
52
|
+
# @param symbols [String, Array<String>] The sequence of symbols to calculate entropy for
|
|
53
|
+
# @return [Float] The entropy value in bits per symbol
|
|
54
|
+
def entropy_per_symbol(symbols)
|
|
55
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
56
|
+
|
|
57
|
+
symbols.empty? and return 0.0
|
|
58
|
+
|
|
59
|
+
probs = entropy_probabilities(symbols)
|
|
60
|
+
|
|
61
|
+
-probs.values.sum { |p| p * Math.log2(p) }
|
|
62
|
+
end
|
|
63
|
+
alias entropy entropy_per_symbol
|
|
64
|
+
|
|
65
|
+
# Calculates the minimum entropy per symbol in the given symbols.
|
|
66
|
+
#
|
|
67
|
+
# This method determines the min-entropy of a sequence of symbols, i.e.
|
|
68
|
+
# the negative base-2 logarithm of the probability of the most likely
|
|
69
|
+
# symbol.
|
|
70
|
+
#
|
|
71
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
72
|
+
# calculate minimum entropy for
|
|
73
|
+
# @return [Float] The minimum entropy value in bits per symbol
|
|
74
|
+
def minimum_entropy_per_symbol(symbols)
|
|
75
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
76
|
+
|
|
77
|
+
symbols.empty? and return 0.0
|
|
78
|
+
|
|
79
|
+
probs = entropy_probabilities(symbols)
|
|
80
|
+
|
|
81
|
+
-Math.log2(probs.values.max)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Calculates the collision entropy per symbol in the given symbols.
|
|
85
|
+
#
|
|
86
|
+
# This method computes the collision entropy (Rényi entropy of order 2),
|
|
87
|
+
# which measures the likelihood that two independently drawn symbols
|
|
88
|
+
# coincide. It's based on the sum of squared probabilities of each symbol.
|
|
89
|
+
#
|
|
90
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
91
|
+
# calculate collision entropy for
|
|
92
|
+
# @return [Float] The collision entropy value in bits per symbol
|
|
93
|
+
def collision_entropy_per_symbol(symbols)
|
|
94
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
95
|
+
|
|
96
|
+
symbols.empty? and return 0.0
|
|
97
|
+
|
|
98
|
+
probs = entropy_probabilities(symbols)
|
|
99
|
+
|
|
100
|
+
-Math.log2(probs.values.sum { |p| p * p })
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Calculates the total entropy for a sequence of symbols.
|
|
104
|
+
#
|
|
105
|
+
# This method computes the total information content of a symbol sequence
|
|
106
|
+
# by multiplying the entropy per symbol by the total number of symbols.
|
|
107
|
+
#
|
|
108
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
109
|
+
# calculate total entropy for
|
|
110
|
+
# @return [Float] The total entropy value in bits for the entire symbol
|
|
111
|
+
# sequence
|
|
112
|
+
def entropy_total(symbols)
|
|
113
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
114
|
+
|
|
115
|
+
entropy_per_symbol(symbols) * symbols.size
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Calculates the total minimum entropy for a sequence of symbols.
|
|
119
|
+
#
|
|
120
|
+
# This method computes the total information content of a symbol sequence
|
|
121
|
+
# using the minimum entropy per symbol, multiplied by the total number of
|
|
122
|
+
# symbols. It represents the theoretical minimum possible entropy for the
|
|
123
|
+
# given sequence.
|
|
124
|
+
#
|
|
125
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
126
|
+
# calculate total minimum entropy for
|
|
127
|
+
# @return [Float] The total minimum entropy value in bits for the entire
|
|
128
|
+
# symbol sequence
|
|
129
|
+
def minimum_entropy_total(symbols)
|
|
130
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
131
|
+
|
|
132
|
+
minimum_entropy_per_symbol(symbols) * symbols.size
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Calculates the total collision entropy for a sequence of symbols.
|
|
136
|
+
#
|
|
137
|
+
# This method computes the total information content of a symbol sequence
|
|
138
|
+
# using the collision entropy per symbol, multiplied by the total number of
|
|
139
|
+
# symbols. Collision entropy measures the likelihood that two
|
|
140
|
+
# independently drawn symbols in a sequence coincide.
|
|
141
|
+
#
|
|
142
|
+
# @param symbols [String, Array<String>] The sequence of symbols to
|
|
143
|
+
# calculate total collision entropy for
|
|
144
|
+
# @return [Float] The total collision entropy value in bits for the entire
|
|
145
|
+
# symbol sequence
|
|
146
|
+
def collision_entropy_total(symbols)
|
|
147
|
+
symbols = symbols.chars if symbols.respond_to?(:chars)
|
|
148
|
+
|
|
149
|
+
collision_entropy_per_symbol(symbols) * symbols.size
|
|
53
150
|
end
|
|
54
151
|
|
|
55
152
|
# Calculates the ideal (maximum) entropy for a given character set size.
|
|
@@ -58,8 +155,8 @@ module MoreMath
|
|
|
58
155
|
# alphabet have equal probability of occurrence.
|
|
59
156
|
#
|
|
60
157
|
# @example
|
|
61
|
-
#
|
|
62
|
-
#
|
|
158
|
+
# entropy_ideal(2) # => 1.0
|
|
159
|
+
# entropy_ideal(256) # => 8.0
|
|
63
160
|
#
|
|
64
161
|
# @param size [Integer] The number of unique characters in the alphabet
|
|
65
162
|
# @return [Float] The maximum possible entropy in bits
|
|
@@ -80,54 +177,39 @@ module MoreMath
|
|
|
80
177
|
# theoretical maximum entropy for that character set.
|
|
81
178
|
#
|
|
82
179
|
# @example
|
|
83
|
-
#
|
|
84
|
-
#
|
|
85
|
-
#
|
|
180
|
+
# entropy_ratio("hello", size: 26) # => 0.4088
|
|
181
|
+
# entropy_ratio("aaaaa", size: 26) # => 0.0
|
|
182
|
+
# entropy_ratio("abcde", size: 5) # => 1.0
|
|
183
|
+
# entropy_ratio("abcde", size: 26) # => 0.4939
|
|
86
184
|
#
|
|
87
|
-
# @example With custom alphabet size
|
|
88
|
-
# # Normalizing against a 26-letter alphabet (English)
|
|
89
|
-
# MoreMath::Entropy.entropy_ratio("hello", size: 26) # => 0.394...
|
|
90
185
|
#
|
|
91
186
|
# @param text [String] The input text to calculate entropy ratio for
|
|
92
|
-
# @param size [Integer] The size of the character set to normalize against
|
|
93
|
-
#
|
|
94
|
-
# normalizes the entropy relative to the text's own character space.
|
|
95
|
-
# This allows comparison of texts with different lengths on the same scale.
|
|
187
|
+
# @param size [Integer] The size of the character set to normalize against
|
|
188
|
+
# (alphabet size).
|
|
96
189
|
# @return [Float] Normalized entropy ratio between 0 and 1
|
|
97
|
-
def entropy_ratio(text, size:
|
|
190
|
+
def entropy_ratio(text, size:)
|
|
98
191
|
size <= 1 and return 0.0
|
|
99
192
|
entropy(text) / entropy_ideal(size)
|
|
100
193
|
end
|
|
101
194
|
|
|
102
|
-
# Calculates the
|
|
195
|
+
# Calculates the maximum possible entropy for a given text and alphabet
|
|
196
|
+
# size.
|
|
103
197
|
#
|
|
104
|
-
# This
|
|
105
|
-
#
|
|
106
|
-
#
|
|
198
|
+
# This represents the theoretical maximum entropy that could be achieved if
|
|
199
|
+
# all characters in the text were chosen uniformly at random from the
|
|
200
|
+
# alphabet. It's used to determine the upper bound of security strength for
|
|
201
|
+
# tokens.
|
|
107
202
|
#
|
|
108
|
-
# @
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
112
|
-
# @
|
|
113
|
-
# @
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
n = text.size
|
|
119
|
-
k = size
|
|
120
|
-
|
|
121
|
-
ratio = MoreMath::Functions.entropy_ratio(text, size: k)
|
|
122
|
-
|
|
123
|
-
logk = Math.log2(k)
|
|
124
|
-
diff = logk - 1.0 / Math.log(2)
|
|
125
|
-
var = (diff ** 2) / (logk ** 2) * (1.0 - 1.0 / k) / n
|
|
126
|
-
se = Math.sqrt(var) # standard error
|
|
127
|
-
|
|
128
|
-
z = STD_NORMAL_DISTRIBUTION.inverse_probability(1.0 - alpha / 2.0)
|
|
129
|
-
|
|
130
|
-
(ratio - z * se).clamp(0, 1)
|
|
203
|
+
# @example
|
|
204
|
+
# entropy_maximum("hello", size: 26) # => 23
|
|
205
|
+
# entropy_maximum("abc123", size: 64) # => 36
|
|
206
|
+
#
|
|
207
|
+
# @param text [String] The input text to calculate maximum entropy for
|
|
208
|
+
# @param size [Integer] The size of the character set (alphabet size)
|
|
209
|
+
# @return [Integer] The maximum possible entropy in bits, or 0 if size <= 1
|
|
210
|
+
def entropy_maximum(text, size:)
|
|
211
|
+
size > 1 or return 0
|
|
212
|
+
(text.size * Math.log2(size)).floor
|
|
131
213
|
end
|
|
132
214
|
end
|
|
133
215
|
end
|
data/lib/more_math/version.rb
CHANGED
data/more_math.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: more_math 1.
|
|
2
|
+
# stub: more_math 1.11.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "more_math".freeze
|
|
6
|
-
s.version = "1.
|
|
6
|
+
s.version = "1.11.0".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
data/tests/entropy_test.rb
CHANGED
|
@@ -6,6 +6,12 @@ require 'tins'
|
|
|
6
6
|
class EntropyTest < Test::Unit::TestCase
|
|
7
7
|
include MoreMath::Functions
|
|
8
8
|
|
|
9
|
+
# The setup method initializes instance variables with various string values.
|
|
10
|
+
#
|
|
11
|
+
# This method prepares the object with predefined string constants for
|
|
12
|
+
# testing and demonstration purposes. It sets up empty strings, strings of
|
|
13
|
+
# specific lengths, and strings containing various character sets
|
|
14
|
+
# including ASCII, Unicode, and Japanese characters.
|
|
9
15
|
def setup
|
|
10
16
|
@empty = ''
|
|
11
17
|
@low = ?A * 42
|
|
@@ -16,55 +22,106 @@ class EntropyTest < Test::Unit::TestCase
|
|
|
16
22
|
end
|
|
17
23
|
|
|
18
24
|
def test_entropy
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
assert_in_delta 3.951, entropy(@string),
|
|
22
|
-
assert_in_delta 4.431, entropy(@high),
|
|
23
|
-
assert_in_delta 3.700, entropy(@random),
|
|
24
|
-
assert_in_delta 2.807, entropy(@hi),
|
|
25
|
+
assert_in_delta 0.0, entropy(@empty), 1e-12
|
|
26
|
+
assert_in_delta 0.0, entropy(@low), 1e-12
|
|
27
|
+
assert_in_delta 3.951, entropy(@string), 1e-3
|
|
28
|
+
assert_in_delta 4.431, entropy(@high), 1e-3
|
|
29
|
+
assert_in_delta 3.700, entropy(@random), 1e-3
|
|
30
|
+
assert_in_delta 2.807, entropy(@hi), 1e-3
|
|
25
31
|
end
|
|
26
32
|
|
|
27
33
|
def test_entropy_ideal
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
assert_in_delta 1,
|
|
33
|
-
assert_in_delta 1.584, entropy_ideal(3),
|
|
34
|
-
assert_in_delta 3,
|
|
35
|
-
assert_in_delta 3.321, entropy_ideal(10),
|
|
36
|
-
assert_in_delta 4,
|
|
34
|
+
assert_in_delta 0.0, entropy_ideal(-1), 1e-12
|
|
35
|
+
assert_in_delta 0.0, entropy_ideal(0), 1e-12
|
|
36
|
+
assert_in_delta 0.0, entropy_ideal(0.5), 1e-12
|
|
37
|
+
assert_in_delta 0.0, entropy_ideal(1), 1e-12
|
|
38
|
+
assert_in_delta 1.0, entropy_ideal(2), 1e-3
|
|
39
|
+
assert_in_delta 1.584, entropy_ideal(3), 1e-3
|
|
40
|
+
assert_in_delta 3.0, entropy_ideal(8), 1e-3
|
|
41
|
+
assert_in_delta 3.321, entropy_ideal(10), 1e-3
|
|
42
|
+
assert_in_delta 4.0, entropy_ideal(16), 1e-3
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def test_entropy_maximum
|
|
46
|
+
text = 'A' * 64
|
|
47
|
+
assert_equal 0, entropy_maximum(text, size: -1)
|
|
48
|
+
assert_equal 0, entropy_maximum(text, size: 0)
|
|
49
|
+
assert_equal 0, entropy_maximum(text, size: 1)
|
|
50
|
+
assert_equal 64, entropy_maximum(text, size: 2)
|
|
51
|
+
assert_equal 256, entropy_maximum(text, size: 16)
|
|
52
|
+
assert_equal 128, entropy_maximum(text[0, 32], size: 16)
|
|
37
53
|
end
|
|
38
54
|
|
|
39
55
|
def test_entropy_ratio
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
assert_in_delta 0.564, entropy_ratio(@string, size: 128),
|
|
43
|
-
assert_in_delta 0.633, entropy_ratio(@high, size: 128),
|
|
44
|
-
assert_in_delta 1.0, entropy_ratio(@random),
|
|
45
|
-
assert_in_delta 0.462, entropy_ratio(@random, size: 256),
|
|
46
|
-
assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136),
|
|
56
|
+
assert_in_delta 0.0, entropy_ratio(@empty, size: 128), 1e-12
|
|
57
|
+
assert_in_delta 0.0, entropy_ratio(@low, size: 128), 1e-12
|
|
58
|
+
assert_in_delta 0.564, entropy_ratio(@string, size: 128), 1e-3
|
|
59
|
+
assert_in_delta 0.633, entropy_ratio(@high, size: 128), 1e-3
|
|
60
|
+
assert_in_delta 1.0, entropy_ratio(@random, size: @random.size), 1e-3
|
|
61
|
+
assert_in_delta 0.462, entropy_ratio(@random, size: 256), 1e-3
|
|
62
|
+
assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136), 1e-3
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def test_entropy_probabilities
|
|
66
|
+
probs = entropy_probabilities('ABAB')
|
|
67
|
+
assert_equal 0.5, probs['A']
|
|
68
|
+
assert_equal 0.5, probs['B']
|
|
69
|
+
|
|
70
|
+
probs = entropy_probabilities('AAAA')
|
|
71
|
+
assert_equal 1.0, probs['A']
|
|
72
|
+
|
|
73
|
+
probs = entropy_probabilities([])
|
|
74
|
+
assert_equal({}, probs)
|
|
75
|
+
|
|
76
|
+
# Ensure the method accepts an Array of symbols
|
|
77
|
+
probs = entropy_probabilities(['x', 'y', 'x'])
|
|
78
|
+
assert_equal 2.0 / 3.0, probs['x']
|
|
79
|
+
assert_equal 1.0 / 3.0, probs['y']
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def test_minimum_entropy_per_symbol
|
|
83
|
+
# Uniform distribution → entropy equals log2(size)
|
|
84
|
+
assert_in_delta 2.0, minimum_entropy_per_symbol('ABCD'), 1e-12
|
|
85
|
+
|
|
86
|
+
# Single symbol → 0
|
|
87
|
+
assert_in_delta 0.0, minimum_entropy_per_symbol('AAAA'), 1e-12
|
|
88
|
+
|
|
89
|
+
# Empty string → 0
|
|
90
|
+
assert_in_delta 0.0, minimum_entropy_per_symbol(''), 1e-12
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def test_collision_entropy_per_symbol
|
|
94
|
+
# For a uniform distribution, collision entropy = log2(size)
|
|
95
|
+
assert_in_delta 2.0, collision_entropy_per_symbol('ABCD'), 1e-12
|
|
96
|
+
|
|
97
|
+
# All symbols the same → 0
|
|
98
|
+
assert_in_delta 0.0, collision_entropy_per_symbol('AAAA'), 1e-12
|
|
99
|
+
|
|
100
|
+
# Empty string → 0
|
|
101
|
+
assert_in_delta 0.0, collision_entropy_per_symbol(''), 1e-12
|
|
47
102
|
end
|
|
48
103
|
|
|
49
|
-
def
|
|
50
|
-
|
|
51
|
-
|
|
104
|
+
def test_entropy_total
|
|
105
|
+
text = 'ABCD'
|
|
106
|
+
per = entropy_per_symbol(text)
|
|
107
|
+
assert_in_delta per * text.size, entropy_total(text), 1e-12
|
|
52
108
|
|
|
53
|
-
|
|
109
|
+
assert_in_delta 0.0, entropy_total(''), 1e-12
|
|
110
|
+
end
|
|
54
111
|
|
|
55
|
-
|
|
56
|
-
|
|
112
|
+
def test_minimum_entropy_total
|
|
113
|
+
text = 'ABCD'
|
|
114
|
+
per = minimum_entropy_per_symbol(text)
|
|
115
|
+
assert_in_delta per * text.size, minimum_entropy_total(text), 1e-12
|
|
57
116
|
|
|
58
|
-
|
|
59
|
-
ratio = entropy_ratio(token, size: 16)
|
|
60
|
-
assert_operator ratio, :>=, limit
|
|
117
|
+
assert_in_delta 0.0, minimum_entropy_total(''), 1e-12
|
|
61
118
|
end
|
|
62
119
|
|
|
63
|
-
def
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
120
|
+
def test_collision_entropy_total
|
|
121
|
+
text = 'ABCD'
|
|
122
|
+
per = collision_entropy_per_symbol(text)
|
|
123
|
+
assert_in_delta per * text.size, collision_entropy_total(text), 1e-12
|
|
67
124
|
|
|
68
|
-
|
|
125
|
+
assert_in_delta 0.0, collision_entropy_total(''), 1e-12
|
|
69
126
|
end
|
|
70
127
|
end
|