more_math 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +11 -0
- data/lib/more_math/entropy.rb +39 -4
- data/lib/more_math/version.rb +1 -1
- data/more_math.gemspec +4 -4
- data/tests/entropy_test.rb +37 -10
- metadata +3 -3
- /data/tests/{sequence/refinement_test.rb → sequence_refinement_test.rb} +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bd844028040726f15a03d4260e794d2b6dd47edf0ddac071be3fc2d3e1742be5
|
|
4
|
+
data.tar.gz: 9fc2b5f24a5ffd586cfcd7f700dcb85510f799a8f1af356289233316d6f91966
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ddd7116481e881af48ab5cd854f4ff6def832c30e491767bae3835f17bd2f12ba19d171c9b765d3ef41b1f50b33e6a5fa9ca73a7c6c9817379da13b0aa63e509
|
|
7
|
+
data.tar.gz: 0346e146c89234d85eb9ab38a69598f2dc879ea3335c23cddc36cc29ddfaf1cb0cd3e44fb7bc17a4634c3cfbb37a4ff2d0916d04f630b42dd1a7de7b2d8da341
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-01-16 v1.9.0
|
|
4
|
+
|
|
5
|
+
- Added support for array inputs in entropy calculation methods by checking
|
|
6
|
+
`text.respond_to?(:chars)` and using raw arrays when appropriate
|
|
7
|
+
- Added `MoreMath::Entropy.entropy_ratio_minimum` method to provide
|
|
8
|
+
conservative lower bound accounting for sampling error
|
|
9
|
+
- Updated `entropy_ratio` method to use `text.size` instead of
|
|
10
|
+
`text.each_char.size` for consistency
|
|
11
|
+
- Added comprehensive tests for new minimum entropy
|
|
12
|
+
ratio methods
|
|
13
|
+
|
|
3
14
|
## 2026-01-15 v1.8.0
|
|
4
15
|
|
|
5
16
|
- Added tests for `entropy_ratio` and `lambert_w` method inclusion/extension
|
data/lib/more_math/entropy.rb
CHANGED
|
@@ -37,7 +37,12 @@ module MoreMath
|
|
|
37
37
|
# @param text [String] The input text to calculate entropy for
|
|
38
38
|
# @return [Float] The Shannon entropy in bits
|
|
39
39
|
def entropy(text)
|
|
40
|
-
chars =
|
|
40
|
+
chars = nil
|
|
41
|
+
if text.respond_to?(:chars)
|
|
42
|
+
chars = text.chars
|
|
43
|
+
else
|
|
44
|
+
chars = text
|
|
45
|
+
end
|
|
41
46
|
size = chars.size
|
|
42
47
|
|
|
43
48
|
chars.each_with_object(Hash.new(0.0)) { |c, h| h[c] += 1 }.
|
|
@@ -64,7 +69,6 @@ module MoreMath
|
|
|
64
69
|
-1.0 * size * frequency * Math.log2(frequency)
|
|
65
70
|
end
|
|
66
71
|
|
|
67
|
-
|
|
68
72
|
# Calculates the normalized entropy ratio of a text string.
|
|
69
73
|
#
|
|
70
74
|
# The ratio is calculated as actual entropy divided by ideal entropy,
|
|
@@ -86,13 +90,44 @@ module MoreMath
|
|
|
86
90
|
#
|
|
87
91
|
# @param text [String] The input text to calculate entropy ratio for
|
|
88
92
|
# @param size [Integer] The size of the character set to normalize against.
|
|
89
|
-
# Defaults to the total length of the text (`text.
|
|
93
|
+
# Defaults to the total length of the text (`text.size`), which
|
|
90
94
|
# normalizes the entropy relative to the text's own character space.
|
|
91
95
|
# This allows comparison of texts with different lengths on the same scale.
|
|
92
96
|
# @return [Float] Normalized entropy ratio between 0 and 1
|
|
93
|
-
def entropy_ratio(text, size: text.each_char.size)
|
|
97
|
+
def entropy_ratio(text, size: text.size)
|
|
94
98
|
size <= 1 and return 0.0
|
|
95
99
|
entropy(text) / entropy_ideal(size)
|
|
96
100
|
end
|
|
101
|
+
|
|
102
|
+
# Calculates the minimum entropy ratio with confidence interval adjustment
|
|
103
|
+
#
|
|
104
|
+
# This method computes an adjusted entropy ratio that accounts for
|
|
105
|
+
# statistical uncertainty by incorporating the standard error and a
|
|
106
|
+
# confidence level.
|
|
107
|
+
#
|
|
108
|
+
# @param text [String] The input text to calculate entropy ratio for
|
|
109
|
+
# @param size [Integer] The size of the character set to normalize against
|
|
110
|
+
# @param alpha [Float] The significance level for the confidence interval (default: 0.05)
|
|
111
|
+
# @return [Float] The lower confidence bound of the entropy ratio, clamped to the range 0..1
|
|
112
|
+
# @raise [ArgumentError] When alphabet size is less than 2
|
|
113
|
+
# @raise [ArgumentError] When text is empty
|
|
114
|
+
def entropy_ratio_minimum(text, size: text.size, alpha: 0.05)
|
|
115
|
+
raise ArgumentError, 'alphabet size must be ≥ 2' if size < 2
|
|
116
|
+
raise ArgumentError, 'text must not be empty' if text.empty?
|
|
117
|
+
|
|
118
|
+
n = text.size
|
|
119
|
+
k = size
|
|
120
|
+
|
|
121
|
+
ratio = MoreMath::Functions.entropy_ratio(text, size: k)
|
|
122
|
+
|
|
123
|
+
logk = Math.log2(k)
|
|
124
|
+
diff = logk - 1.0 / Math.log(2)
|
|
125
|
+
var = (diff ** 2) / (logk ** 2) * (1.0 - 1.0 / k) / n
|
|
126
|
+
se = Math.sqrt(var) # standard error
|
|
127
|
+
|
|
128
|
+
z = STD_NORMAL_DISTRIBUTION.inverse_probability(1.0 - alpha / 2.0)
|
|
129
|
+
|
|
130
|
+
(ratio - z * se).clamp(0, 1)
|
|
131
|
+
end
|
|
97
132
|
end
|
|
98
133
|
end
|
data/lib/more_math/version.rb
CHANGED
data/more_math.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: more_math 1.8.0 ruby lib
|
|
2
|
+
# stub: more_math 1.9.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "more_math".freeze
|
|
6
|
-
s.version = "1.8.0".freeze
|
|
6
|
+
s.version = "1.9.0".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
|
@@ -12,14 +12,14 @@ Gem::Specification.new do |s|
|
|
|
12
12
|
s.description = "Library that provides more mathematical functions/algorithms than standard Ruby.".freeze
|
|
13
13
|
s.email = "flori@ping.de".freeze
|
|
14
14
|
s.extra_rdoc_files = ["README.md".freeze, "lib/more_math.rb".freeze, "lib/more_math/cantor_pairing_function.rb".freeze, "lib/more_math/constants/functions_constants.rb".freeze, "lib/more_math/continued_fraction.rb".freeze, "lib/more_math/distributions.rb".freeze, "lib/more_math/entropy.rb".freeze, "lib/more_math/exceptions.rb".freeze, "lib/more_math/functions.rb".freeze, "lib/more_math/histogram.rb".freeze, "lib/more_math/lambert.rb".freeze, "lib/more_math/linear_regression.rb".freeze, "lib/more_math/newton_bisection.rb".freeze, "lib/more_math/numberify_string_function.rb".freeze, "lib/more_math/permutation.rb".freeze, "lib/more_math/ranking_common.rb".freeze, "lib/more_math/sequence.rb".freeze, "lib/more_math/sequence/moving_average.rb".freeze, "lib/more_math/sequence/refinement.rb".freeze, "lib/more_math/string_numeral.rb".freeze, "lib/more_math/subset.rb".freeze, "lib/more_math/version.rb".freeze]
|
|
15
|
-
s.files = ["CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "lib/more_math.rb".freeze, "lib/more_math/cantor_pairing_function.rb".freeze, "lib/more_math/constants/functions_constants.rb".freeze, "lib/more_math/continued_fraction.rb".freeze, "lib/more_math/distributions.rb".freeze, "lib/more_math/entropy.rb".freeze, "lib/more_math/exceptions.rb".freeze, "lib/more_math/functions.rb".freeze, "lib/more_math/histogram.rb".freeze, "lib/more_math/lambert.rb".freeze, "lib/more_math/linear_regression.rb".freeze, "lib/more_math/newton_bisection.rb".freeze, "lib/more_math/numberify_string_function.rb".freeze, "lib/more_math/permutation.rb".freeze, "lib/more_math/ranking_common.rb".freeze, "lib/more_math/sequence.rb".freeze, "lib/more_math/sequence/moving_average.rb".freeze, "lib/more_math/sequence/refinement.rb".freeze, "lib/more_math/string_numeral.rb".freeze, "lib/more_math/subset.rb".freeze, "lib/more_math/version.rb".freeze, "more_math.gemspec".freeze, "tests/cantor_pairing_function_test.rb".freeze, "tests/continued_fraction_test.rb".freeze, "tests/distribution_test.rb".freeze, "tests/entropy_test.rb".freeze, "tests/functions_test.rb".freeze, "tests/histogram_test.rb".freeze, "tests/lambert_test.rb".freeze, "tests/newton_bisection_test.rb".freeze, "tests/numberify_string_function_test.rb".freeze, "tests/permutation_test.rb".freeze, "tests/
|
|
15
|
+
s.files = ["CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "lib/more_math.rb".freeze, "lib/more_math/cantor_pairing_function.rb".freeze, "lib/more_math/constants/functions_constants.rb".freeze, "lib/more_math/continued_fraction.rb".freeze, "lib/more_math/distributions.rb".freeze, "lib/more_math/entropy.rb".freeze, "lib/more_math/exceptions.rb".freeze, "lib/more_math/functions.rb".freeze, "lib/more_math/histogram.rb".freeze, "lib/more_math/lambert.rb".freeze, "lib/more_math/linear_regression.rb".freeze, "lib/more_math/newton_bisection.rb".freeze, "lib/more_math/numberify_string_function.rb".freeze, "lib/more_math/permutation.rb".freeze, "lib/more_math/ranking_common.rb".freeze, "lib/more_math/sequence.rb".freeze, "lib/more_math/sequence/moving_average.rb".freeze, "lib/more_math/sequence/refinement.rb".freeze, "lib/more_math/string_numeral.rb".freeze, "lib/more_math/subset.rb".freeze, "lib/more_math/version.rb".freeze, "more_math.gemspec".freeze, "tests/cantor_pairing_function_test.rb".freeze, "tests/continued_fraction_test.rb".freeze, "tests/distribution_test.rb".freeze, "tests/entropy_test.rb".freeze, "tests/functions_test.rb".freeze, "tests/histogram_test.rb".freeze, "tests/lambert_test.rb".freeze, "tests/newton_bisection_test.rb".freeze, "tests/numberify_string_function_test.rb".freeze, "tests/permutation_test.rb".freeze, "tests/sequence_moving_average_test.rb".freeze, "tests/sequence_refinement_test.rb".freeze, "tests/sequence_test.rb".freeze, "tests/string_numeral_test.rb".freeze, "tests/subset_test.rb".freeze, "tests/test_helper.rb".freeze]
|
|
16
16
|
s.homepage = "https://github.com/flori/more_math".freeze
|
|
17
17
|
s.licenses = ["MIT".freeze]
|
|
18
18
|
s.rdoc_options = ["--title".freeze, "MoreMath -- More Math in Ruby".freeze, "--main".freeze, "README.md".freeze]
|
|
19
19
|
s.required_ruby_version = Gem::Requirement.new(">= 2.0".freeze)
|
|
20
20
|
s.rubygems_version = "4.0.3".freeze
|
|
21
21
|
s.summary = "Library that provides more mathematics.".freeze
|
|
22
|
-
s.test_files = ["tests/cantor_pairing_function_test.rb".freeze, "tests/continued_fraction_test.rb".freeze, "tests/distribution_test.rb".freeze, "tests/entropy_test.rb".freeze, "tests/functions_test.rb".freeze, "tests/histogram_test.rb".freeze, "tests/lambert_test.rb".freeze, "tests/newton_bisection_test.rb".freeze, "tests/numberify_string_function_test.rb".freeze, "tests/permutation_test.rb".freeze, "tests/
|
|
22
|
+
s.test_files = ["tests/cantor_pairing_function_test.rb".freeze, "tests/continued_fraction_test.rb".freeze, "tests/distribution_test.rb".freeze, "tests/entropy_test.rb".freeze, "tests/functions_test.rb".freeze, "tests/histogram_test.rb".freeze, "tests/lambert_test.rb".freeze, "tests/newton_bisection_test.rb".freeze, "tests/numberify_string_function_test.rb".freeze, "tests/permutation_test.rb".freeze, "tests/sequence_moving_average_test.rb".freeze, "tests/sequence_refinement_test.rb".freeze, "tests/sequence_test.rb".freeze, "tests/string_numeral_test.rb".freeze, "tests/subset_test.rb".freeze, "tests/test_helper.rb".freeze]
|
|
23
23
|
|
|
24
24
|
s.specification_version = 4
|
|
25
25
|
|
data/tests/entropy_test.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
3
|
require 'test_helper'
|
|
4
|
+
require 'tins'
|
|
4
5
|
|
|
5
6
|
class EntropyTest < Test::Unit::TestCase
|
|
6
7
|
include MoreMath::Functions
|
|
@@ -11,14 +12,16 @@ class EntropyTest < Test::Unit::TestCase
|
|
|
11
12
|
@string = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
|
|
12
13
|
@high = 'The quick brown fox jumps over the lazy dog'
|
|
13
14
|
@random = "\xAC-\x8A\xF5\xA8\xF7\\\e\xB5\x8CI\x06\xA7"
|
|
15
|
+
@hi = "こんにちは世界"
|
|
14
16
|
end
|
|
15
17
|
|
|
16
18
|
def test_entropy
|
|
17
19
|
assert_equal 0, entropy(@empty)
|
|
18
20
|
assert_equal 0, entropy(@low)
|
|
19
|
-
assert_in_delta 3.
|
|
20
|
-
assert_in_delta 4.
|
|
21
|
+
assert_in_delta 3.951, entropy(@string), 1E-3
|
|
22
|
+
assert_in_delta 4.431, entropy(@high), 1E-3
|
|
21
23
|
assert_in_delta 3.700, entropy(@random), 1E-3
|
|
24
|
+
assert_in_delta 2.807, entropy(@hi), 1E-3
|
|
22
25
|
end
|
|
23
26
|
|
|
24
27
|
def test_entropy_ideal
|
|
@@ -26,18 +29,42 @@ class EntropyTest < Test::Unit::TestCase
|
|
|
26
29
|
assert_equal 0, entropy_ideal(0)
|
|
27
30
|
assert_equal 0, entropy_ideal(0.5)
|
|
28
31
|
assert_equal 0, entropy_ideal(1)
|
|
29
|
-
assert_in_delta 1,
|
|
32
|
+
assert_in_delta 1, entropy_ideal(2), 1E-3
|
|
30
33
|
assert_in_delta 1.584, entropy_ideal(3), 1E-3
|
|
31
|
-
assert_in_delta 3,
|
|
34
|
+
assert_in_delta 3, entropy_ideal(8), 1E-3
|
|
32
35
|
assert_in_delta 3.321, entropy_ideal(10), 1E-3
|
|
33
|
-
assert_in_delta 4,
|
|
36
|
+
assert_in_delta 4, entropy_ideal(16), 1E-3
|
|
34
37
|
end
|
|
35
38
|
|
|
36
39
|
def test_entropy_ratio
|
|
37
|
-
assert_equal 0,
|
|
38
|
-
assert_equal 0,
|
|
39
|
-
assert_in_delta 0.
|
|
40
|
-
assert_in_delta 0.
|
|
41
|
-
assert_in_delta 1.0,
|
|
40
|
+
assert_equal 0, entropy_ratio(@empty)
|
|
41
|
+
assert_equal 0, entropy_ratio(@low, size: 128)
|
|
42
|
+
assert_in_delta 0.564, entropy_ratio(@string, size: 128), 1E-3
|
|
43
|
+
assert_in_delta 0.633, entropy_ratio(@high, size: 128), 1E-3
|
|
44
|
+
assert_in_delta 1.0, entropy_ratio(@random), 1E-3
|
|
45
|
+
assert_in_delta 0.462, entropy_ratio(@random, size: 256), 1E-3
|
|
46
|
+
assert_in_delta 0.253, entropy_ratio(@hi, size: 2_136), 1E-3
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_entropy_ratio_minimum_basic
|
|
50
|
+
# A fairly long random token over a 16‑symbol alphabet
|
|
51
|
+
token = Tins::Token.new(length: 128, alphabet: Tins::Token::BASE16_LOWERCASE_ALPHABET)
|
|
52
|
+
|
|
53
|
+
limit = entropy_ratio_minimum(token, size: 16)
|
|
54
|
+
|
|
55
|
+
# Bounds must be ≧ 0
|
|
56
|
+
assert_operator limit, :>=, 0.0
|
|
57
|
+
|
|
58
|
+
# The observed ratio should be ≧ limit
|
|
59
|
+
ratio = entropy_ratio(token, size: 16)
|
|
60
|
+
assert_operator ratio, :>=, limit
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def test_entropy_ratio_minimum_small
|
|
64
|
+
# Very short string – the interval will stay below 1.0
|
|
65
|
+
str = 'a' # alphabet size 2 (binary)
|
|
66
|
+
limit = entropy_ratio_minimum(str, size: 2)
|
|
67
|
+
|
|
68
|
+
assert_equal 0.0, limit
|
|
42
69
|
end
|
|
43
70
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: more_math
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.8.0
|
|
4
|
+
version: 1.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Florian Frank
|
|
@@ -201,8 +201,8 @@ files:
|
|
|
201
201
|
- tests/newton_bisection_test.rb
|
|
202
202
|
- tests/numberify_string_function_test.rb
|
|
203
203
|
- tests/permutation_test.rb
|
|
204
|
-
- tests/sequence/refinement_test.rb
|
|
205
204
|
- tests/sequence_moving_average_test.rb
|
|
205
|
+
- tests/sequence_refinement_test.rb
|
|
206
206
|
- tests/sequence_test.rb
|
|
207
207
|
- tests/string_numeral_test.rb
|
|
208
208
|
- tests/subset_test.rb
|
|
@@ -243,8 +243,8 @@ test_files:
|
|
|
243
243
|
- tests/newton_bisection_test.rb
|
|
244
244
|
- tests/numberify_string_function_test.rb
|
|
245
245
|
- tests/permutation_test.rb
|
|
246
|
-
- tests/sequence/refinement_test.rb
|
|
247
246
|
- tests/sequence_moving_average_test.rb
|
|
247
|
+
- tests/sequence_refinement_test.rb
|
|
248
248
|
- tests/sequence_test.rb
|
|
249
249
|
- tests/string_numeral_test.rb
|
|
250
250
|
- tests/subset_test.rb
|
|
File without changes
|