entropy 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +674 -0
- data/README.txt +48 -0
- data/Rakefile +22 -0
- data/doc/rdocs/Entropy.html +234 -0
- data/doc/rdocs/Entropy/Dict.html +311 -0
- data/doc/rdocs/Entropy/ProbabilityMetricSpace.html +544 -0
- data/doc/rdocs/Entropy/ProbabilitySpace.html +473 -0
- data/doc/rdocs/Entropy/Stream.html +281 -0
- data/doc/rdocs/LICENSE_txt.html +834 -0
- data/doc/rdocs/README_txt.html +138 -0
- data/doc/rdocs/created.rid +9 -0
- data/doc/rdocs/images/brick.png +0 -0
- data/doc/rdocs/images/brick_link.png +0 -0
- data/doc/rdocs/images/bug.png +0 -0
- data/doc/rdocs/images/bullet_black.png +0 -0
- data/doc/rdocs/images/bullet_toggle_minus.png +0 -0
- data/doc/rdocs/images/bullet_toggle_plus.png +0 -0
- data/doc/rdocs/images/date.png +0 -0
- data/doc/rdocs/images/find.png +0 -0
- data/doc/rdocs/images/loadingAnimation.gif +0 -0
- data/doc/rdocs/images/macFFBgHack.png +0 -0
- data/doc/rdocs/images/package.png +0 -0
- data/doc/rdocs/images/page_green.png +0 -0
- data/doc/rdocs/images/page_white_text.png +0 -0
- data/doc/rdocs/images/page_white_width.png +0 -0
- data/doc/rdocs/images/plugin.png +0 -0
- data/doc/rdocs/images/ruby.png +0 -0
- data/doc/rdocs/images/tag_green.png +0 -0
- data/doc/rdocs/images/wrench.png +0 -0
- data/doc/rdocs/images/wrench_orange.png +0 -0
- data/doc/rdocs/images/zoom.png +0 -0
- data/doc/rdocs/index.html +155 -0
- data/doc/rdocs/js/darkfish.js +116 -0
- data/doc/rdocs/js/jquery.js +32 -0
- data/doc/rdocs/js/quicksearch.js +114 -0
- data/doc/rdocs/js/thickbox-compressed.js +10 -0
- data/doc/rdocs/lib/entropy/dict_rb.html +56 -0
- data/doc/rdocs/lib/entropy/metric_space_rb.html +54 -0
- data/doc/rdocs/lib/entropy/probability_space_rb.html +52 -0
- data/doc/rdocs/lib/entropy/stream_rb.html +52 -0
- data/doc/rdocs/lib/entropy/version_rb.html +52 -0
- data/doc/rdocs/lib/entropy_rb.html +62 -0
- data/doc/rdocs/rdoc.css +763 -0
- data/entropy.gemspec +24 -0
- data/example/metric_prob_space.rb +42 -0
- data/example/prob_space.rb +31 -0
- data/lib/entropy.rb +13 -0
- data/lib/entropy/dict.rb +30 -0
- data/lib/entropy/metric_space.rb +116 -0
- data/lib/entropy/probability_space.rb +91 -0
- data/lib/entropy/stream.rb +37 -0
- data/lib/entropy/version.rb +3 -0
- data/test/stream0.txt +1 -0
- data/test/stream1.txt +1 -0
- data/test/stream2.txt +1 -0
- data/test/stream_random.bin +2 -0
- data/test/test_entropy_def.rb +10 -0
- data/test/test_entropy_metr.rb +36 -0
- data/test/test_entropy_prob.rb +64 -0
- data/test/test_entropy_stream.rb +29 -0
- metadata +127 -0
data/entropy.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "entropy/version"
|
4
|
+
|
5
|
+
# Gem specification for the "entropy" gem.
#
# The version is read from Entropy::VERSION and the file lists are produced
# by `git ls-files`, so the gem has to be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "entropy"
  s.version     = Entropy::VERSION
  s.authors     = ["hiraedd"]
  s.email       = ["hiraedd@hiraedd.com"]
  s.homepage    = "https://github.com/hiraedd/entropy_gem"
  s.summary     = %q{Computes the entropy of a finite probability space}
  s.description = %q{This little gem calculates the entropy (and the cardinality) of a finite probability space, defined by a probability vector; and of a finite probability metric space, defined by a probability vector and metric function.}

  # NOTE: `rubyforge_project` was dropped on purpose: the attribute is
  # deprecated by RubyGems and RubyForge itself no longer exists.

  # Package every tracked file; tests and executables are picked by path.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # specify any dependencies here; for example:
  # s.add_development_dependency "rspec"
  # s.add_runtime_dependency "rest-client"
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'entropy'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
# --- A stream made of a single repeated symbol ------------------------------
# The space has exactly one element, whose probability is 1.
obj = Entropy::ProbabilityMetricSpace.new
obj.add_stream("000000000")
obj.define_distance { |i, j| Math.sqrt(i*i + j*j) }
pp obj.prob_space # a single probability equal to 1

# --- Four distinct symbols, distance given as a block ------------------------
# The block receives the ordinals i, j of two elements; the resulting
# distance matrix has the same entries as the matrix m built by hand below.
obj = Entropy::ProbabilityMetricSpace.new
obj.add_stream("0123")
obj.define_distance { |i, j| Math.sqrt(i*i + j*j) }
m = Matrix.build(4, 4) { |i, j| Math.sqrt(i*i + j*j) }
pp obj.distance_matrix # = m
pp m

# --- Redefine prob_space, distance given as an explicit matrix ---------------
# The matrix must be square and have one row per probability.
puts "redefine prob_space and distance matrix"
obj.prob_space = [0.25, 0.75]
distance = Matrix[[2, 2], [4, 4]]
obj.define_distance(distance)
pp obj.prob_space # [0.25, 0.75]
pp distance
puts obj.cardinality(2)
puts obj.diversity(2)
puts obj.renyi_entropy(2)
puts obj.renyi_entropy(0)
puts obj.renyi_entropy(:infinite)

# --- Discrete set -------------------------------------------------------------
# setting infinite distance means having a discrete set
# Distinct elements are infinitely far apart while each element is at
# distance 0 from itself, so the density of every element is its own
# probability and the results match those of a plain ProbabilitySpace.
# (With :infinite on the diagonal as well, every density would be 0.)
puts "setting infinite distance means having a discrete set"
distance = Matrix[[0, :infinite], [:infinite, 0]]
obj.define_distance(distance)
pp obj.prob_space # [0.25, 0.75]
pp distance
puts obj.cardinality(2) # 1.6
puts obj.diversity(2) # 0.375
puts obj.renyi_entropy(2)
puts obj.renyi_entropy(0)
puts obj.renyi_entropy(:infinite)
|
41
|
+
|
42
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'entropy'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
# --- A stream made of a single repeated symbol ------------------------------
# One element with probability 1: cardinality 1, every entropy equal to 0.
obj = Entropy::ProbabilitySpace.new
obj.add_stream("000000000")
pp obj.prob_space # a single probability equal to 1
puts obj.cardinality(2) # 1
puts obj.diversity(2).round # 0
puts obj.renyi_entropy(2).round # 0
puts obj.renyi_entropy(:infinite).round # 0

# --- Four equally likely symbols ----------------------------------------------
# Entropies use the natural logarithm, so the uniform space on four
# elements has entropy ln(4) ~ 1.386 and cardinality exp(ln 4) = 4.
obj = Entropy::ProbabilitySpace.new
obj.add_stream("1234")
pp obj.prob_space # four probabilities equal to 0.25
puts obj.cardinality(2) # 4
puts obj.diversity(2) # 3/4
puts obj.renyi_entropy(2) # ln(4) ~ 1.386
puts obj.renyi_entropy(:infinite) # ln(4) ~ 1.386

# --- Redefine prob_space by hand ------------------------------------------------
puts "redefine prob_space"
obj.prob_space = [0.25, 0.75]
pp obj.prob_space # [0.25, 0.75]
puts obj.cardinality(2) # 1.6
puts obj.diversity(2) # 0.375
puts obj.renyi_entropy(2) # ln(1.6) ~ 0.470
puts obj.renyi_entropy(0) # ln(2) ~ 0.693
puts obj.renyi_entropy(:infinite) # -ln(0.75) ~ 0.288
|
29
|
+
|
30
|
+
|
31
|
+
|
data/lib/entropy.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "entropy/version"
|
2
|
+
require "entropy/stream"
|
3
|
+
require "entropy/dict"
|
4
|
+
require "entropy/probability_space.rb"
|
5
|
+
require "entropy/metric_space.rb"
|
6
|
+
|
7
|
+
module Entropy

  class << self

    # Returns a human-readable banner made of the gem name followed by
    # the value of Entropy::VERSION.
    def version_string
      "Entropy version #{Entropy::VERSION}"
    end

  end

end
|
data/lib/entropy/dict.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
require 'bigdecimal/util'
|
3
|
+
|
4
|
+
|
5
|
+
module Entropy

  # Frequency table for the elements of a space: it counts how many times
  # each element has been recorded and how many recordings were made overall.
  class Dict

    def initialize
      @space_size = 0          # total number of recorded occurrences
      @bucket = Hash.new(0)    # element => number of occurrences
    end

    # Records one more occurrence of the element +k+.
    # Returns the updated total number of recorded occurrences.
    def upd(k)
      @bucket[k] = @bucket[k].succ
      @space_size = @space_size.succ
    end

    # Relative frequency of the element +k+, as a BigDecimal:
    # occurrences of +k+ divided by the total number of occurrences.
    # Looking up an element that was never recorded does not add it.
    def freq(k)
      occurrences = @bucket[k]
      occurrences.to_d / @space_size
    end

    # Returns the distinct elements recorded so far, in insertion order.
    def scan_keys
      @bucket.each_key.to_a
    end

  end

end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require "matrix"
|
2
|
+
|
3
|
+
module Entropy

  # Entropy::ProbabilityMetricSpace defines a finite probability metric space:
  # the probability vector inherited from ProbabilitySpace plus a matrix of
  # pairwise distances between the elements.
  #
  # From the distances the class derives a density vector
  #
  #   density[i] = sum over j of exp(-d(i, j)) * prob_space[j]
  #
  # where an entry equal to :infinite contributes 0. All the measures below
  # are computed from the probabilities and this density. The density (and,
  # when the distance was given as a block, the matrix) is recomputed every
  # time the probability vector changes, so they never go out of step.
  class ProbabilityMetricSpace < ProbabilitySpace

    attr_reader :distance_matrix, :distance_function

    def initialize
      @distance_matrix = nil
      @distance_function = nil
      @density = nil
      super
    end

    # Adds a set of elements (char, bit) to the space and brings the
    # distance matrix and density up to date with the new probabilities.
    # Returns whatever the inherited implementation returns.
    def add_stream(stream)
      result = super(stream)
      refresh_metric
      result
    end

    # Replaces the probability vector and brings the distance matrix and
    # density up to date with it.
    def prob_space=(probabilities)
      super
      refresh_metric
    end

    # Defines the distance among the elements of the set.
    # The method accepts a block, so you can use your
    # function to calculate the distance. The function should
    # accept two parameters, viz. the ordinals i,j of the elements
    # and should return a decimal number, the distance between
    # element i and j.
    # In alternative you could pass a matrix of numbers defining
    # the distances among i's and j's; an entry may be :infinite.
    #
    # Raises a RuntimeError when no block is given and the argument does
    # not respond to +square?+, is not square, or does not have one row
    # per element of the probability space.
    # Returns the density vector as an Array.
    def define_distance(*args, &blk)
      if blk
        @distance_function = blk
        update_distance_matrix
      else
        mat = args[0]
        raise "distance matrix is not a matrix object" unless mat.respond_to?("square?")
        raise "distance matrix is not square" unless mat.square?
        raise "distance matrix has wrong dimensions" if mat.to_a.size != @prob_space.size
        # An explicit matrix supersedes any previously given function, so
        # later refreshes must not rebuild the matrix from that function.
        @distance_function = nil
        @distance_matrix = mat
      end
      update_density
    end

    # Computes the Shannon Entropy of the space
    def shannon_entropy
      diversity(1)
    end

    # Computes the Rao Entropy of the space
    def rao_entropy
      diversity(2)
    end

    # Computes the Renyi Entropy (α-Entropy) of the space.
    # +alpha+ is a number or :infinite.
    # Raises a RuntimeError when no distance has been defined.
    def renyi_entropy(alpha)
      raise "Warning: distance metric not defined..." if @distance_matrix.nil?
      if alpha == 1
        weighted_pairs.inject(0) { |h, (p, d)| h + (-p * Math.log(d)) }
      elsif alpha == :infinite
        -Math.log(@density.max)
      else
        1.0 / (1 - alpha) * Math.log(density_moment(alpha))
      end
    end

    # Computes the α-Diversity of the space.
    # Raises a RuntimeError when no distance has been defined.
    def diversity(alpha)
      raise "Warning: distance metric not defined..." if @distance_matrix.nil?
      weighted_pairs.map { |p, d| p * surprise(alpha, d) }.inject(:+)
    end

    # Computes the α-Cardinality of the space.
    # +alpha+ is a number or :infinite.
    # Raises a RuntimeError when no distance has been defined.
    def cardinality(alpha)
      raise "Warning: distance metric not defined..." if @distance_matrix.nil?
      if alpha == 1
        weighted_pairs.inject(1) { |card, (p, d)| card * (d ** (-p)) }
      elsif alpha == :infinite
        1.0 / @density.max
      else
        card = density_moment(alpha)
        # A zero moment is returned as is: raising 0 to a negative power
        # is not meaningful here.
        card != 0 ? card ** (1.0 / (1 - alpha)) : card
      end
    end

    private

    # Pairs every probability with the density of the same element.
    def weighted_pairs
      @prob_space.zip(@density)
    end

    # Sum over the elements of prob * density**(alpha - 1).
    def density_moment(alpha)
      weighted_pairs.inject(0) { |sum, (p, d)| sum + p * (d ** (alpha - 1)) }
    end

    # Rebuilds the distance matrix from the user supplied function, one
    # row and one column per element of the probability space.
    def update_distance_matrix
      return if @distance_function.nil?
      puts "Applying function #{@distance_function.to_s} to the probability space..."
      @distance_matrix = Matrix.build(@prob_space.size) { |i, j| @distance_function.call(i, j) }
    end

    # Recomputes the density vector from the current distance matrix and
    # probabilities; :infinite distances contribute 0.
    def update_density
      similarity = @distance_matrix.map { |d| d == :infinite ? 0 : Math.exp(-d) }
      @density = (similarity * Matrix.column_vector(@prob_space)).column(0).to_a
    end

    # Called after the probability vector changed. With a distance
    # function the matrix is rebuilt for the new size; an explicit matrix
    # is kept when it still has one row per element and discarded
    # otherwise, so that the entropy methods report a missing distance
    # instead of working on mismatched data.
    def refresh_metric
      if @distance_function
        update_distance_matrix
        update_density
      elsif @distance_matrix
        if @distance_matrix.to_a.size == @prob_space.size
          update_density
        else
          @distance_matrix = nil
          @density = nil
        end
      end
    end

  end

end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Entropy

  # Entropy::ProbabilitySpace defines a finite probability space.
  #
  # The probability vector is either derived from the streams added with
  # add_stream (relative frequency of every distinct element) or assigned
  # directly through prob_space=. All logarithms are natural logarithms.
  # Elements with probability 0 do not contribute to any measure
  # (0 * log 0 is taken to be 0 and they are not part of the support).
  class ProbabilitySpace

    attr_accessor :prob_space

    def initialize
      @space = Entropy::Stream.new
      @dict = Entropy::Dict.new
      @prob_space = []
    end

    # Adds a set of elements (char, bit) to the space and recomputes the
    # probability vector from all the elements seen so far.
    def add_stream(stream)
      puts "Adding a set in the probability space..."
      @space.add_stream(stream)
      update_probability_space(stream)
    end

    # Computes the Shannon Entropy of the space
    def shannon_entropy
      diversity(1)
    end

    # Computes the Renyi Entropy (α-Entropy) of the space.
    # +alpha+ is a number or :infinite; alpha == 1 gives the Shannon
    # entropy and :infinite gives -log of the largest probability.
    def renyi_entropy(alpha)
      if alpha == 1
        support.map { |p| -p * Math.log(p) }.inject(:+)
      elsif alpha == :infinite
        -Math.log(@prob_space.max)
      else
        1.0 / (1 - alpha) * Math.log(power_sum(alpha))
      end
    end

    # Computes the α-Diversity of the space: the expected value of the
    # surprise function over the elements with non-zero probability.
    def diversity(alpha)
      support.map { |p| p * surprise(alpha, p) }.inject(:+)
    end

    # Surprise function: -log(p) for alpha == 1, 0 for :infinite and
    # (1 - p**(alpha - 1)) / (alpha - 1) otherwise.
    def surprise(alpha, p)
      if alpha == 1
        -Math.log(p)
      elsif alpha == :infinite
        0
      else
        e = alpha - 1
        1.0 / e * (1 - p**e)
      end
    end

    # Computes the α-Cardinality of the space.
    # +alpha+ is a number or :infinite.
    def cardinality(alpha)
      if alpha == 1
        support.map { |p| p**(-p) }.inject(1, :*)
      elsif alpha == :infinite
        1.0 / @prob_space.max
      else
        power_sum(alpha) ** (1.0 / (1 - alpha))
      end
    end

    private

    # Probabilities of the elements that can actually occur. Dropping the
    # zeros avoids NaN from 0 * log(0) and keeps impossible elements out
    # of the count when alpha is 0.
    def support
      @prob_space.reject { |p| p == 0 }
    end

    # Sum of p**alpha over the support.
    def power_sum(alpha)
      support.map { |p| p**alpha }.inject(:+)
    end

    # Counts the elements of +stream+ (read with the iterator registered
    # for it) into the dictionary and rebuilds the probability vector
    # from the frequencies of all the elements recorded so far.
    # Returns the list of distinct elements.
    def update_probability_space(stream)
      @prob_space = []
      iterator = @space.streams[stream]
      stream.public_send(iterator).each do |element|
        if iterator == :each_char
          # Line terminators are not elements of the space.
          element = element.chomp
          next if element.empty?
        end
        @dict.upd(element)
      end
      @dict.scan_keys.each do |key|
        @prob_space << @dict.freq(key)
      end
    end

  end

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Entropy

  # Registry of the streams that belong to a space. For every stream it
  # remembers the name of the iteration method used to read its elements.
  class Stream

    attr_reader :streams

    def initialize
      @streams = {}
    end

    # Adds a stream (a File or a String) to the space.
    #
    # Objects answering +binmode?+ are read byte by byte when in binary
    # mode and char by char otherwise; any other object is read with
    # +each_char+ when available, else with +each_byte+.
    # Raises a RuntimeError when the object supports none of these.
    # Returns the chosen iterator name (:each_char or :each_byte).
    def add_stream(stream)
      notice, iter =
        if stream.respond_to?(:binmode?)
          if stream.binmode?
            ["Binary File Stream Initialization...", :each_byte]
          else
            ["Char File Stream Initialization...", :each_char]
          end
        elsif stream.respond_to?(:each_char)
          ["Char Stream Initialization...", :each_char]
        elsif stream.respond_to?(:each_byte)
          ["Binary Stream Initialization...", :each_byte]
        else
          raise "Wrong type of stream"
        end
      puts notice
      @streams[stream] = iter
    end

  end

end
|
data/test/stream0.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0000000000000000000000000000000000000000000000000
|
data/test/stream1.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0123456789
|
data/test/stream2.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
00000010
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'entropy'
|
3
|
+
|
4
|
+
# Tests for Entropy::ProbabilityMetricSpace.
#
# Assertions follow the test/unit convention assert_equal(expected, actual)
# so that failure messages label the two values correctly.
class TestEntropy < Test::Unit::TestCase

  # A stream with a single repeated symbol yields one element of probability 1.
  def test_string_metr
    obj = Entropy::ProbabilityMetricSpace.new
    obj.add_stream("000000000")
    obj.define_distance { |i, j| Math.sqrt(i*i + j*j) }
    assert_equal [1.0], obj.prob_space
  end

  # Two distinct symbols occurring once and three times out of four.
  def test_string_metr_2
    obj = Entropy::ProbabilityMetricSpace.new
    obj.add_stream("1000")
    obj.define_distance { |i, j| Math.sqrt(i*i + j*j) }
    assert_equal [0.25, 0.75], obj.prob_space.sort
  end

  # The matrix built from a distance block has one entry per pair of ordinals.
  def test_distance_metr
    obj = Entropy::ProbabilityMetricSpace.new
    obj.add_stream("0123")
    obj.define_distance { |i, j| Math.sqrt(i*i + j*j) }
    m = Matrix.build(4, 4) { |i, j| Math.sqrt(i*i + j*j) }
    assert_equal m, obj.distance_matrix
  end

  # With every distance equal to 1 each density is exp(-1), so the
  # diversity of order 1 is 1. The value goes through exp and log, hence
  # the comparison within a tolerance instead of exact equality.
  def test_entropy_metr
    obj = Entropy::ProbabilityMetricSpace.new
    obj.add_stream("0123")
    obj.define_distance { |i, j| 1 }
    assert_in_delta 1, obj.diversity(1), 1e-9
  end

end
|
36
|
+
|