measurable 0.0.5 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,51 @@
1
+ require 'measurable/jaccard'
2
+
1
3
  module Measurable
4
+ module Tanimoto
5
+
6
+ # call-seq:
7
+ # tanimoto(u, v) -> Float
8
+ #
9
+ # Tanimoto distance is a coefficient explicitly chosen so as to allow
10
+ # two dissimilar specimens to be similar to a third one. This breaks the
11
+ # triangle inequality, thus this isn't a metric.
12
+ #
13
+ # More information and references on this are needed. It's left here mostly
14
+ # as a piece of curiosity.
15
+ #
16
+ # See: http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto.27s_Definitions_of_Similarity_and_Distance
17
+ #
18
+ # Arguments:
19
+ # - +u+ -> An array of Numeric objects.
20
+ # - +v+ -> An array of Numeric objects.
21
+ # Returns:
22
+ # - The Tanimoto distance between +u+ and +v+, i.e. -log2 of their Jaccard index.
23
+ # Raises:
24
+ # - +ArgumentError+ -> The sizes of +u+ and +v+ don't match.
25
+ def tanimoto(u, v)
26
+ # TODO: Change this to a more specific, custom-made exception.
27
+ raise ArgumentError if u.size != v.size
2
28
 
3
- # Tanimoto similarity is the same as Jaccard similarity.
4
- alias :tanimoto_similarity :jaccard
29
+ -Math.log2(jaccard_index(u, v))
30
+ end
5
31
 
6
- # call-seq:
7
- # tanimoto(u, v) -> Float
8
- #
9
- # Tanimoto distance is a coefficient explicitly chosen such as to allow for
10
- # two dissimilar specimens to be similar to a third one. This breaks the
11
- # triangle inequality, thus this isn't a metric.
12
- #
13
- # More information and references on this are needed. It's left here mostly
14
- # as a piece of curiosity.
15
- #
16
- # See: # http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto.27s_Definitions_of_Similarity_and_Distance
17
- #
18
- # * *Arguments* :
19
- # - +u+ -> An array of Numeric objects.
20
- # - +v+ -> An array of Numeric objects.
21
- # * *Returns* :
22
- # - A measure of the similarity between +u+ and +v+.
23
- # * *Raises* :
24
- # - +ArgumentError+ -> The sizes of +u+ and +v+ doesn't match.
25
- #
26
- def tanimoto(u, v)
27
- # TODO: Change this to a more specific, custom-made exception.
28
- raise ArgumentError if u.size != v.size
32
+ def self.extended(base) # :nodoc:
33
+ # Tanimoto similarity is the same as Jaccard similarity.
34
+ base.instance_eval do
35
+ extend Measurable::Jaccard
36
+ alias :tanimoto_similarity :jaccard
37
+ end
38
+ super
39
+ end
29
40
 
30
- -Math.log2(jaccard_index(u, v))
41
+ def self.included(base) # :nodoc:
42
+ base.class_eval do
43
+ include Measurable::Jaccard
44
+ alias :tanimoto_similarity :jaccard
45
+ end
46
+ super
47
+ end
31
48
  end
32
- end
49
+
50
+ extend Measurable::Tanimoto
51
+ end
@@ -1,3 +1,3 @@
1
1
  module Measurable
2
- VERSION = "0.0.5" # :nodoc:
3
- end
2
+ VERSION = "0.0.11" # :nodoc:
3
+ end
@@ -4,12 +4,13 @@ require 'measurable/version'
4
4
  require 'date'
5
5
 
6
6
  Gem::Specification.new do |gem|
7
- gem.name = "measurable"
7
+ gem.name = "measurable"
8
8
  gem.version = Measurable::VERSION
9
9
  gem.date = Date.today.to_s
10
+ gem.license = "MIT"
10
11
  gem.summary = %Q{A Ruby gem with a lot of distance measures for your projects.}
11
12
  gem.description = %Q{A Ruby gem with a lot of distance measures for your projects.}
12
-
13
+
13
14
  gem.authors = ["Carlos Agarie"]
14
15
  gem.email = "carlos.agarie@gmail.com"
15
16
  gem.homepage = "http://github.com/agarie/measurable"
@@ -23,6 +24,7 @@ Gem::Specification.new do |gem|
23
24
  gem.required_ruby_version = '>= 1.9.3'
24
25
 
25
26
  gem.add_development_dependency 'bundler'
26
- gem.add_development_dependency 'rake', '~> 0.9'
27
- gem.add_development_dependency 'rspec', '~> 2.9.0'
27
+ gem.add_development_dependency 'rake', '>= 12.3.3'
28
+ gem.add_development_dependency 'rdoc', '>= 6.0.0'
29
+ gem.add_development_dependency 'rspec', '~> 3.2'
28
30
  end
@@ -0,0 +1,48 @@
1
+ describe "Chebyshev distance" do
2
+
3
+ before :all do
4
+ @u = [1.4, 2.5, 5.8]
5
+ @v = [2.2, 3.6, 2.7]
6
+ @w = [4.1, 5.7, 1.2]
7
+ end
8
+
9
+ it "accepts two arguments" do
10
+ expect { Measurable.chebyshev(@u, @v) }.to_not raise_error
11
+ expect { Measurable.chebyshev(@u, @v, @w) }.to raise_error(ArgumentError)
12
+ end
13
+
14
+ it "should be symmetric" do
15
+ x = Measurable.chebyshev(@u, @v)
16
+ y = Measurable.chebyshev(@v, @u)
17
+
18
+ expect(x).to be_within(TOLERANCE).of(y)
19
+ end
20
+
21
+ it "should return the correct value" do
22
+ x = Measurable.chebyshev(@u, @v)
23
+ expect(x).to be_within(TOLERANCE).of(3.1)
24
+ end
25
+
26
+ it "shouldn't work with vectors of different length" do
27
+ expect { Measurable.chebyshev(@u, [1, 3, 5, 7]) }.to raise_error(ArgumentError)
28
+ end
29
+
30
+ it "can be extended separately" do
31
+ klass = Class.new do
32
+ extend Measurable::Chebyshev
33
+ end
34
+
35
+ x = klass.chebyshev(@u, @v)
36
+ expect(x).to be_within(TOLERANCE).of(3.1)
37
+ end
38
+
39
+ it "can be included separately" do
40
+ klass = Class.new do
41
+ include Measurable::Chebyshev
42
+ end
43
+
44
+ x = klass.new.chebyshev(@u, @v)
45
+ expect(x).to be_within(TOLERANCE).of(3.1)
46
+ end
47
+
48
+ end
@@ -1,29 +1,77 @@
1
- describe "Cosine distance" do
2
-
3
- before :all do
4
- @u = [1, 2]
5
- @v = [2, 3]
6
- @w = [4, 5]
7
- end
8
-
9
- it "accepts two arguments" do
10
- expect { Measurable.cosine(@u, @v) }.to_not raise_error
11
- expect { Measurable.cosine(@u, @v, @w) }.to raise_error(ArgumentError)
12
- end
13
-
14
- it "should be symmetric" do
15
- x = Measurable.cosine(@u, @v)
16
- y = Measurable.cosine(@v, @u)
1
+ describe "Cosine" do
17
2
 
18
- x.should be_within(TOLERANCE).of(y)
19
- end
3
+ context "Similarity" do
4
+ before :all do
5
+ @u = [1, 2]
6
+ @v = [2, 3]
7
+ @w = [4, 5]
8
+ end
9
+
10
+ it "accepts two arguments" do
11
+ expect { Measurable.cosine_similarity(@u, @v) }.to_not raise_error
12
+ expect { Measurable.cosine_similarity(@u, @v, @w) }.to raise_error(ArgumentError)
13
+ end
14
+
15
+ it "should be symmetric" do
16
+ x = Measurable.cosine_similarity(@u, @v)
17
+ y = Measurable.cosine_similarity(@v, @u)
18
+
19
+ x.should be_within(TOLERANCE).of(y)
20
+ end
21
+
22
+ it "should return the correct value" do
23
+ x = Measurable.cosine_similarity(@u, @v)
24
+ x.should be_within(TOLERANCE).of(0.992277877)
25
+ end
20
26
 
21
- it "should return the correct value" do
22
- x = Measurable.cosine(@u, @v)
23
- x.should be_within(TOLERANCE).of(0.992277877)
27
+ it "shouldn't work with vectors of different length" do
28
+ expect { Measurable.cosine_similarity(@u, [1, 3, 5, 7]) }.to raise_error(ArgumentError)
29
+ end
30
+
31
+ it "can be extended separately" do
32
+ klass = Class.new do
33
+ extend Measurable::Cosine
34
+ end
35
+ x = klass.cosine_similarity(@u, @v)
36
+ x.should be_within(TOLERANCE).of(0.992277877)
37
+ end
38
+
39
+ it "can be extended separately" do
40
+ klass = Class.new do
41
+ include Measurable::Cosine
42
+ end
43
+ x = klass.new.cosine_similarity(@u, @v)
44
+ x.should be_within(TOLERANCE).of(0.992277877)
45
+ end
24
46
  end
25
47
 
26
- it "shouldn't work with vectors of different length" do
27
- expect { Measurable.cosine(@u, [1, 3, 5, 7]) }.to raise_error
48
+ context "Distance" do
49
+ before :all do
50
+ @u = [1, 2]
51
+ @v = [2, 3]
52
+ @w = [4, 5]
53
+ end
54
+
55
+ it "accepts two arguments" do
56
+ expect { Measurable.cosine_distance(@u, @v) }.to_not raise_error
57
+ expect { Measurable.cosine_distance(@u, @v, @w) }.to raise_error(ArgumentError)
58
+ end
59
+
60
+ it "should be symmetric" do
61
+ x = Measurable.cosine_distance(@u, @v)
62
+ y = Measurable.cosine_distance(@v, @u)
63
+
64
+ x.should be_within(TOLERANCE).of(y)
65
+ end
66
+
67
+ it "should return the correct value" do
68
+ x = Measurable.cosine_distance(@u, @v)
69
+ # TODO: Use a real example.
70
+ x.should be_within(TOLERANCE).of(1.0 - 0.992277877)
71
+ end
72
+
73
+ it "shouldn't work with vectors of different length" do
74
+ expect { Measurable.cosine_distance(@u, [1, 3, 5, 7]) }.to raise_error(ArgumentError)
75
+ end
28
76
  end
29
- end
77
+ end
@@ -1,22 +1,22 @@
1
1
  describe "Euclidean" do
2
-
2
+
3
3
  before :all do
4
4
  @u = [1, 3, 16]
5
5
  @v = [1, 4, 16]
6
6
  @w = [4, 5, 6]
7
7
  end
8
-
8
+
9
9
  context "Distance" do
10
10
  it "accepts two arguments" do
11
11
  expect { Measurable.euclidean(@u, @v) }.to_not raise_error
12
12
  expect { Measurable.euclidean(@u, @v, @w) }.to raise_error(ArgumentError)
13
13
  end
14
-
14
+
15
15
  it "accepts one argument and returns the vector's norm" do
16
16
  # Remember that 3^2 + 4^2 = 5^2.
17
17
  Measurable.euclidean([3, 4]).should == 5
18
18
  end
19
-
19
+
20
20
  it "should be symmetric" do
21
21
  Measurable.euclidean(@u, @v).should == Measurable.euclidean(@v, @u)
22
22
  end
@@ -25,27 +25,43 @@ describe "Euclidean" do
25
25
  Measurable.euclidean(@u, @u).should == 0
26
26
  Measurable.euclidean(@u, @v).should == 1
27
27
  end
28
-
28
+
29
29
  it "shouldn't work with vectors of different length" do
30
- expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error
30
+ expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error(ArgumentError)
31
+ end
32
+
33
+ it "can be extended separately" do
34
+ klass = Class.new do
35
+ extend Measurable::Euclidean
36
+ end
37
+
38
+ klass.euclidean([3, 4]).should == 5
39
+ end
40
+
41
+ it "can be included separately" do
42
+ klass = Class.new do
43
+ include Measurable::Euclidean
44
+ end
45
+
46
+ klass.new.euclidean([3, 4]).should == 5
31
47
  end
32
48
  end
33
-
49
+
34
50
  context "Squared Distance" do
35
51
  it "accepts two arguments" do
36
52
  expect { Measurable.euclidean_squared(@u, @v) }.to_not raise_error
37
53
  expect { Measurable.euclidean_squared(@u, @v, @w) }.to raise_error(ArgumentError)
38
54
  end
39
-
55
+
40
56
  it "accepts one argument and returns the vector's norm" do
41
57
  # Remember that 3^2 + 4^2 = 5^2.
42
58
  Measurable.euclidean_squared([3, 4]).should == 25
43
59
  end
44
-
60
+
45
61
  it "should be symmetric" do
46
62
  x = Measurable.euclidean_squared(@u, @v)
47
63
  y = Measurable.euclidean_squared(@v, @u)
48
-
64
+
49
65
  x.should == y
50
66
  end
51
67
 
@@ -53,9 +69,9 @@ describe "Euclidean" do
53
69
  Measurable.euclidean_squared(@u, @u).should == 0
54
70
  Measurable.euclidean_squared(@u, @v).should == 1
55
71
  end
56
-
72
+
57
73
  it "shouldn't work with vectors of different length" do
58
- expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error
59
- end
74
+ expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error(ArgumentError)
75
+ end
60
76
  end
61
- end
77
+ end
@@ -0,0 +1,46 @@
1
+ describe "Hamming distance" do
2
+
3
+ before :all do
4
+ @u = "Hi, I'm a test string!"
5
+ @v = "Hello, not a test omg."
6
+ @w = "Hey there, a test wtf!"
7
+ end
8
+
9
+ it "accepts two arguments" do
10
+ expect { Measurable.hamming(@u, @v) }.to_not raise_error
11
+ expect { Measurable.hamming(@u, @v, @w) }.to raise_error(ArgumentError)
12
+ end
13
+
14
+ it "should be symmetric" do
15
+ x = Measurable.hamming(@u, @v)
16
+ y = Measurable.hamming(@v, @u)
17
+
18
+ x.should be(y)
19
+ end
20
+
21
+ it "should return the correct value" do
22
+ x = Measurable.hamming(@u, @v)
23
+ x.should be(17)
24
+ end
25
+
26
+ it "shouldn't work with strings of different length" do
27
+ expect { Measurable.hamming(@u, "smallstring") }.to raise_error(ArgumentError)
28
+ expect { Measurable.hamming(@u, "largestring" * 20) }.to raise_error(ArgumentError)
29
+ end
30
+
31
+ it "can be extended separately" do
32
+ klass = Class.new do
33
+ extend Measurable::Hamming
34
+ end
35
+
36
+ klass.hamming(@u, @v).should == 17
37
+ end
38
+
39
+ it "can be included separately" do
40
+ klass = Class.new do
41
+ include Measurable::Hamming
42
+ end
43
+
44
+ klass.new.hamming(@u, @v).should == 17
45
+ end
46
+ end
@@ -32,6 +32,26 @@ describe "Haversine distance" do
32
32
  end
33
33
 
34
34
  it "should only work with [lat, long] vectors" do
35
- expect { Measurable.haversine([2, 4], [1, 3, 5, 7]) }.to raise_error
35
+ expect { Measurable.haversine([2, 4], [1, 3, 5, 7]) }.to raise_error(ArgumentError)
36
36
  end
37
- end
37
+
38
+ it "can be extended seperately" do
39
+ klass = Class.new do
40
+ extend Measurable::Haversine
41
+ end
42
+
43
+ x = klass.haversine(@u, @v, :km)
44
+
45
+ x.should be_within(@haversine_tolerance).of(18533)
46
+ end
47
+
48
+ it "can be included seperately" do
49
+ klass = Class.new do
50
+ include Measurable::Haversine
51
+ end
52
+
53
+ x = klass.new.haversine(@u, @v, :km)
54
+
55
+ x.should be_within(@haversine_tolerance).of(18533)
56
+ end
57
+ end
@@ -16,18 +16,39 @@ describe "Jaccard" do
16
16
  x = Measurable.jaccard_index(@u, @v)
17
17
  y = Measurable.jaccard_index(@v, @u)
18
18
 
19
- x.should be_within(TOLERANCE).of(y)
19
+ expect(x).to be_within(TOLERANCE).of(y)
20
20
  end
21
21
 
22
22
  it "should return the correct value" do
23
23
  x = Measurable.jaccard_index(@u, @v)
24
24
 
25
- x.should be_within(TOLERANCE).of(2.0 / 3.0)
25
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
26
26
  end
27
27
 
28
- it "shouldn't work with vectors of different length" do
29
- expect { Measurable.jaccard_index(@u, [1, 2, 3, 4]) }.to raise_error
28
+ it "should work with vectors of different length" do
29
+ expect { Measurable.jaccard_index(@u, [1, 2, 3, 4]) }.to_not raise_error
30
30
  end
31
+
32
+ it "can be extended separately" do
33
+ klass = Class.new do
34
+ extend Measurable::Jaccard
35
+ end
36
+
37
+ x = klass.jaccard_index(@u, @v)
38
+
39
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
40
+ end
41
+
42
+ it "can be included separately" do
43
+ klass = Class.new do
44
+ include Measurable::Jaccard
45
+ end
46
+
47
+ x = klass.new.jaccard_index(@u, @v)
48
+
49
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
50
+ end
51
+
31
52
  end
32
53
 
33
54
  context "Distance" do
@@ -38,25 +59,25 @@ describe "Jaccard" do
38
59
  end
39
60
 
40
61
  it "accepts two arguments" do
41
- expect { Measurable.jaccard(@u, @v) }.to_not raise_error
42
- expect { Measurable.jaccard(@u, @v, @w) }.to raise_error(ArgumentError)
62
+ expect { Measurable.jaccard_dissimilarity(@u, @v) }.to_not raise_error
63
+ expect { Measurable.jaccard_dissimilarity(@u, @v, @w) }.to raise_error(ArgumentError)
43
64
  end
44
65
 
45
66
  it "should be symmetric" do
46
- x = Measurable.jaccard(@u, @v)
47
- y = Measurable.jaccard(@v, @u)
67
+ x = Measurable.jaccard_dissimilarity(@u, @v)
68
+ y = Measurable.jaccard_dissimilarity(@v, @u)
48
69
 
49
- x.should be_within(TOLERANCE).of(y)
70
+ expect(x).to be_within(TOLERANCE).of(y)
50
71
  end
51
72
 
52
73
  it "should return the correct value" do
53
- x = Measurable.jaccard(@u, @v)
74
+ x = Measurable.jaccard_dissimilarity(@u, @v)
54
75
 
55
- x.should be_within(TOLERANCE).of(1.0 / 3.0)
76
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
56
77
  end
57
78
 
58
- it "shouldn't work with vectors of different length" do
59
- expect { Measurable.jaccard(@u, [1, 2, 3, 4]) }.to raise_error
79
+ it "should work with vectors of different length" do
80
+ expect { Measurable.jaccard_dissimilarity(@u, [1, 2, 3, 4]) }.to_not raise_error
60
81
  end
61
82
  end
62
- end
83
+ end