measurable 0.0.5 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,32 +1,51 @@
1
+ require 'measurable/jaccard'
2
+
1
3
  module Measurable
4
+ module Tanimoto
5
+
6
+ # call-seq:
7
+ # tanimoto(u, v) -> Float
8
+ #
9
+ # Tanimoto distance is a coefficient explicitly chosen so as to allow for
10
+ # two dissimilar specimens to be similar to a third one. This breaks the
11
+ # triangle inequality, thus this isn't a metric.
12
+ #
13
+ # More information and references on this are needed. It's left here mostly
14
+ # as a piece of curiosity.
15
+ #
16
+ # See: http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto.27s_Definitions_of_Similarity_and_Distance
17
+ #
18
+ # Arguments:
19
+ # - +u+ -> An array of Numeric objects.
20
+ # - +v+ -> An array of Numeric objects.
21
+ # Returns:
22
+ # - The Tanimoto distance between +u+ and +v+, computed as -log2 of their Jaccard index.
23
+ # Raises:
24
+ # - +ArgumentError+ -> The sizes of +u+ and +v+ don't match.
25
+ def tanimoto(u, v)
26
+ # TODO: Change this to a more specific, custom-made exception.
27
+ raise ArgumentError if u.size != v.size
2
28
 
3
- # Tanimoto similarity is the same as Jaccard similarity.
4
- alias :tanimoto_similarity :jaccard
29
+ -Math.log2(jaccard_index(u, v))
30
+ end
5
31
 
6
- # call-seq:
7
- # tanimoto(u, v) -> Float
8
- #
9
- # Tanimoto distance is a coefficient explicitly chosen such as to allow for
10
- # two dissimilar specimens to be similar to a third one. This breaks the
11
- # triangle inequality, thus this isn't a metric.
12
- #
13
- # More information and references on this are needed. It's left here mostly
14
- # as a piece of curiosity.
15
- #
16
- # See: # http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto.27s_Definitions_of_Similarity_and_Distance
17
- #
18
- # * *Arguments* :
19
- # - +u+ -> An array of Numeric objects.
20
- # - +v+ -> An array of Numeric objects.
21
- # * *Returns* :
22
- # - A measure of the similarity between +u+ and +v+.
23
- # * *Raises* :
24
- # - +ArgumentError+ -> The sizes of +u+ and +v+ doesn't match.
25
- #
26
- def tanimoto(u, v)
27
- # TODO: Change this to a more specific, custom-made exception.
28
- raise ArgumentError if u.size != v.size
32
+ def self.extended(base) # :nodoc:
33
+ # Tanimoto similarity is the same as Jaccard similarity.
34
+ base.instance_eval do
35
+ extend Measurable::Jaccard
36
+ alias :tanimoto_similarity :jaccard
37
+ end
38
+ super
39
+ end
29
40
 
30
- -Math.log2(jaccard_index(u, v))
41
+ def self.included(base) # :nodoc:
42
+ base.class_eval do
43
+ include Measurable::Jaccard
44
+ alias :tanimoto_similarity :jaccard
45
+ end
46
+ super
47
+ end
31
48
  end
32
- end
49
+
50
+ extend Measurable::Tanimoto
51
+ end
@@ -1,3 +1,3 @@
1
1
  module Measurable
2
- VERSION = "0.0.5" # :nodoc:
3
- end
2
+ VERSION = "0.0.11" # :nodoc:
3
+ end
@@ -4,12 +4,13 @@ require 'measurable/version'
4
4
  require 'date'
5
5
 
6
6
  Gem::Specification.new do |gem|
7
- gem.name = "measurable"
7
+ gem.name = "measurable"
8
8
  gem.version = Measurable::VERSION
9
9
  gem.date = Date.today.to_s
10
+ gem.license = "MIT"
10
11
  gem.summary = %Q{A Ruby gem with a lot of distance measures for your projects.}
11
12
  gem.description = %Q{A Ruby gem with a lot of distance measures for your projects.}
12
-
13
+
13
14
  gem.authors = ["Carlos Agarie"]
14
15
  gem.email = "carlos.agarie@gmail.com"
15
16
  gem.homepage = "http://github.com/agarie/measurable"
@@ -23,6 +24,7 @@ Gem::Specification.new do |gem|
23
24
  gem.required_ruby_version = '>= 1.9.3'
24
25
 
25
26
  gem.add_development_dependency 'bundler'
26
- gem.add_development_dependency 'rake', '~> 0.9'
27
- gem.add_development_dependency 'rspec', '~> 2.9.0'
27
+ gem.add_development_dependency 'rake', '>= 12.3.3'
28
+ gem.add_development_dependency 'rdoc', '>= 6.0.0'
29
+ gem.add_development_dependency 'rspec', '~> 3.2'
28
30
  end
@@ -0,0 +1,48 @@
1
+ describe "Chebyshev distance" do
2
+
3
+ before :all do
4
+ @u = [1.4, 2.5, 5.8]
5
+ @v = [2.2, 3.6, 2.7]
6
+ @w = [4.1, 5.7, 1.2]
7
+ end
8
+
9
+ it "accepts two arguments" do
10
+ expect { Measurable.chebyshev(@u, @v) }.to_not raise_error
11
+ expect { Measurable.chebyshev(@u, @v, @w) }.to raise_error(ArgumentError)
12
+ end
13
+
14
+ it "should be symmetric" do
15
+ x = Measurable.chebyshev(@u, @v)
16
+ y = Measurable.chebyshev(@v, @u)
17
+
18
+ expect(x).to be_within(TOLERANCE).of(y)
19
+ end
20
+
21
+ it "should return the correct value" do
22
+ x = Measurable.chebyshev(@u, @v)
23
+ expect(x).to be_within(TOLERANCE).of(3.1)
24
+ end
25
+
26
+ it "shouldn't work with vectors of different length" do
27
+ expect { Measurable.chebyshev(@u, [1, 3, 5, 7]) }.to raise_error(ArgumentError)
28
+ end
29
+
30
+ it "can be extended separately" do
31
+ klass = Class.new do
32
+ extend Measurable::Chebyshev
33
+ end
34
+
35
+ x = klass.chebyshev(@u, @v)
36
+ expect(x).to be_within(TOLERANCE).of(3.1)
37
+ end
38
+
39
+ it "can be included separately" do
40
+ klass = Class.new do
41
+ include Measurable::Chebyshev
42
+ end
43
+
44
+ x = klass.new.chebyshev(@u, @v)
45
+ expect(x).to be_within(TOLERANCE).of(3.1)
46
+ end
47
+
48
+ end
@@ -1,29 +1,77 @@
1
- describe "Cosine distance" do
2
-
3
- before :all do
4
- @u = [1, 2]
5
- @v = [2, 3]
6
- @w = [4, 5]
7
- end
8
-
9
- it "accepts two arguments" do
10
- expect { Measurable.cosine(@u, @v) }.to_not raise_error
11
- expect { Measurable.cosine(@u, @v, @w) }.to raise_error(ArgumentError)
12
- end
13
-
14
- it "should be symmetric" do
15
- x = Measurable.cosine(@u, @v)
16
- y = Measurable.cosine(@v, @u)
1
+ describe "Cosine" do
17
2
 
18
- x.should be_within(TOLERANCE).of(y)
19
- end
3
+ context "Similarity" do
4
+ before :all do
5
+ @u = [1, 2]
6
+ @v = [2, 3]
7
+ @w = [4, 5]
8
+ end
9
+
10
+ it "accepts two arguments" do
11
+ expect { Measurable.cosine_similarity(@u, @v) }.to_not raise_error
12
+ expect { Measurable.cosine_similarity(@u, @v, @w) }.to raise_error(ArgumentError)
13
+ end
14
+
15
+ it "should be symmetric" do
16
+ x = Measurable.cosine_similarity(@u, @v)
17
+ y = Measurable.cosine_similarity(@v, @u)
18
+
19
+ x.should be_within(TOLERANCE).of(y)
20
+ end
21
+
22
+ it "should return the correct value" do
23
+ x = Measurable.cosine_similarity(@u, @v)
24
+ x.should be_within(TOLERANCE).of(0.992277877)
25
+ end
20
26
 
21
- it "should return the correct value" do
22
- x = Measurable.cosine(@u, @v)
23
- x.should be_within(TOLERANCE).of(0.992277877)
27
+ it "shouldn't work with vectors of different length" do
28
+ expect { Measurable.cosine_similarity(@u, [1, 3, 5, 7]) }.to raise_error(ArgumentError)
29
+ end
30
+
31
+ it "can be extended separately" do
32
+ klass = Class.new do
33
+ extend Measurable::Cosine
34
+ end
35
+ x = klass.cosine_similarity(@u, @v)
36
+ x.should be_within(TOLERANCE).of(0.992277877)
37
+ end
38
+
39
+ it "can be extended separately" do
40
+ klass = Class.new do
41
+ include Measurable::Cosine
42
+ end
43
+ x = klass.new.cosine_similarity(@u, @v)
44
+ x.should be_within(TOLERANCE).of(0.992277877)
45
+ end
24
46
  end
25
47
 
26
- it "shouldn't work with vectors of different length" do
27
- expect { Measurable.cosine(@u, [1, 3, 5, 7]) }.to raise_error
48
+ context "Distance" do
49
+ before :all do
50
+ @u = [1, 2]
51
+ @v = [2, 3]
52
+ @w = [4, 5]
53
+ end
54
+
55
+ it "accepts two arguments" do
56
+ expect { Measurable.cosine_distance(@u, @v) }.to_not raise_error
57
+ expect { Measurable.cosine_distance(@u, @v, @w) }.to raise_error(ArgumentError)
58
+ end
59
+
60
+ it "should be symmetric" do
61
+ x = Measurable.cosine_distance(@u, @v)
62
+ y = Measurable.cosine_distance(@v, @u)
63
+
64
+ x.should be_within(TOLERANCE).of(y)
65
+ end
66
+
67
+ it "should return the correct value" do
68
+ x = Measurable.cosine_distance(@u, @v)
69
+ # TODO: Use a real example.
70
+ x.should be_within(TOLERANCE).of(1.0 - 0.992277877)
71
+ end
72
+
73
+ it "shouldn't work with vectors of different length" do
74
+ expect { Measurable.cosine_distance(@u, [1, 3, 5, 7]) }.to raise_error(ArgumentError)
75
+ end
28
76
  end
29
- end
77
+ end
@@ -1,22 +1,22 @@
1
1
  describe "Euclidean" do
2
-
2
+
3
3
  before :all do
4
4
  @u = [1, 3, 16]
5
5
  @v = [1, 4, 16]
6
6
  @w = [4, 5, 6]
7
7
  end
8
-
8
+
9
9
  context "Distance" do
10
10
  it "accepts two arguments" do
11
11
  expect { Measurable.euclidean(@u, @v) }.to_not raise_error
12
12
  expect { Measurable.euclidean(@u, @v, @w) }.to raise_error(ArgumentError)
13
13
  end
14
-
14
+
15
15
  it "accepts one argument and returns the vector's norm" do
16
16
  # Remember that 3^2 + 4^2 = 5^2.
17
17
  Measurable.euclidean([3, 4]).should == 5
18
18
  end
19
-
19
+
20
20
  it "should be symmetric" do
21
21
  Measurable.euclidean(@u, @v).should == Measurable.euclidean(@v, @u)
22
22
  end
@@ -25,27 +25,43 @@ describe "Euclidean" do
25
25
  Measurable.euclidean(@u, @u).should == 0
26
26
  Measurable.euclidean(@u, @v).should == 1
27
27
  end
28
-
28
+
29
29
  it "shouldn't work with vectors of different length" do
30
- expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error
30
+ expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error(ArgumentError)
31
+ end
32
+
33
+ it "can be extended separately" do
34
+ klass = Class.new do
35
+ extend Measurable::Euclidean
36
+ end
37
+
38
+ klass.euclidean([3, 4]).should == 5
39
+ end
40
+
41
+ it "can be included separately" do
42
+ klass = Class.new do
43
+ include Measurable::Euclidean
44
+ end
45
+
46
+ klass.new.euclidean([3, 4]).should == 5
31
47
  end
32
48
  end
33
-
49
+
34
50
  context "Squared Distance" do
35
51
  it "accepts two arguments" do
36
52
  expect { Measurable.euclidean_squared(@u, @v) }.to_not raise_error
37
53
  expect { Measurable.euclidean_squared(@u, @v, @w) }.to raise_error(ArgumentError)
38
54
  end
39
-
55
+
40
56
  it "accepts one argument and returns the vector's norm" do
41
57
  # Remember that 3^2 + 4^2 = 5^2.
42
58
  Measurable.euclidean_squared([3, 4]).should == 25
43
59
  end
44
-
60
+
45
61
  it "should be symmetric" do
46
62
  x = Measurable.euclidean_squared(@u, @v)
47
63
  y = Measurable.euclidean_squared(@v, @u)
48
-
64
+
49
65
  x.should == y
50
66
  end
51
67
 
@@ -53,9 +69,9 @@ describe "Euclidean" do
53
69
  Measurable.euclidean_squared(@u, @u).should == 0
54
70
  Measurable.euclidean_squared(@u, @v).should == 1
55
71
  end
56
-
72
+
57
73
  it "shouldn't work with vectors of different length" do
58
- expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error
59
- end
74
+ expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error(ArgumentError)
75
+ end
60
76
  end
61
- end
77
+ end
@@ -0,0 +1,46 @@
1
+ describe "Hamming distance" do
2
+
3
+ before :all do
4
+ @u = "Hi, I'm a test string!"
5
+ @v = "Hello, not a test omg."
6
+ @w = "Hey there, a test wtf!"
7
+ end
8
+
9
+ it "accepts two arguments" do
10
+ expect { Measurable.hamming(@u, @v) }.to_not raise_error
11
+ expect { Measurable.hamming(@u, @v, @w) }.to raise_error(ArgumentError)
12
+ end
13
+
14
+ it "should be symmetric" do
15
+ x = Measurable.hamming(@u, @v)
16
+ y = Measurable.hamming(@v, @u)
17
+
18
+ x.should be(y)
19
+ end
20
+
21
+ it "should return the correct value" do
22
+ x = Measurable.hamming(@u, @v)
23
+ x.should be(17)
24
+ end
25
+
26
+ it "shouldn't work with strings of different length" do
27
+ expect { Measurable.hamming(@u, "smallstring") }.to raise_error(ArgumentError)
28
+ expect { Measurable.hamming(@u, "largestring" * 20) }.to raise_error(ArgumentError)
29
+ end
30
+
31
+ it "can be extended separately" do
32
+ klass = Class.new do
33
+ extend Measurable::Hamming
34
+ end
35
+
36
+ klass.hamming(@u, @v).should == 17
37
+ end
38
+
39
+ it "can be included separately" do
40
+ klass = Class.new do
41
+ include Measurable::Hamming
42
+ end
43
+
44
+ klass.new.hamming(@u, @v).should == 17
45
+ end
46
+ end
@@ -32,6 +32,26 @@ describe "Haversine distance" do
32
32
  end
33
33
 
34
34
  it "should only work with [lat, long] vectors" do
35
- expect { Measurable.haversine([2, 4], [1, 3, 5, 7]) }.to raise_error
35
+ expect { Measurable.haversine([2, 4], [1, 3, 5, 7]) }.to raise_error(ArgumentError)
36
36
  end
37
- end
37
+
38
+ it "can be extended seperately" do
39
+ klass = Class.new do
40
+ extend Measurable::Haversine
41
+ end
42
+
43
+ x = klass.haversine(@u, @v, :km)
44
+
45
+ x.should be_within(@haversine_tolerance).of(18533)
46
+ end
47
+
48
+ it "can be included seperately" do
49
+ klass = Class.new do
50
+ include Measurable::Haversine
51
+ end
52
+
53
+ x = klass.new.haversine(@u, @v, :km)
54
+
55
+ x.should be_within(@haversine_tolerance).of(18533)
56
+ end
57
+ end
@@ -16,18 +16,39 @@ describe "Jaccard" do
16
16
  x = Measurable.jaccard_index(@u, @v)
17
17
  y = Measurable.jaccard_index(@v, @u)
18
18
 
19
- x.should be_within(TOLERANCE).of(y)
19
+ expect(x).to be_within(TOLERANCE).of(y)
20
20
  end
21
21
 
22
22
  it "should return the correct value" do
23
23
  x = Measurable.jaccard_index(@u, @v)
24
24
 
25
- x.should be_within(TOLERANCE).of(2.0 / 3.0)
25
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
26
26
  end
27
27
 
28
- it "shouldn't work with vectors of different length" do
29
- expect { Measurable.jaccard_index(@u, [1, 2, 3, 4]) }.to raise_error
28
+ it "should work with vectors of different length" do
29
+ expect { Measurable.jaccard_index(@u, [1, 2, 3, 4]) }.to_not raise_error
30
30
  end
31
+
32
+ it "can be extended separately" do
33
+ klass = Class.new do
34
+ extend Measurable::Jaccard
35
+ end
36
+
37
+ x = klass.jaccard_index(@u, @v)
38
+
39
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
40
+ end
41
+
42
+ it "can be included separately" do
43
+ klass = Class.new do
44
+ include Measurable::Jaccard
45
+ end
46
+
47
+ x = klass.new.jaccard_index(@u, @v)
48
+
49
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
50
+ end
51
+
31
52
  end
32
53
 
33
54
  context "Distance" do
@@ -38,25 +59,25 @@ describe "Jaccard" do
38
59
  end
39
60
 
40
61
  it "accepts two arguments" do
41
- expect { Measurable.jaccard(@u, @v) }.to_not raise_error
42
- expect { Measurable.jaccard(@u, @v, @w) }.to raise_error(ArgumentError)
62
+ expect { Measurable.jaccard_dissimilarity(@u, @v) }.to_not raise_error
63
+ expect { Measurable.jaccard_dissimilarity(@u, @v, @w) }.to raise_error(ArgumentError)
43
64
  end
44
65
 
45
66
  it "should be symmetric" do
46
- x = Measurable.jaccard(@u, @v)
47
- y = Measurable.jaccard(@v, @u)
67
+ x = Measurable.jaccard_dissimilarity(@u, @v)
68
+ y = Measurable.jaccard_dissimilarity(@v, @u)
48
69
 
49
- x.should be_within(TOLERANCE).of(y)
70
+ expect(x).to be_within(TOLERANCE).of(y)
50
71
  end
51
72
 
52
73
  it "should return the correct value" do
53
- x = Measurable.jaccard(@u, @v)
74
+ x = Measurable.jaccard_dissimilarity(@u, @v)
54
75
 
55
- x.should be_within(TOLERANCE).of(1.0 / 3.0)
76
+ expect(x).to be_within(TOLERANCE).of(1.0 / 2.0)
56
77
  end
57
78
 
58
- it "shouldn't work with vectors of different length" do
59
- expect { Measurable.jaccard(@u, [1, 2, 3, 4]) }.to raise_error
79
+ it "should work with vectors of different length" do
80
+ expect { Measurable.jaccard_dissimilarity(@u, [1, 2, 3, 4]) }.to_not raise_error
60
81
  end
61
82
  end
62
- end
83
+ end