measurable 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +2 -1
- data/Gemfile.lock +1 -1
- data/Rakefile +4 -0
- data/lib/measurable.rb +29 -37
- data/lib/measurable/cosine.rb +6 -4
- data/lib/measurable/euclidean.rb +33 -10
- data/lib/measurable/haversine.rb +14 -12
- data/lib/measurable/jaccard.rb +17 -17
- data/lib/measurable/maxmin.rb +13 -0
- data/lib/measurable/tanimoto.rb +6 -4
- data/lib/measurable/version.rb +1 -1
- data/spec/measurable_spec.rb +112 -22
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24f0ca4dbb60cda53bab68a614a171df7e337434
|
4
|
+
data.tar.gz: 8675c8a2e723203f287ce4dac3a6e6237fe2b675
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff4de5c4fbbe64592a16e7980182a76fa7e3960931d401004f9cebd8e439ea17acf0e16b8a465131b80e832fd560fd97f5fd6e1054f43678ded44d730a4e90c3
|
7
|
+
data.tar.gz: 62396f9fb4208745628848a5447872bb86e407d2b8ec34b15acaae6ac193f8a2c1a63aa68ef69046d5d6080a693c4cfc38bc9f419c697445eb251bb861cc9af4
|
data/.rspec
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
--color
|
1
|
+
--color
|
2
|
+
--require spec_helper
|
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rake'
|
2
2
|
require 'bundler/gem_tasks'
|
3
|
+
require "rspec/core/rake_task"
|
3
4
|
|
4
5
|
# Setup the necessary gems, specified in the gemspec.
|
5
6
|
require 'bundler'
|
@@ -11,6 +12,9 @@ rescue Bundler::BundlerError => e
|
|
11
12
|
exit e.status_code
|
12
13
|
end
|
13
14
|
|
15
|
+
# Run all the specs.
|
16
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
|
+
|
14
18
|
# Compile task.
|
15
19
|
# Rake::ExtensionTask.new do |ext|
|
16
20
|
# ext.name = 'measurable'
|
data/lib/measurable.rb
CHANGED
@@ -1,55 +1,47 @@
|
|
1
1
|
require 'measurable/version.rb'
|
2
2
|
|
3
3
|
# Distance measures.
|
4
|
-
|
4
|
+
require 'measurable/euclidean'
|
5
5
|
require 'measurable/cosine'
|
6
|
-
require 'measurable/
|
6
|
+
require 'measurable/tanimoto'
|
7
7
|
require 'measurable/jaccard'
|
8
8
|
require 'measurable/haversine'
|
9
|
+
require 'measurable/maxmin'
|
9
10
|
|
10
11
|
module Measurable
|
11
12
|
# PI = 3.1415926535
|
12
13
|
RAD_PER_DEG = 0.017453293 # PI/180
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
def binary_union(u, v)
|
25
|
-
unions = []
|
26
|
-
u.each_with_index do |n, index|
|
27
|
-
if n == 1 || v[index] == 1
|
28
|
-
unions << 1
|
29
|
-
else
|
30
|
-
unions << 0
|
14
|
+
class << self
|
15
|
+
def binary_union(u, v)
|
16
|
+
unions = []
|
17
|
+
u.each_with_index do |n, index|
|
18
|
+
if n == 1 || v[index] == 1
|
19
|
+
unions << 1
|
20
|
+
else
|
21
|
+
unions << 0
|
22
|
+
end
|
31
23
|
end
|
32
|
-
end
|
33
24
|
|
34
|
-
|
35
|
-
|
25
|
+
unions
|
26
|
+
end
|
36
27
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
28
|
+
def binary_intersection(u, v)
|
29
|
+
intersects = []
|
30
|
+
u.each_with_index do |n, index|
|
31
|
+
if n == 1 && v[index] == 1
|
32
|
+
intersects << 1
|
33
|
+
else
|
34
|
+
intersects << 0
|
35
|
+
end
|
44
36
|
end
|
45
|
-
end
|
46
37
|
|
47
|
-
|
48
|
-
|
38
|
+
intersects
|
39
|
+
end
|
49
40
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
41
|
+
# Checks if we"re dealing with NaN"s and will return 0.0 unless
|
42
|
+
# handle NaN"s is set to false
|
43
|
+
def handle_nan(result)
|
44
|
+
result.nan? ? 0.0 : result
|
45
|
+
end
|
54
46
|
end
|
55
47
|
end
|
data/lib/measurable/cosine.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
module Measurable
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
class << self
|
3
|
+
def cosine(u, v)
|
4
|
+
dot_product = dot(u, v)
|
5
|
+
normalization = self.euclidean_normalize * other.euclidean_normalize
|
5
6
|
|
6
|
-
|
7
|
+
handle_nan(dot_product / normalization)
|
8
|
+
end
|
7
9
|
end
|
8
10
|
end
|
data/lib/measurable/euclidean.rb
CHANGED
@@ -1,17 +1,40 @@
|
|
1
1
|
module Measurable
|
2
|
-
|
3
|
-
|
2
|
+
class << self
|
3
|
+
# Add documentation here!
|
4
|
+
def euclidean(u, v = nil)
|
5
|
+
# If the second argument is nil, the method should return the norm of
|
6
|
+
# vector u. For this, we need the distance between u and the origin.
|
7
|
+
if v.nil?
|
8
|
+
v = Array.new(u.size, 0)
|
9
|
+
end
|
10
|
+
|
11
|
+
# We could make it work with vector of different sizes because of #zip
|
12
|
+
# but it's unreliable. It's better to just throw an exception.
|
13
|
+
# TODO: Change this to a more specific, custom-made exception.
|
14
|
+
raise ArgumentError if u.size != v.size
|
15
|
+
|
16
|
+
sum = u.zip(v).reduce(0.0) do |acc, ary|
|
17
|
+
acc += (ary[0] - ary[-1])**2
|
18
|
+
end
|
4
19
|
|
5
|
-
|
6
|
-
sum += (ary.first - ary.last)**2
|
20
|
+
Math.sqrt(sum)
|
7
21
|
end
|
8
|
-
|
9
|
-
Math.sqrt(sum)
|
10
|
-
end
|
11
22
|
|
12
|
-
|
13
|
-
|
14
|
-
|
23
|
+
def euclidean_squared(u, v = nil)
|
24
|
+
# If the second argument is nil, the method should return the norm of
|
25
|
+
# vector u. For this, we need the distance between u and the origin.
|
26
|
+
if v.nil?
|
27
|
+
v = Array.new(u.size, 0)
|
28
|
+
end
|
29
|
+
|
30
|
+
# We could make it work with vector of different sizes because of #zip
|
31
|
+
# but it's unreliable. It's better to just throw an exception.
|
32
|
+
# TODO: Change this to a more specific, custom-made exception.
|
33
|
+
raise ArgumentError if u.size != v.size
|
34
|
+
|
35
|
+
u.zip(v).reduce(0.0) do |acc, ary|
|
36
|
+
acc += (ary[0] - ary[-1])**2
|
37
|
+
end
|
15
38
|
end
|
16
39
|
end
|
17
40
|
end
|
data/lib/measurable/haversine.rb
CHANGED
@@ -23,22 +23,24 @@ module Measurable
|
|
23
23
|
:meters => R_KM * 1000
|
24
24
|
}
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
class << self
|
27
|
+
def haversine(u, v, um = :meters)
|
28
|
+
dlon = u[1] - v[1]
|
29
|
+
dlat = u[0] - v[0]
|
29
30
|
|
30
|
-
|
31
|
-
|
31
|
+
dlon_rad = dlon * RAD_PER_DEG
|
32
|
+
dlat_rad = dlat * RAD_PER_DEG
|
32
33
|
|
33
|
-
|
34
|
-
|
34
|
+
lat1_rad = v[0] * RAD_PER_DEG
|
35
|
+
lon1_rad = v[1] * RAD_PER_DEG
|
35
36
|
|
36
|
-
|
37
|
-
|
37
|
+
lat2_rad = u[0] * RAD_PER_DEG
|
38
|
+
lon2_rad = u[1] * RAD_PER_DEG
|
38
39
|
|
39
|
-
|
40
|
-
|
40
|
+
a = (Math.sin(dlat_rad/2))**2 + Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad/2))**2
|
41
|
+
c = 2 * Math.atan2( Math.sqrt(a), Math.sqrt(1-a))
|
41
42
|
|
42
|
-
|
43
|
+
R[um] * c
|
44
|
+
end
|
43
45
|
end
|
44
46
|
end
|
data/lib/measurable/jaccard.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
# http://en.wikipedia.org/wiki/Jaccard_coefficient
|
2
2
|
module Measurable
|
3
|
+
class << self
|
4
|
+
def jaccard(u, v)
|
5
|
+
1 - jaccard_index(u, v)
|
6
|
+
end
|
3
7
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def jaccard_index(u, v)
|
9
|
-
union = (u + v).uniq.size.to_f
|
10
|
-
i = intersection(u, v).size.to_f
|
8
|
+
def jaccard_index(u, v)
|
9
|
+
union = (u | v).size.to_f
|
10
|
+
intersection = (u & v).size.to_f
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
intersection / union
|
13
|
+
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
def binary_jaccard(u, v)
|
16
|
+
1 - binary_jaccard_index(u, v)
|
17
|
+
end
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
def binary_jaccard_index(u, v)
|
20
|
+
intersection = binary_intersection(u, v).delete_if {|x| x == 0}.size.to_f
|
21
|
+
union = binary_union(u, v).delete_if {|x| x == 0}.size.to_f
|
22
22
|
|
23
|
-
|
23
|
+
intersection / union
|
24
|
+
end
|
24
25
|
end
|
25
|
-
|
26
26
|
end
|
data/lib/measurable/tanimoto.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
|
2
2
|
module Measurable
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
class << self
|
4
|
+
def tanimoto(u, v)
|
5
|
+
dot = dot(u, v).to_f
|
6
|
+
result = dot / (u.sum_of_squares + v.sum_of_squares - dot).to_f
|
6
7
|
|
7
|
-
|
8
|
+
handle_nan(result)
|
9
|
+
end
|
8
10
|
end
|
9
11
|
end
|
data/lib/measurable/version.rb
CHANGED
data/spec/measurable_spec.rb
CHANGED
@@ -1,35 +1,77 @@
|
|
1
1
|
describe Measurable do
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
describe "
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
describe "Binary union" do
|
4
|
+
|
5
|
+
end
|
6
|
+
|
7
|
+
describe "Binary intersection" do
|
8
|
+
|
9
|
+
end
|
10
|
+
|
11
|
+
describe "Euclidean" do
|
12
|
+
|
13
|
+
before :all do
|
14
|
+
@u = [1, 3, 16]
|
15
|
+
@v = [1, 4, 16]
|
16
|
+
@w = [4, 5, 6]
|
12
17
|
end
|
13
18
|
|
14
|
-
|
19
|
+
context "Distance" do
|
20
|
+
it "accepts two arguments" do
|
21
|
+
expect { Measurable.euclidean(@u, @v) }.to_not raise_error
|
22
|
+
expect { Measurable.euclidean(@u, @v, @w) }.to raise_error(ArgumentError)
|
23
|
+
end
|
24
|
+
|
25
|
+
it "accepts one argument and returns the vector's norm" do
|
26
|
+
# Remember that 3^2 + 4^2 = 5^2.
|
27
|
+
Measurable.euclidean([3, 4]).should == 5
|
28
|
+
end
|
15
29
|
|
16
|
-
|
30
|
+
it "should be symmetric" do
|
31
|
+
Measurable.euclidean(@u, @v).should == Measurable.euclidean(@v, @u)
|
32
|
+
end
|
17
33
|
|
18
|
-
|
19
|
-
|
20
|
-
|
34
|
+
it "should return the correct value" do
|
35
|
+
Measurable.euclidean(@u, @u).should == 0
|
36
|
+
Measurable.euclidean(@u, @v).should == 1
|
37
|
+
end
|
38
|
+
|
39
|
+
it "shouldn't work with vectors of different length" do
|
40
|
+
expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error
|
41
|
+
end
|
21
42
|
end
|
22
43
|
|
23
|
-
|
24
|
-
|
44
|
+
context "Squared Distance" do
|
45
|
+
it "accepts two arguments" do
|
46
|
+
expect { Measurable.euclidean_squared(@u, @v) }.to_not raise_error
|
47
|
+
expect { Measurable.euclidean_squared(@u, @v, @w) }.to raise_error(ArgumentError)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "accepts one argument and returns the vector's norm" do
|
51
|
+
# Remember that 3^2 + 4^2 = 5^2.
|
52
|
+
Measurable.euclidean_squared([3, 4]).should == 25
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should be symmetric" do
|
56
|
+
x = Measurable.euclidean_squared(@u, @v)
|
57
|
+
y = Measurable.euclidean_squared(@v, @u)
|
58
|
+
|
59
|
+
x.should == y
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should return the correct value" do
|
63
|
+
Measurable.euclidean_squared(@u, @u).should == 0
|
64
|
+
Measurable.euclidean_squared(@u, @v).should == 1
|
65
|
+
end
|
66
|
+
|
67
|
+
it "shouldn't work with vectors of different length" do
|
68
|
+
expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error
|
69
|
+
end
|
25
70
|
end
|
71
|
+
|
26
72
|
end
|
27
73
|
|
28
|
-
describe "
|
29
|
-
|
30
|
-
describe "Binary intersection" do
|
31
|
-
|
32
|
-
describe "Cosine similarity measure" do
|
74
|
+
describe "Cosine distance" do
|
33
75
|
it "accepts two arguments"
|
34
76
|
|
35
77
|
it "accepts one argument and returns the vector's norm"
|
@@ -55,7 +97,55 @@ describe Measurable do
|
|
55
97
|
it "shouldn't work with vectors of different length"
|
56
98
|
end
|
57
99
|
|
58
|
-
describe "
|
100
|
+
describe "Tanimoto distance" do
|
101
|
+
it "accepts two arguments"
|
102
|
+
|
103
|
+
it "accepts one argument and returns the vector's norm"
|
104
|
+
|
105
|
+
it "should be symmetric"
|
106
|
+
|
107
|
+
it "should return the correct value"
|
108
|
+
|
109
|
+
it "shouldn't work with vectors of different length"
|
110
|
+
end
|
111
|
+
|
112
|
+
describe "Haversine distance" do
|
113
|
+
it "accepts two arguments"
|
114
|
+
|
115
|
+
it "accepts one argument and returns the vector's norm"
|
116
|
+
|
117
|
+
it "should be symmetric"
|
118
|
+
|
119
|
+
it "should return the correct value"
|
120
|
+
|
121
|
+
it "shouldn't work with vectors of different length"
|
122
|
+
end
|
123
|
+
|
124
|
+
describe "Jaccard distance" do
|
125
|
+
it "accepts two arguments"
|
126
|
+
|
127
|
+
it "accepts one argument and returns the vector's norm"
|
128
|
+
|
129
|
+
it "should be symmetric"
|
130
|
+
|
131
|
+
it "should return the correct value"
|
132
|
+
|
133
|
+
it "shouldn't work with vectors of different length"
|
134
|
+
end
|
135
|
+
|
136
|
+
describe "Binary Jaccard distance" do
|
137
|
+
it "accepts two arguments"
|
138
|
+
|
139
|
+
it "accepts one argument and returns the vector's norm"
|
140
|
+
|
141
|
+
it "should be symmetric"
|
142
|
+
|
143
|
+
it "should return the correct value"
|
144
|
+
|
145
|
+
it "shouldn't work with vectors of different length"
|
146
|
+
end
|
147
|
+
|
148
|
+
describe "Max-min distance" do
|
59
149
|
it "accepts two arguments"
|
60
150
|
|
61
151
|
it "accepts one argument and returns the vector's norm"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: measurable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carlos Agarie
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- lib/measurable/euclidean.rb
|
71
71
|
- lib/measurable/haversine.rb
|
72
72
|
- lib/measurable/jaccard.rb
|
73
|
+
- lib/measurable/maxmin.rb
|
73
74
|
- lib/measurable/tanimoto.rb
|
74
75
|
- lib/measurable/version.rb
|
75
76
|
- measurable.gemspec
|