measurable 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c923bf9e9bd70c37d84330fcbb9d883f72344b04
4
- data.tar.gz: bea042df8b59927f38b7ace662f7f8bb41f3f33a
3
+ metadata.gz: 24f0ca4dbb60cda53bab68a614a171df7e337434
4
+ data.tar.gz: 8675c8a2e723203f287ce4dac3a6e6237fe2b675
5
5
  SHA512:
6
- metadata.gz: 5ce3eaec6a905c087b6538baf92172e0099b149eaf206b6af456645d6bc6e9b3b3975566de4c44a8dc84ef0c7dbc6cd11a53ce40d7f8de1b404540b37fa12c52
7
- data.tar.gz: a9d1eb70f7c8b1e13878f23b3001a0eabfbbbe6fb59bf9b56704b6f3e8672ea6cf0954a92d8556219eb6a8ab6f79c7489dede6b82c33230bab821b56ec63ef71
6
+ metadata.gz: ff4de5c4fbbe64592a16e7980182a76fa7e3960931d401004f9cebd8e439ea17acf0e16b8a465131b80e832fd560fd97f5fd6e1054f43678ded44d730a4e90c3
7
+ data.tar.gz: 62396f9fb4208745628848a5447872bb86e407d2b8ec34b15acaae6ac193f8a2c1a63aa68ef69046d5d6080a693c4cfc38bc9f419c697445eb251bb861cc9af4
data/.rspec CHANGED
@@ -1 +1,2 @@
1
- --color
1
+ --color
2
+ --require spec_helper
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- measurable (0.0.3)
4
+ measurable (0.0.4)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/Rakefile CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rake'
2
2
  require 'bundler/gem_tasks'
3
+ require "rspec/core/rake_task"
3
4
 
4
5
  # Setup the necessary gems, specified in the gemspec.
5
6
  require 'bundler'
@@ -11,6 +12,9 @@ rescue Bundler::BundlerError => e
11
12
  exit e.status_code
12
13
  end
13
14
 
15
+ # Run all the specs.
16
+ RSpec::Core::RakeTask.new(:spec)
17
+
14
18
  # Compile task.
15
19
  # Rake::ExtensionTask.new do |ext|
16
20
  # ext.name = 'measurable'
data/lib/measurable.rb CHANGED
@@ -1,55 +1,47 @@
1
1
  require 'measurable/version.rb'
2
2
 
3
3
  # Distance measures.
4
- reqiore 'measurable/euclidean'
4
+ require 'measurable/euclidean'
5
5
  require 'measurable/cosine'
6
- require 'measurable/tanimoto_coefficient'
6
+ require 'measurable/tanimoto'
7
7
  require 'measurable/jaccard'
8
8
  require 'measurable/haversine'
9
+ require 'measurable/maxmin'
9
10
 
10
11
  module Measurable
11
12
  # PI = 3.1415926535
12
13
  RAD_PER_DEG = 0.017453293 # PI/180
13
-
14
- # http://en.wikipedia.org/wiki/Intersection_(set_theory)
15
- def intersection(u, v)
16
- (u & v)
17
- end
18
-
19
- # http://en.wikipedia.org/wiki/Union_(set_theory)
20
- def union(u, v)
21
- (u + v).uniq
22
- end
23
-
24
- def binary_union(u, v)
25
- unions = []
26
- u.each_with_index do |n, index|
27
- if n == 1 || v[index] == 1
28
- unions << 1
29
- else
30
- unions << 0
14
+ class << self
15
+ def binary_union(u, v)
16
+ unions = []
17
+ u.each_with_index do |n, index|
18
+ if n == 1 || v[index] == 1
19
+ unions << 1
20
+ else
21
+ unions << 0
22
+ end
31
23
  end
32
- end
33
24
 
34
- unions
35
- end
25
+ unions
26
+ end
36
27
 
37
- def binary_intersection(u, v)
38
- intersects = []
39
- u.each_with_index do |n, index|
40
- if n == 1 && v[index] == 1
41
- intersects << 1
42
- else
43
- intersects << 0
28
+ def binary_intersection(u, v)
29
+ intersects = []
30
+ u.each_with_index do |n, index|
31
+ if n == 1 && v[index] == 1
32
+ intersects << 1
33
+ else
34
+ intersects << 0
35
+ end
44
36
  end
45
- end
46
37
 
47
- intersects
48
- end
38
+ intersects
39
+ end
49
40
 
50
- # Checks if we"re dealing with NaN"s and will return 0.0 unless
51
- # handle NaN"s is set to false
52
- def handle_nan(result)
53
- result.nan? ? 0.0 : result
41
+ # Checks if we"re dealing with NaN"s and will return 0.0 unless
42
+ # handle NaN"s is set to false
43
+ def handle_nan(result)
44
+ result.nan? ? 0.0 : result
45
+ end
54
46
  end
55
47
  end
@@ -1,8 +1,10 @@
1
1
  module Measurable
2
- def cosine(u, v)
3
- dot_product = dot(u, v)
4
- normalization = self.euclidean_normalize * other.euclidean_normalize
2
+ class << self
3
+ def cosine(u, v)
4
+ dot_product = dot(u, v)
5
+ normalization = self.euclidean_normalize * other.euclidean_normalize
5
6
 
6
- handle_nan(dot_product / normalization)
7
+ handle_nan(dot_product / normalization)
8
+ end
7
9
  end
8
10
  end
@@ -1,17 +1,40 @@
1
1
  module Measurable
2
- def euclidean(u, v)
3
- sum = 0.0
2
+ class << self
3
+ # Add documentation here!
4
+ def euclidean(u, v = nil)
5
+ # If the second argument is nil, the method should return the norm of
6
+ # vector u. For this, we need the distance between u and the origin.
7
+ if v.nil?
8
+ v = Array.new(u.size, 0)
9
+ end
10
+
11
+ # We could make it work with vector of different sizes because of #zip
12
+ # but it's unreliable. It's better to just throw an exception.
13
+ # TODO: Change this to a more specific, custom-made exception.
14
+ raise ArgumentError if u.size != v.size
15
+
16
+ sum = u.zip(v).reduce(0.0) do |acc, ary|
17
+ acc += (ary[0] - ary[-1])**2
18
+ end
4
19
 
5
- u.zip(v).each do |ary|
6
- sum += (ary.first - ary.last)**2
20
+ Math.sqrt(sum)
7
21
  end
8
-
9
- Math.sqrt(sum)
10
- end
11
22
 
12
- def euclidean_squared(u, v)
13
- u.zip(v).reduce(0.0) do |acc, ary|
14
- acc += (ary.first - ary.last)**2
23
+ def euclidean_squared(u, v = nil)
24
+ # If the second argument is nil, the method should return the norm of
25
+ # vector u. For this, we need the distance between u and the origin.
26
+ if v.nil?
27
+ v = Array.new(u.size, 0)
28
+ end
29
+
30
+ # We could make it work with vector of different sizes because of #zip
31
+ # but it's unreliable. It's better to just throw an exception.
32
+ # TODO: Change this to a more specific, custom-made exception.
33
+ raise ArgumentError if u.size != v.size
34
+
35
+ u.zip(v).reduce(0.0) do |acc, ary|
36
+ acc += (ary[0] - ary[-1])**2
37
+ end
15
38
  end
16
39
  end
17
40
  end
@@ -23,22 +23,24 @@ module Measurable
23
23
  :meters => R_KM * 1000
24
24
  }
25
25
 
26
- def haversine(u, v, um = :meters)
27
- dlon = u[1] - v[1]
28
- dlat = u[0] - v[0]
26
+ class << self
27
+ def haversine(u, v, um = :meters)
28
+ dlon = u[1] - v[1]
29
+ dlat = u[0] - v[0]
29
30
 
30
- dlon_rad = dlon * RAD_PER_DEG
31
- dlat_rad = dlat * RAD_PER_DEG
31
+ dlon_rad = dlon * RAD_PER_DEG
32
+ dlat_rad = dlat * RAD_PER_DEG
32
33
 
33
- lat1_rad = v[0] * RAD_PER_DEG
34
- lon1_rad = v[1] * RAD_PER_DEG
34
+ lat1_rad = v[0] * RAD_PER_DEG
35
+ lon1_rad = v[1] * RAD_PER_DEG
35
36
 
36
- lat2_rad = u[0] * RAD_PER_DEG
37
- lon2_rad = u[1] * RAD_PER_DEG
37
+ lat2_rad = u[0] * RAD_PER_DEG
38
+ lon2_rad = u[1] * RAD_PER_DEG
38
39
 
39
- a = (Math.sin(dlat_rad/2))**2 + Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad/2))**2
40
- c = 2 * Math.atan2( Math.sqrt(a), Math.sqrt(1-a))
40
+ a = (Math.sin(dlat_rad/2))**2 + Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad/2))**2
41
+ c = 2 * Math.atan2( Math.sqrt(a), Math.sqrt(1-a))
41
42
 
42
- R[um] * c
43
+ R[um] * c
44
+ end
43
45
  end
44
46
  end
@@ -1,26 +1,26 @@
1
1
  # http://en.wikipedia.org/wiki/Jaccard_coefficient
2
2
  module Measurable
3
+ class << self
4
+ def jaccard(u, v)
5
+ 1 - jaccard_index(u, v)
6
+ end
3
7
 
4
- def jaccard(u, v)
5
- 1 - jaccard_index(u, v)
6
- end
7
-
8
- def jaccard_index(u, v)
9
- union = (u + v).uniq.size.to_f
10
- i = intersection(u, v).size.to_f
8
+ def jaccard_index(u, v)
9
+ union = (u | v).size.to_f
10
+ intersection = (u & v).size.to_f
11
11
 
12
- i / union
13
- end
12
+ intersection / union
13
+ end
14
14
 
15
- def binary_jaccard(u, v)
16
- 1 - binary_jaccard_index(u, v)
17
- end
15
+ def binary_jaccard(u, v)
16
+ 1 - binary_jaccard_index(u, v)
17
+ end
18
18
 
19
- def binary_jaccard_index(u, v)
20
- i = binary_intersection(u, v).delete_if {|x| x == 0}.size.to_f
21
- union = binary_union(u, v).delete_if {|x| x == 0}.size.to_f
19
+ def binary_jaccard_index(u, v)
20
+ intersection = binary_intersection(u, v).delete_if {|x| x == 0}.size.to_f
21
+ union = binary_union(u, v).delete_if {|x| x == 0}.size.to_f
22
22
 
23
- i / union
23
+ intersection / union
24
+ end
24
25
  end
25
-
26
26
  end
@@ -0,0 +1,13 @@
1
+ module Measurable
2
+ class << self
3
+ def maxmin(u, v)
4
+ sum_min, sum_max = u.zip(v).reduce([0.0, 0.0]) do |acc, attributes|
5
+ acc[0] += attributes.min
6
+ acc[-1] += attributes.max
7
+ acc
8
+ end
9
+
10
+ sum_min / sum_max
11
+ end
12
+ end
13
+ end
@@ -1,9 +1,11 @@
1
1
  # http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
2
2
  module Measurable
3
- def tanimoto(u, v)
4
- dot = dot(u, v).to_f
5
- result = dot / (u.sum_of_squares + v.sum_of_squares - dot).to_f
3
+ class << self
4
+ def tanimoto(u, v)
5
+ dot = dot(u, v).to_f
6
+ result = dot / (u.sum_of_squares + v.sum_of_squares - dot).to_f
6
7
 
7
- handle_nan(result)
8
+ handle_nan(result)
9
+ end
8
10
  end
9
11
  end
@@ -1,3 +1,3 @@
1
1
  module Measurable
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -1,35 +1,77 @@
1
1
  describe Measurable do
2
2
 
3
- let(:u) { [1, 3, 16] }
4
- let(:v) { [1, 4, 16] }
5
- let(:w) { [4, 5, 6] }
6
-
7
- describe "Euclidean distance" do
8
- it "accepts two arguments" do
9
- expect { Measurable::euclidean(:u) }.to raise_error(ArgumentError)
10
- expect { Measurable::euclidean(:u, :v) }.to_not raise_error(ArgumentError)
11
- expect { Measurable::euclidean(:u, :v, :w) }.to raise_error(ArgumentError)
3
+ describe "Binary union" do
4
+
5
+ end
6
+
7
+ describe "Binary intersection" do
8
+
9
+ end
10
+
11
+ describe "Euclidean" do
12
+
13
+ before :all do
14
+ @u = [1, 3, 16]
15
+ @v = [1, 4, 16]
16
+ @w = [4, 5, 6]
12
17
  end
13
18
 
14
- it "accepts one argument and returns the vector's norm"
19
+ context "Distance" do
20
+ it "accepts two arguments" do
21
+ expect { Measurable.euclidean(@u, @v) }.to_not raise_error
22
+ expect { Measurable.euclidean(@u, @v, @w) }.to raise_error(ArgumentError)
23
+ end
24
+
25
+ it "accepts one argument and returns the vector's norm" do
26
+ # Remember that 3^2 + 4^2 = 5^2.
27
+ Measurable.euclidean([3, 4]).should == 5
28
+ end
15
29
 
16
- it "should be symmetric"
30
+ it "should be symmetric" do
31
+ Measurable.euclidean(@u, @v).should == Measurable.euclidean(@v, @u)
32
+ end
17
33
 
18
- it "should return the correct value" do
19
- Measurable::euclidean(:u, :u).should == 0
20
- euclidean(:u, :v).should == 1
34
+ it "should return the correct value" do
35
+ Measurable.euclidean(@u, @u).should == 0
36
+ Measurable.euclidean(@u, @v).should == 1
37
+ end
38
+
39
+ it "shouldn't work with vectors of different length" do
40
+ expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error
41
+ end
21
42
  end
22
43
 
23
- it "shouldn't work with vectors of different length" do
24
- expect { Measurable::euclidean(:u, [2, 2, 2, 2]) }.to raise_error(DiffLengthError)
44
+ context "Squared Distance" do
45
+ it "accepts two arguments" do
46
+ expect { Measurable.euclidean_squared(@u, @v) }.to_not raise_error
47
+ expect { Measurable.euclidean_squared(@u, @v, @w) }.to raise_error(ArgumentError)
48
+ end
49
+
50
+ it "accepts one argument and returns the vector's norm" do
51
+ # Remember that 3^2 + 4^2 = 5^2.
52
+ Measurable.euclidean_squared([3, 4]).should == 25
53
+ end
54
+
55
+ it "should be symmetric" do
56
+ x = Measurable.euclidean_squared(@u, @v)
57
+ y = Measurable.euclidean_squared(@v, @u)
58
+
59
+ x.should == y
60
+ end
61
+
62
+ it "should return the correct value" do
63
+ Measurable.euclidean_squared(@u, @u).should == 0
64
+ Measurable.euclidean_squared(@u, @v).should == 1
65
+ end
66
+
67
+ it "shouldn't work with vectors of different length" do
68
+ expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error
69
+ end
25
70
  end
71
+
26
72
  end
27
73
 
28
- describe "Binary union" do
29
-
30
- describe "Binary intersection" do
31
-
32
- describe "Cosine similarity measure" do
74
+ describe "Cosine distance" do
33
75
  it "accepts two arguments"
34
76
 
35
77
  it "accepts one argument and returns the vector's norm"
@@ -55,7 +97,55 @@ describe Measurable do
55
97
  it "shouldn't work with vectors of different length"
56
98
  end
57
99
 
58
- describe "Max-min similarity measure" do
100
+ describe "Tanimoto distance" do
101
+ it "accepts two arguments"
102
+
103
+ it "accepts one argument and returns the vector's norm"
104
+
105
+ it "should be symmetric"
106
+
107
+ it "should return the correct value"
108
+
109
+ it "shouldn't work with vectors of different length"
110
+ end
111
+
112
+ describe "Haversine distance" do
113
+ it "accepts two arguments"
114
+
115
+ it "accepts one argument and returns the vector's norm"
116
+
117
+ it "should be symmetric"
118
+
119
+ it "should return the correct value"
120
+
121
+ it "shouldn't work with vectors of different length"
122
+ end
123
+
124
+ describe "Jaccard distance" do
125
+ it "accepts two arguments"
126
+
127
+ it "accepts one argument and returns the vector's norm"
128
+
129
+ it "should be symmetric"
130
+
131
+ it "should return the correct value"
132
+
133
+ it "shouldn't work with vectors of different length"
134
+ end
135
+
136
+ describe "Binary Jaccard distance" do
137
+ it "accepts two arguments"
138
+
139
+ it "accepts one argument and returns the vector's norm"
140
+
141
+ it "should be symmetric"
142
+
143
+ it "should return the correct value"
144
+
145
+ it "shouldn't work with vectors of different length"
146
+ end
147
+
148
+ describe "Max-min distance" do
59
149
  it "accepts two arguments"
60
150
 
61
151
  it "accepts one argument and returns the vector's norm"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: measurable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carlos Agarie
@@ -70,6 +70,7 @@ files:
70
70
  - lib/measurable/euclidean.rb
71
71
  - lib/measurable/haversine.rb
72
72
  - lib/measurable/jaccard.rb
73
+ - lib/measurable/maxmin.rb
73
74
  - lib/measurable/tanimoto.rb
74
75
  - lib/measurable/version.rb
75
76
  - measurable.gemspec