measurable 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c923bf9e9bd70c37d84330fcbb9d883f72344b04
4
- data.tar.gz: bea042df8b59927f38b7ace662f7f8bb41f3f33a
3
+ metadata.gz: 24f0ca4dbb60cda53bab68a614a171df7e337434
4
+ data.tar.gz: 8675c8a2e723203f287ce4dac3a6e6237fe2b675
5
5
  SHA512:
6
- metadata.gz: 5ce3eaec6a905c087b6538baf92172e0099b149eaf206b6af456645d6bc6e9b3b3975566de4c44a8dc84ef0c7dbc6cd11a53ce40d7f8de1b404540b37fa12c52
7
- data.tar.gz: a9d1eb70f7c8b1e13878f23b3001a0eabfbbbe6fb59bf9b56704b6f3e8672ea6cf0954a92d8556219eb6a8ab6f79c7489dede6b82c33230bab821b56ec63ef71
6
+ metadata.gz: ff4de5c4fbbe64592a16e7980182a76fa7e3960931d401004f9cebd8e439ea17acf0e16b8a465131b80e832fd560fd97f5fd6e1054f43678ded44d730a4e90c3
7
+ data.tar.gz: 62396f9fb4208745628848a5447872bb86e407d2b8ec34b15acaae6ac193f8a2c1a63aa68ef69046d5d6080a693c4cfc38bc9f419c697445eb251bb861cc9af4
data/.rspec CHANGED
@@ -1 +1,2 @@
1
- --color
1
+ --color
2
+ --require spec_helper
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- measurable (0.0.3)
4
+ measurable (0.0.4)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/Rakefile CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rake'
2
2
  require 'bundler/gem_tasks'
3
+ require "rspec/core/rake_task"
3
4
 
4
5
  # Setup the necessary gems, specified in the gemspec.
5
6
  require 'bundler'
@@ -11,6 +12,9 @@ rescue Bundler::BundlerError => e
11
12
  exit e.status_code
12
13
  end
13
14
 
15
+ # Run all the specs.
16
+ RSpec::Core::RakeTask.new(:spec)
17
+
14
18
  # Compile task.
15
19
  # Rake::ExtensionTask.new do |ext|
16
20
  # ext.name = 'measurable'
data/lib/measurable.rb CHANGED
@@ -1,55 +1,47 @@
1
1
  require 'measurable/version.rb'
2
2
 
3
3
  # Distance measures.
4
- reqiore 'measurable/euclidean'
4
+ require 'measurable/euclidean'
5
5
  require 'measurable/cosine'
6
- require 'measurable/tanimoto_coefficient'
6
+ require 'measurable/tanimoto'
7
7
  require 'measurable/jaccard'
8
8
  require 'measurable/haversine'
9
+ require 'measurable/maxmin'
9
10
 
10
11
  module Measurable
11
12
  # PI = 3.1415926535
12
13
  RAD_PER_DEG = 0.017453293 # PI/180
13
-
14
- # http://en.wikipedia.org/wiki/Intersection_(set_theory)
15
- def intersection(u, v)
16
- (u & v)
17
- end
18
-
19
- # http://en.wikipedia.org/wiki/Union_(set_theory)
20
- def union(u, v)
21
- (u + v).uniq
22
- end
23
-
24
- def binary_union(u, v)
25
- unions = []
26
- u.each_with_index do |n, index|
27
- if n == 1 || v[index] == 1
28
- unions << 1
29
- else
30
- unions << 0
14
+ class << self
15
+ def binary_union(u, v)
16
+ unions = []
17
+ u.each_with_index do |n, index|
18
+ if n == 1 || v[index] == 1
19
+ unions << 1
20
+ else
21
+ unions << 0
22
+ end
31
23
  end
32
- end
33
24
 
34
- unions
35
- end
25
+ unions
26
+ end
36
27
 
37
- def binary_intersection(u, v)
38
- intersects = []
39
- u.each_with_index do |n, index|
40
- if n == 1 && v[index] == 1
41
- intersects << 1
42
- else
43
- intersects << 0
28
+ def binary_intersection(u, v)
29
+ intersects = []
30
+ u.each_with_index do |n, index|
31
+ if n == 1 && v[index] == 1
32
+ intersects << 1
33
+ else
34
+ intersects << 0
35
+ end
44
36
  end
45
- end
46
37
 
47
- intersects
48
- end
38
+ intersects
39
+ end
49
40
 
50
- # Checks if we"re dealing with NaN"s and will return 0.0 unless
51
- # handle NaN"s is set to false
52
- def handle_nan(result)
53
- result.nan? ? 0.0 : result
41
+ # Checks if we"re dealing with NaN"s and will return 0.0 unless
42
+ # handle NaN"s is set to false
43
+ def handle_nan(result)
44
+ result.nan? ? 0.0 : result
45
+ end
54
46
  end
55
47
  end
@@ -1,8 +1,10 @@
1
1
  module Measurable
2
- def cosine(u, v)
3
- dot_product = dot(u, v)
4
- normalization = self.euclidean_normalize * other.euclidean_normalize
2
+ class << self
3
+ def cosine(u, v)
4
+ dot_product = dot(u, v)
5
+ normalization = self.euclidean_normalize * other.euclidean_normalize
5
6
 
6
- handle_nan(dot_product / normalization)
7
+ handle_nan(dot_product / normalization)
8
+ end
7
9
  end
8
10
  end
@@ -1,17 +1,40 @@
1
1
  module Measurable
2
- def euclidean(u, v)
3
- sum = 0.0
2
+ class << self
3
+ # Add documentation here!
4
+ def euclidean(u, v = nil)
5
+ # If the second argument is nil, the method should return the norm of
6
+ # vector u. For this, we need the distance between u and the origin.
7
+ if v.nil?
8
+ v = Array.new(u.size, 0)
9
+ end
10
+
11
+ # We could make it work with vector of different sizes because of #zip
12
+ # but it's unreliable. It's better to just throw an exception.
13
+ # TODO: Change this to a more specific, custom-made exception.
14
+ raise ArgumentError if u.size != v.size
15
+
16
+ sum = u.zip(v).reduce(0.0) do |acc, ary|
17
+ acc += (ary[0] - ary[-1])**2
18
+ end
4
19
 
5
- u.zip(v).each do |ary|
6
- sum += (ary.first - ary.last)**2
20
+ Math.sqrt(sum)
7
21
  end
8
-
9
- Math.sqrt(sum)
10
- end
11
22
 
12
- def euclidean_squared(u, v)
13
- u.zip(v).reduce(0.0) do |acc, ary|
14
- acc += (ary.first - ary.last)**2
23
+ def euclidean_squared(u, v = nil)
24
+ # If the second argument is nil, the method should return the norm of
25
+ # vector u. For this, we need the distance between u and the origin.
26
+ if v.nil?
27
+ v = Array.new(u.size, 0)
28
+ end
29
+
30
+ # We could make it work with vector of different sizes because of #zip
31
+ # but it's unreliable. It's better to just throw an exception.
32
+ # TODO: Change this to a more specific, custom-made exception.
33
+ raise ArgumentError if u.size != v.size
34
+
35
+ u.zip(v).reduce(0.0) do |acc, ary|
36
+ acc += (ary[0] - ary[-1])**2
37
+ end
15
38
  end
16
39
  end
17
40
  end
@@ -23,22 +23,24 @@ module Measurable
23
23
  :meters => R_KM * 1000
24
24
  }
25
25
 
26
- def haversine(u, v, um = :meters)
27
- dlon = u[1] - v[1]
28
- dlat = u[0] - v[0]
26
+ class << self
27
+ def haversine(u, v, um = :meters)
28
+ dlon = u[1] - v[1]
29
+ dlat = u[0] - v[0]
29
30
 
30
- dlon_rad = dlon * RAD_PER_DEG
31
- dlat_rad = dlat * RAD_PER_DEG
31
+ dlon_rad = dlon * RAD_PER_DEG
32
+ dlat_rad = dlat * RAD_PER_DEG
32
33
 
33
- lat1_rad = v[0] * RAD_PER_DEG
34
- lon1_rad = v[1] * RAD_PER_DEG
34
+ lat1_rad = v[0] * RAD_PER_DEG
35
+ lon1_rad = v[1] * RAD_PER_DEG
35
36
 
36
- lat2_rad = u[0] * RAD_PER_DEG
37
- lon2_rad = u[1] * RAD_PER_DEG
37
+ lat2_rad = u[0] * RAD_PER_DEG
38
+ lon2_rad = u[1] * RAD_PER_DEG
38
39
 
39
- a = (Math.sin(dlat_rad/2))**2 + Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad/2))**2
40
- c = 2 * Math.atan2( Math.sqrt(a), Math.sqrt(1-a))
40
+ a = (Math.sin(dlat_rad/2))**2 + Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad/2))**2
41
+ c = 2 * Math.atan2( Math.sqrt(a), Math.sqrt(1-a))
41
42
 
42
- R[um] * c
43
+ R[um] * c
44
+ end
43
45
  end
44
46
  end
@@ -1,26 +1,26 @@
1
1
  # http://en.wikipedia.org/wiki/Jaccard_coefficient
2
2
  module Measurable
3
+ class << self
4
+ def jaccard(u, v)
5
+ 1 - jaccard_index(u, v)
6
+ end
3
7
 
4
- def jaccard(u, v)
5
- 1 - jaccard_index(u, v)
6
- end
7
-
8
- def jaccard_index(u, v)
9
- union = (u + v).uniq.size.to_f
10
- i = intersection(u, v).size.to_f
8
+ def jaccard_index(u, v)
9
+ union = (u | v).size.to_f
10
+ intersection = (u & v).size.to_f
11
11
 
12
- i / union
13
- end
12
+ intersection / union
13
+ end
14
14
 
15
- def binary_jaccard(u, v)
16
- 1 - binary_jaccard_index(u, v)
17
- end
15
+ def binary_jaccard(u, v)
16
+ 1 - binary_jaccard_index(u, v)
17
+ end
18
18
 
19
- def binary_jaccard_index(u, v)
20
- i = binary_intersection(u, v).delete_if {|x| x == 0}.size.to_f
21
- union = binary_union(u, v).delete_if {|x| x == 0}.size.to_f
19
+ def binary_jaccard_index(u, v)
20
+ intersection = binary_intersection(u, v).delete_if {|x| x == 0}.size.to_f
21
+ union = binary_union(u, v).delete_if {|x| x == 0}.size.to_f
22
22
 
23
- i / union
23
+ intersection / union
24
+ end
24
25
  end
25
-
26
26
  end
@@ -0,0 +1,13 @@
1
+ module Measurable
2
+ class << self
3
+ def maxmin(u, v)
4
+ sum_min, sum_max = u.zip(v).reduce([0.0, 0.0]) do |acc, attributes|
5
+ acc[0] += attributes.min
6
+ acc[-1] += attributes.max
7
+ acc
8
+ end
9
+
10
+ sum_min / sum_max
11
+ end
12
+ end
13
+ end
@@ -1,9 +1,11 @@
1
1
  # http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
2
2
  module Measurable
3
- def tanimoto(u, v)
4
- dot = dot(u, v).to_f
5
- result = dot / (u.sum_of_squares + v.sum_of_squares - dot).to_f
3
+ class << self
4
+ def tanimoto(u, v)
5
+ dot = dot(u, v).to_f
6
+ result = dot / (u.sum_of_squares + v.sum_of_squares - dot).to_f
6
7
 
7
- handle_nan(result)
8
+ handle_nan(result)
9
+ end
8
10
  end
9
11
  end
@@ -1,3 +1,3 @@
1
1
  module Measurable
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -1,35 +1,77 @@
1
1
  describe Measurable do
2
2
 
3
- let(:u) { [1, 3, 16] }
4
- let(:v) { [1, 4, 16] }
5
- let(:w) { [4, 5, 6] }
6
-
7
- describe "Euclidean distance" do
8
- it "accepts two arguments" do
9
- expect { Measurable::euclidean(:u) }.to raise_error(ArgumentError)
10
- expect { Measurable::euclidean(:u, :v) }.to_not raise_error(ArgumentError)
11
- expect { Measurable::euclidean(:u, :v, :w) }.to raise_error(ArgumentError)
3
+ describe "Binary union" do
4
+
5
+ end
6
+
7
+ describe "Binary intersection" do
8
+
9
+ end
10
+
11
+ describe "Euclidean" do
12
+
13
+ before :all do
14
+ @u = [1, 3, 16]
15
+ @v = [1, 4, 16]
16
+ @w = [4, 5, 6]
12
17
  end
13
18
 
14
- it "accepts one argument and returns the vector's norm"
19
+ context "Distance" do
20
+ it "accepts two arguments" do
21
+ expect { Measurable.euclidean(@u, @v) }.to_not raise_error
22
+ expect { Measurable.euclidean(@u, @v, @w) }.to raise_error(ArgumentError)
23
+ end
24
+
25
+ it "accepts one argument and returns the vector's norm" do
26
+ # Remember that 3^2 + 4^2 = 5^2.
27
+ Measurable.euclidean([3, 4]).should == 5
28
+ end
15
29
 
16
- it "should be symmetric"
30
+ it "should be symmetric" do
31
+ Measurable.euclidean(@u, @v).should == Measurable.euclidean(@v, @u)
32
+ end
17
33
 
18
- it "should return the correct value" do
19
- Measurable::euclidean(:u, :u).should == 0
20
- euclidean(:u, :v).should == 1
34
+ it "should return the correct value" do
35
+ Measurable.euclidean(@u, @u).should == 0
36
+ Measurable.euclidean(@u, @v).should == 1
37
+ end
38
+
39
+ it "shouldn't work with vectors of different length" do
40
+ expect { Measurable.euclidean(@u, [2, 2, 2, 2]) }.to raise_error
41
+ end
21
42
  end
22
43
 
23
- it "shouldn't work with vectors of different length" do
24
- expect { Measurable::euclidean(:u, [2, 2, 2, 2]) }.to raise_error(DiffLengthError)
44
+ context "Squared Distance" do
45
+ it "accepts two arguments" do
46
+ expect { Measurable.euclidean_squared(@u, @v) }.to_not raise_error
47
+ expect { Measurable.euclidean_squared(@u, @v, @w) }.to raise_error(ArgumentError)
48
+ end
49
+
50
+ it "accepts one argument and returns the vector's norm" do
51
+ # Remember that 3^2 + 4^2 = 5^2.
52
+ Measurable.euclidean_squared([3, 4]).should == 25
53
+ end
54
+
55
+ it "should be symmetric" do
56
+ x = Measurable.euclidean_squared(@u, @v)
57
+ y = Measurable.euclidean_squared(@v, @u)
58
+
59
+ x.should == y
60
+ end
61
+
62
+ it "should return the correct value" do
63
+ Measurable.euclidean_squared(@u, @u).should == 0
64
+ Measurable.euclidean_squared(@u, @v).should == 1
65
+ end
66
+
67
+ it "shouldn't work with vectors of different length" do
68
+ expect { Measurable.euclidean_squared(@u, [2, 2, 2, 2]) }.to raise_error
69
+ end
25
70
  end
71
+
26
72
  end
27
73
 
28
- describe "Binary union" do
29
-
30
- describe "Binary intersection" do
31
-
32
- describe "Cosine similarity measure" do
74
+ describe "Cosine distance" do
33
75
  it "accepts two arguments"
34
76
 
35
77
  it "accepts one argument and returns the vector's norm"
@@ -55,7 +97,55 @@ describe Measurable do
55
97
  it "shouldn't work with vectors of different length"
56
98
  end
57
99
 
58
- describe "Max-min similarity measure" do
100
+ describe "Tanimoto distance" do
101
+ it "accepts two arguments"
102
+
103
+ it "accepts one argument and returns the vector's norm"
104
+
105
+ it "should be symmetric"
106
+
107
+ it "should return the correct value"
108
+
109
+ it "shouldn't work with vectors of different length"
110
+ end
111
+
112
+ describe "Haversine distance" do
113
+ it "accepts two arguments"
114
+
115
+ it "accepts one argument and returns the vector's norm"
116
+
117
+ it "should be symmetric"
118
+
119
+ it "should return the correct value"
120
+
121
+ it "shouldn't work with vectors of different length"
122
+ end
123
+
124
+ describe "Jaccard distance" do
125
+ it "accepts two arguments"
126
+
127
+ it "accepts one argument and returns the vector's norm"
128
+
129
+ it "should be symmetric"
130
+
131
+ it "should return the correct value"
132
+
133
+ it "shouldn't work with vectors of different length"
134
+ end
135
+
136
+ describe "Binary Jaccard distance" do
137
+ it "accepts two arguments"
138
+
139
+ it "accepts one argument and returns the vector's norm"
140
+
141
+ it "should be symmetric"
142
+
143
+ it "should return the correct value"
144
+
145
+ it "shouldn't work with vectors of different length"
146
+ end
147
+
148
+ describe "Max-min distance" do
59
149
  it "accepts two arguments"
60
150
 
61
151
  it "accepts one argument and returns the vector's norm"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: measurable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carlos Agarie
@@ -70,6 +70,7 @@ files:
70
70
  - lib/measurable/euclidean.rb
71
71
  - lib/measurable/haversine.rb
72
72
  - lib/measurable/jaccard.rb
73
+ - lib/measurable/maxmin.rb
73
74
  - lib/measurable/tanimoto.rb
74
75
  - lib/measurable/version.rb
75
76
  - measurable.gemspec