measurable 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.rspec +1 -0
- data/Gemfile.lock +2 -5
- data/LICENSE +3 -1
- data/README.md +37 -45
- data/Rakefile +7 -7
- data/lib/measurable.rb +40 -17
- data/lib/measurable/{cosine_similarity.rb → cosine.rb} +2 -2
- data/lib/measurable/euclidean.rb +17 -0
- data/lib/measurable/haversine.rb +14 -16
- data/lib/measurable/jaccard.rb +12 -12
- data/lib/measurable/{tanimoto_coefficient.rb → tanimoto.rb} +3 -3
- data/lib/measurable/version.rb +1 -1
- data/measurable.gemspec +7 -9
- data/spec/measurable_spec.rb +69 -0
- data/spec/spec_helper.rb +0 -5
- metadata +20 -45
- data/ext/measurable/extconf.rb +0 -5
- data/ext/measurable/measurable.c +0 -209
- data/spec/measurable.rb +0 -106
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c923bf9e9bd70c37d84330fcbb9d883f72344b04
|
4
|
+
data.tar.gz: bea042df8b59927f38b7ace662f7f8bb41f3f33a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5ce3eaec6a905c087b6538baf92172e0099b149eaf206b6af456645d6bc6e9b3b3975566de4c44a8dc84ef0c7dbc6cd11a53ce40d7f8de1b404540b37fa12c52
|
7
|
+
data.tar.gz: a9d1eb70f7c8b1e13878f23b3001a0eabfbbbe6fb59bf9b56704b6f3e8672ea6cf0954a92d8556219eb6a8ab6f79c7489dede6b82c33230bab821b56ec63ef71
|
data/.gitignore
CHANGED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
|
4
|
+
measurable (0.0.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: http://rubygems.org/
|
8
8
|
specs:
|
9
9
|
diff-lcs (1.1.3)
|
10
10
|
rake (0.9.2.2)
|
11
|
-
rake-compiler (0.8.1)
|
12
|
-
rake
|
13
11
|
rspec (2.9.0)
|
14
12
|
rspec-core (~> 2.9.0)
|
15
13
|
rspec-expectations (~> 2.9.0)
|
@@ -24,7 +22,6 @@ PLATFORMS
|
|
24
22
|
|
25
23
|
DEPENDENCIES
|
26
24
|
bundler
|
27
|
-
|
25
|
+
measurable!
|
28
26
|
rake (~> 0.9)
|
29
|
-
rake-compiler (~> 0.8.1)
|
30
27
|
rspec (~> 2.9.0)
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,13 +1,39 @@
|
|
1
1
|
# Measurable
|
2
2
|
|
3
|
-
This
|
3
|
+
This gem encompasses various distance measures. Besides the `Array` class, I also want to support [NMatrix](http://github.com/sciruby/nmatrix)'s `NVector`.
|
4
|
+
|
5
|
+
My objective is to be able to compare different metrics just by changing which method is called. Also, to show how to use NMatrix's C API. I'll create most of the things in pure Ruby first, then the most used operations (or the slowest ones) will be rewritten in C.
|
4
6
|
|
5
7
|
This is a fork of the gem [Distance Measure](https://github.com/reddavis/Distance-Measures), which has a similar objective, but isn't actively maintained and doesn't support NMatrix. Thank you, [reddavis](https://github.com/reddavis). :)
|
6
8
|
|
7
|
-
|
9
|
+
## Install
|
10
|
+
|
11
|
+
`gem install measurable`
|
12
|
+
|
13
|
+
It only works with Ruby MRI 1.9.3 or 2.0.0. I still want to test it on JRuby, but as it's still pure Ruby, it should work correctly there.
|
14
|
+
|
15
|
+
## Distance measures that I want to support for the moment
|
16
|
+
|
17
|
+
- Euclidean distance
|
18
|
+
- Squared euclidean distance
|
19
|
+
- Cosine distance
|
20
|
+
- Max-min distance (["K-Means clustering using max-min distance measure"][1])
|
21
|
+
- Jaccard distance
|
22
|
+
- Tanimoto distance
|
23
|
+
|
24
|
+
These still need to be implemented:
|
25
|
+
|
26
|
+
- Cityblock distance
|
27
|
+
- Chebyshev distance
|
28
|
+
- Minkowski distance
|
29
|
+
- Hamming distance
|
30
|
+
- Correlation distance
|
31
|
+
- Chi-square distance
|
32
|
+
- Kullback-Leibler divergence
|
33
|
+
- Jensen-Shannon divergence
|
34
|
+
- Mahalanobis distance
|
35
|
+
- Squared Mahalanobis distance
|
8
36
|
|
9
|
-
I'll update this section when I publish the gem. For now... wait.
|
10
|
-
|
11
37
|
## How to use
|
12
38
|
|
13
39
|
This list will be updated as I have time. I'll refactor the existing measures and add some that I'll need in a project.
|
@@ -20,54 +46,20 @@ require "measurable"
|
|
20
46
|
u = NVector.ones(2)
|
21
47
|
v = NVector.zeros(2)
|
22
48
|
w = [1, 0]
|
49
|
+
x = [2, 2]
|
23
50
|
|
24
51
|
Measurable::euclidean(u, v) # => 1.41421
|
25
52
|
Measurable::euclidean(w, v) # => 1.00000
|
26
53
|
Measurable::euclidean(w, w) # => 0.00000
|
54
|
+
Measurable::
|
27
55
|
```
|
28
56
|
|
29
|
-
Maybe add
|
30
|
-
|
31
|
-
## How to use, the old way:
|
32
|
-
|
33
|
-
a = [1,1]
|
34
|
-
b = [2,2]
|
35
|
-
|
36
|
-
a.euclidean_distance(b)
|
37
|
-
|
38
|
-
a.cosine_similarity(b)
|
39
|
-
|
40
|
-
a.jaccard_index(b)
|
41
|
-
|
42
|
-
a.jaccard_distance(b)
|
43
|
-
|
44
|
-
a.binary_jaccard_index(b)
|
45
|
-
|
46
|
-
a.binary_jaccard_distance(b)
|
47
|
-
|
48
|
-
a.tanimoto_coefficient(b)
|
49
|
-
|
50
|
-
a.haversine_distance(b)
|
51
|
-
|
52
|
-
This may or may not be the complete list, best thing is to check the source code.
|
53
|
-
|
54
|
-
There are also a couple bonus methods:
|
55
|
-
|
56
|
-
a.dot_product(b)
|
57
|
-
|
58
|
-
a.sum_of_squares
|
59
|
-
|
60
|
-
a.intersection_with(b)
|
61
|
-
|
62
|
-
a.union_with(b)
|
63
|
-
|
64
|
-
# When your dealing with 1's and 0's
|
65
|
-
a.binary_intersection_with(b)
|
66
|
-
|
67
|
-
a.binary_union_with(b)
|
57
|
+
Maybe add support for (some of) NMatrix's dtypes, like `:float32`, `:float64`, `:complex64`, `:complex128`, etc. This will have to wait until Measurable supports NMatrix's C API.
|
68
58
|
|
69
59
|
## License
|
70
60
|
|
71
|
-
|
61
|
+
See LICENSE for details.
|
62
|
+
|
63
|
+
The original `Distance Measure` gem is copyrighted by @reddavis.
|
72
64
|
|
73
|
-
|
65
|
+
[1]: http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=05156398
|
data/Rakefile
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rake'
|
2
|
-
require
|
2
|
+
require 'bundler/gem_tasks'
|
3
3
|
|
4
4
|
# Setup the necessary gems, specified in the gemspec.
|
5
5
|
require 'bundler'
|
@@ -12,9 +12,9 @@ rescue Bundler::BundlerError => e
|
|
12
12
|
end
|
13
13
|
|
14
14
|
# Compile task.
|
15
|
-
Rake::ExtensionTask.new do |ext|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
15
|
+
# Rake::ExtensionTask.new do |ext|
|
16
|
+
# ext.name = 'measurable'
|
17
|
+
# ext.ext_dir = 'ext/measurable'
|
18
|
+
# ext.lib_dir = 'lib/'
|
19
|
+
# ext.source_pattern = "**/*.{c, cpp, h}"
|
20
|
+
# end
|
data/lib/measurable.rb
CHANGED
@@ -1,32 +1,55 @@
|
|
1
|
-
|
1
|
+
require 'measurable/version.rb'
|
2
2
|
|
3
|
-
|
3
|
+
# Distance measures.
|
4
|
+
require 'measurable/euclidean'
|
5
|
+
require 'measurable/cosine'
|
6
|
+
require 'measurable/tanimoto'
|
7
|
+
require 'measurable/jaccard'
|
8
|
+
require 'measurable/haversine'
|
4
9
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
require "measurable/haversine"
|
9
|
-
|
10
|
-
require "measurable.so"
|
11
|
-
|
12
|
-
class Array
|
13
|
-
include Measurable
|
10
|
+
module Measurable
|
11
|
+
# PI = 3.1415926535
|
12
|
+
RAD_PER_DEG = 0.017453293 # PI/180
|
14
13
|
|
15
14
|
# http://en.wikipedia.org/wiki/Intersection_(set_theory)
|
16
|
-
def
|
17
|
-
(
|
15
|
+
def intersection(u, v)
|
16
|
+
(u & v)
|
18
17
|
end
|
19
18
|
|
20
19
|
# http://en.wikipedia.org/wiki/Union_(set_theory)
|
21
|
-
def
|
22
|
-
(
|
20
|
+
def union(u, v)
|
21
|
+
(u + v).uniq
|
23
22
|
end
|
24
23
|
|
25
|
-
|
24
|
+
def binary_union(u, v)
|
25
|
+
unions = []
|
26
|
+
u.each_with_index do |n, index|
|
27
|
+
if n == 1 || v[index] == 1
|
28
|
+
unions << 1
|
29
|
+
else
|
30
|
+
unions << 0
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
unions
|
35
|
+
end
|
36
|
+
|
37
|
+
def binary_intersection(u, v)
|
38
|
+
intersects = []
|
39
|
+
u.each_with_index do |n, index|
|
40
|
+
if n == 1 && v[index] == 1
|
41
|
+
intersects << 1
|
42
|
+
else
|
43
|
+
intersects << 0
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
intersects
|
48
|
+
end
|
26
49
|
|
27
50
|
# Checks if we're dealing with NaN's and will return 0.0 unless
|
28
51
|
# handle NaN's is set to false
|
29
52
|
def handle_nan(result)
|
30
53
|
result.nan? ? 0.0 : result
|
31
54
|
end
|
32
|
-
end
|
55
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Measurable
|
2
|
+
def euclidean(u, v)
|
3
|
+
sum = 0.0
|
4
|
+
|
5
|
+
u.zip(v).each do |ary|
|
6
|
+
sum += (ary.first - ary.last)**2
|
7
|
+
end
|
8
|
+
|
9
|
+
Math.sqrt(sum)
|
10
|
+
end
|
11
|
+
|
12
|
+
def euclidean_squared(u, v)
|
13
|
+
u.zip(v).reduce(0.0) do |acc, ary|
|
14
|
+
acc += (ary.first - ary.last)**2
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/measurable/haversine.rb
CHANGED
@@ -1,19 +1,17 @@
|
|
1
|
-
#
|
2
1
|
# Notes:
|
3
2
|
#
|
4
3
|
# translated into Ruby based on information contained in:
|
5
|
-
#
|
6
|
-
#
|
7
|
-
# http://en.wikipedia.org/wiki/Haversine_formula
|
4
|
+
# http://mathforum.org/library/drmath/view/51879.html
|
5
|
+
# Dr. Rick and Dr. Peterson - 4/20/99
|
8
6
|
#
|
9
|
-
#
|
10
|
-
#
|
7
|
+
# http://www.movable-type.co.uk/scripts/latlong.html
|
8
|
+
# http://en.wikipedia.org/wiki/Haversine_formula
|
9
|
+
#
|
10
|
+
# This formula can compute accurate distances between two points given latitude
|
11
|
+
# and longitude, even for short distances.
|
11
12
|
|
12
13
|
module Measurable
|
13
14
|
|
14
|
-
# PI = 3.1415926535
|
15
|
-
RAD_PER_DEG = 0.017453293 # PI/180
|
16
|
-
|
17
15
|
R_MILES = 3956 # radius of the great circle in miles
|
18
16
|
R_KM = 6371 # radius in kilometers...some algorithms use 6367
|
19
17
|
|
@@ -25,18 +23,18 @@ module Measurable
|
|
25
23
|
:meters => R_KM * 1000
|
26
24
|
}
|
27
25
|
|
28
|
-
def
|
29
|
-
dlon =
|
30
|
-
dlat =
|
26
|
+
def haversine(u, v, um = :meters)
|
27
|
+
dlon = u[1] - v[1]
|
28
|
+
dlat = u[0] - v[0]
|
31
29
|
|
32
30
|
dlon_rad = dlon * RAD_PER_DEG
|
33
31
|
dlat_rad = dlat * RAD_PER_DEG
|
34
32
|
|
35
|
-
lat1_rad =
|
36
|
-
lon1_rad =
|
33
|
+
lat1_rad = v[0] * RAD_PER_DEG
|
34
|
+
lon1_rad = v[1] * RAD_PER_DEG
|
37
35
|
|
38
|
-
lat2_rad =
|
39
|
-
lon2_rad =
|
36
|
+
lat2_rad = u[0] * RAD_PER_DEG
|
37
|
+
lon2_rad = u[1] * RAD_PER_DEG
|
40
38
|
|
41
39
|
a = (Math.sin(dlat_rad/2))**2 + Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad/2))**2
|
42
40
|
c = 2 * Math.atan2( Math.sqrt(a), Math.sqrt(1-a))
|
data/lib/measurable/jaccard.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
# http://en.wikipedia.org/wiki/Jaccard_coefficient
|
2
2
|
module Measurable
|
3
3
|
|
4
|
-
def
|
5
|
-
1 -
|
4
|
+
def jaccard(u, v)
|
5
|
+
1 - jaccard_index(u, v)
|
6
6
|
end
|
7
7
|
|
8
|
-
def jaccard_index(
|
9
|
-
union = (
|
10
|
-
|
8
|
+
def jaccard_index(u, v)
|
9
|
+
union = (u + v).uniq.size.to_f
|
10
|
+
i = intersection(u, v).size.to_f
|
11
11
|
|
12
|
-
|
12
|
+
i / union
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
16
|
-
1 -
|
15
|
+
def binary_jaccard(u, v)
|
16
|
+
1 - binary_jaccard_index(u, v)
|
17
17
|
end
|
18
18
|
|
19
|
-
def binary_jaccard_index(
|
20
|
-
|
21
|
-
union =
|
19
|
+
def binary_jaccard_index(u, v)
|
20
|
+
i = binary_intersection(u, v).delete_if {|x| x == 0}.size.to_f
|
21
|
+
union = binary_union(u, v).delete_if {|x| x == 0}.size.to_f
|
22
22
|
|
23
|
-
|
23
|
+
i / union
|
24
24
|
end
|
25
25
|
|
26
26
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
|
2
2
|
module Measurable
|
3
|
-
def
|
4
|
-
dot =
|
5
|
-
result = dot / (
|
3
|
+
def tanimoto(u, v)
|
4
|
+
dot = dot(u, v).to_f
|
5
|
+
result = dot / (u.sum_of_squares + v.sum_of_squares - dot).to_f
|
6
6
|
|
7
7
|
handle_nan(result)
|
8
8
|
end
|
data/lib/measurable/version.rb
CHANGED
data/measurable.gemspec
CHANGED
@@ -1,30 +1,28 @@
|
|
1
|
-
|
2
|
-
$:.unshift lib unless $:.include?(lib)
|
1
|
+
$:.unshift File.expand_path('../lib/', __FILE__)
|
3
2
|
|
4
3
|
require 'measurable/version'
|
4
|
+
require 'date'
|
5
5
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
7
|
gem.name = "measurable"
|
8
8
|
gem.version = Measurable::VERSION
|
9
9
|
gem.date = Date.today.to_s
|
10
|
-
gem.summary = %Q{A Ruby
|
11
|
-
gem.description = %Q{A Ruby
|
10
|
+
gem.summary = %Q{A Ruby gem with a lot of distance measures for your projects.}
|
11
|
+
gem.description = %Q{A Ruby gem with a lot of distance measures for your projects.}
|
12
12
|
|
13
13
|
gem.authors = ["Carlos Agarie"]
|
14
|
-
gem.email = "carlos@
|
14
|
+
gem.email = "carlos.agarie@gmail.com"
|
15
15
|
gem.homepage = "http://github.com/agarie/measurable"
|
16
16
|
|
17
17
|
gem.files = `git ls-files`.split("\n")
|
18
18
|
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
19
19
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
|
21
|
-
gem.require_paths = ["lib"]
|
22
|
-
gem.extensions = ['ext/measurable/extconf.rb']
|
21
|
+
gem.require_paths = ["lib"]
|
23
22
|
|
24
|
-
gem.required_ruby_version = '>= 1.9.
|
23
|
+
gem.required_ruby_version = '>= 1.9.3'
|
25
24
|
|
26
25
|
gem.add_development_dependency 'bundler'
|
27
26
|
gem.add_development_dependency 'rake', '~> 0.9'
|
28
|
-
gem.add_development_dependency 'rake-compiler', '~> 0.8.1'
|
29
27
|
gem.add_development_dependency 'rspec', '~> 2.9.0'
|
30
28
|
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
describe Measurable do
|
2
|
+
|
3
|
+
let(:u) { [1, 3, 16] }
|
4
|
+
let(:v) { [1, 4, 16] }
|
5
|
+
let(:w) { [4, 5, 6] }
|
6
|
+
|
7
|
+
describe "Euclidean distance" do
|
8
|
+
it "accepts two arguments" do
|
9
|
+
expect { Measurable::euclidean(:u) }.to raise_error(ArgumentError)
|
10
|
+
expect { Measurable::euclidean(:u, :v) }.to_not raise_error(ArgumentError)
|
11
|
+
expect { Measurable::euclidean(:u, :v, :w) }.to raise_error(ArgumentError)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "accepts one argument and returns the vector's norm"
|
15
|
+
|
16
|
+
it "should be symmetric"
|
17
|
+
|
18
|
+
it "should return the correct value" do
|
19
|
+
Measurable::euclidean(:u, :u).should == 0
|
20
|
+
euclidean(:u, :v).should == 1
|
21
|
+
end
|
22
|
+
|
23
|
+
it "shouldn't work with vectors of different length" do
|
24
|
+
expect { Measurable::euclidean(:u, [2, 2, 2, 2]) }.to raise_error(DiffLengthError)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "Binary union" do
|
29
|
+
|
30
|
+
describe "Binary intersection" do
|
31
|
+
|
32
|
+
describe "Cosine similarity measure" do
|
33
|
+
it "accepts two arguments"
|
34
|
+
|
35
|
+
it "accepts one argument and returns the vector's norm"
|
36
|
+
|
37
|
+
it "should handle NaN's"
|
38
|
+
|
39
|
+
it "should be symmetric"
|
40
|
+
|
41
|
+
it "should return the correct value"
|
42
|
+
|
43
|
+
it "shouldn't work with vectors of different length"
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "Chebyshev distance" do
|
47
|
+
it "accepts two arguments"
|
48
|
+
|
49
|
+
it "accepts one argument and returns the vector's norm"
|
50
|
+
|
51
|
+
it "should be symmetric"
|
52
|
+
|
53
|
+
it "should return the correct value"
|
54
|
+
|
55
|
+
it "shouldn't work with vectors of different length"
|
56
|
+
end
|
57
|
+
|
58
|
+
describe "Max-min similarity measure" do
|
59
|
+
it "accepts two arguments"
|
60
|
+
|
61
|
+
it "accepts one argument and returns the vector's norm"
|
62
|
+
|
63
|
+
it "should be symmetric"
|
64
|
+
|
65
|
+
it "should return the correct value"
|
66
|
+
|
67
|
+
it "shouldn't work with vectors of different length"
|
68
|
+
end
|
69
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,36 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: measurable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Carlos Agarie
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-03-24 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ~>
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,31 +34,13 @@ dependencies:
|
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ~>
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0.9'
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: rake-compiler
|
48
|
-
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - ~>
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: 0.8.1
|
54
|
-
type: :development
|
55
|
-
prerelease: false
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
|
-
requirements:
|
59
|
-
- - ~>
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 0.8.1
|
62
41
|
- !ruby/object:Gem::Dependency
|
63
42
|
name: rspec
|
64
43
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
44
|
requirements:
|
67
45
|
- - ~>
|
68
46
|
- !ruby/object:Gem::Version
|
@@ -70,59 +48,56 @@ dependencies:
|
|
70
48
|
type: :development
|
71
49
|
prerelease: false
|
72
50
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
51
|
requirements:
|
75
52
|
- - ~>
|
76
53
|
- !ruby/object:Gem::Version
|
77
54
|
version: 2.9.0
|
78
|
-
description: A Ruby
|
79
|
-
email: carlos@
|
55
|
+
description: A Ruby gem with a lot of distance measures for your projects.
|
56
|
+
email: carlos.agarie@gmail.com
|
80
57
|
executables: []
|
81
|
-
extensions:
|
82
|
-
- ext/measurable/extconf.rb
|
58
|
+
extensions: []
|
83
59
|
extra_rdoc_files: []
|
84
60
|
files:
|
85
61
|
- .gitignore
|
62
|
+
- .rspec
|
86
63
|
- Gemfile
|
87
64
|
- Gemfile.lock
|
88
65
|
- LICENSE
|
89
66
|
- README.md
|
90
67
|
- Rakefile
|
91
|
-
- ext/measurable/extconf.rb
|
92
|
-
- ext/measurable/measurable.c
|
93
68
|
- lib/measurable.rb
|
94
|
-
- lib/measurable/
|
69
|
+
- lib/measurable/cosine.rb
|
70
|
+
- lib/measurable/euclidean.rb
|
95
71
|
- lib/measurable/haversine.rb
|
96
72
|
- lib/measurable/jaccard.rb
|
97
|
-
- lib/measurable/
|
73
|
+
- lib/measurable/tanimoto.rb
|
98
74
|
- lib/measurable/version.rb
|
99
75
|
- measurable.gemspec
|
100
|
-
- spec/
|
76
|
+
- spec/measurable_spec.rb
|
101
77
|
- spec/spec_helper.rb
|
102
78
|
homepage: http://github.com/agarie/measurable
|
103
79
|
licenses: []
|
80
|
+
metadata: {}
|
104
81
|
post_install_message:
|
105
82
|
rdoc_options: []
|
106
83
|
require_paths:
|
107
84
|
- lib
|
108
85
|
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
-
none: false
|
110
86
|
requirements:
|
111
|
-
- -
|
87
|
+
- - '>='
|
112
88
|
- !ruby/object:Gem::Version
|
113
|
-
version: 1.9.
|
89
|
+
version: 1.9.3
|
114
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
-
none: false
|
116
91
|
requirements:
|
117
|
-
- -
|
92
|
+
- - '>='
|
118
93
|
- !ruby/object:Gem::Version
|
119
94
|
version: '0'
|
120
95
|
requirements: []
|
121
96
|
rubyforge_project:
|
122
|
-
rubygems_version:
|
97
|
+
rubygems_version: 2.0.0
|
123
98
|
signing_key:
|
124
|
-
specification_version:
|
125
|
-
summary: A Ruby
|
99
|
+
specification_version: 4
|
100
|
+
summary: A Ruby gem with a lot of distance measures for your projects.
|
126
101
|
test_files:
|
127
|
-
- spec/
|
102
|
+
- spec/measurable_spec.rb
|
128
103
|
- spec/spec_helper.rb
|
data/ext/measurable/extconf.rb
DELETED
data/ext/measurable/measurable.c
DELETED
@@ -1,209 +0,0 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <math.h>
|
3
|
-
|
4
|
-
#ifndef RUBY_19
|
5
|
-
#ifndef RARRAY_LEN
|
6
|
-
#define RARRAY_LEN(v) (RARRAY(v)->len)
|
7
|
-
#endif
|
8
|
-
#ifndef RARRAY_PTR
|
9
|
-
#define RARRAY_PTR(v) (RARRAY(v)->ptr)
|
10
|
-
#endif
|
11
|
-
#endif
|
12
|
-
|
13
|
-
/*
|
14
|
-
** def euclidean_distance(other)
|
15
|
-
** sum = 0.0
|
16
|
-
** self.each_index do |i|
|
17
|
-
** sum += (self[i] - other[i])**2
|
18
|
-
** end
|
19
|
-
** Math.sqrt(sum)
|
20
|
-
** end
|
21
|
-
*/
|
22
|
-
|
23
|
-
static VALUE rb_euclidean(VALUE self, VALUE other_array) {
|
24
|
-
double value = 0.0;
|
25
|
-
|
26
|
-
/* TODO: check they're the same size. */
|
27
|
-
long vector_length = (RARRAY_LEN(self) - 1);
|
28
|
-
int index;
|
29
|
-
|
30
|
-
for (index = 0; index <= vector_length; index++) {
|
31
|
-
double x, y;
|
32
|
-
|
33
|
-
x = NUM2DBL(RARRAY_PTR(self)[index]);
|
34
|
-
y = NUM2DBL(RARRAY_PTR(other_array)[index]);
|
35
|
-
|
36
|
-
value += pow(x - y, 2);
|
37
|
-
}
|
38
|
-
|
39
|
-
return rb_float_new(sqrt(value));
|
40
|
-
}
|
41
|
-
|
42
|
-
/* Prototypes */
|
43
|
-
long c_array_size(VALUE array);
|
44
|
-
|
45
|
-
/*
|
46
|
-
** def dot_product(other)
|
47
|
-
** sum = 0.0
|
48
|
-
** self.each_with_index do |n, index|
|
49
|
-
** sum += n * other[index]
|
50
|
-
** end
|
51
|
-
**
|
52
|
-
** sum
|
53
|
-
** end
|
54
|
-
*/
|
55
|
-
|
56
|
-
static VALUE rb_dot_product(VALUE self, VALUE other_array) {
|
57
|
-
double sum = 0;
|
58
|
-
|
59
|
-
/* TODO check they're the same size. */
|
60
|
-
long array_size = c_array_size(self);
|
61
|
-
int index;
|
62
|
-
|
63
|
-
for(index = 0; index <= array_size; index++) {
|
64
|
-
double x, y;
|
65
|
-
|
66
|
-
x = NUM2DBL(RARRAY_PTR(self)[index]);
|
67
|
-
y = NUM2DBL(RARRAY_PTR(other_array)[index]);
|
68
|
-
|
69
|
-
sum += x * y;
|
70
|
-
}
|
71
|
-
|
72
|
-
return rb_float_new(sum);
|
73
|
-
}
|
74
|
-
|
75
|
-
/*
|
76
|
-
** def sum_of_squares
|
77
|
-
** inject(0) {|sum, n| sum + n ** 2}
|
78
|
-
** end
|
79
|
-
*/
|
80
|
-
|
81
|
-
static VALUE rb_sum_of_squares(VALUE self) {
|
82
|
-
double sum = 0;
|
83
|
-
long array_size = c_array_size(self);
|
84
|
-
int index;
|
85
|
-
|
86
|
-
for(index = 0; index <= array_size; index++) {
|
87
|
-
double x;
|
88
|
-
|
89
|
-
x = NUM2DBL(RARRAY_PTR(self)[index]);
|
90
|
-
|
91
|
-
sum += pow(x, 2);
|
92
|
-
}
|
93
|
-
|
94
|
-
return rb_float_new(sum);
|
95
|
-
}
|
96
|
-
|
97
|
-
/*
|
98
|
-
** def euclidean_normalize
|
99
|
-
** sum = 0.0
|
100
|
-
** self.each do |n|
|
101
|
-
** sum += n ** 2
|
102
|
-
** end
|
103
|
-
**
|
104
|
-
** Math.sqrt(sum)
|
105
|
-
** end
|
106
|
-
*/
|
107
|
-
|
108
|
-
static VALUE rb_euclidean_normalize(VALUE self) {
|
109
|
-
double sum = 0;
|
110
|
-
long array_size = c_array_size(self);
|
111
|
-
int index;
|
112
|
-
|
113
|
-
for(index = 0; index <= array_size; index++) {
|
114
|
-
double x;
|
115
|
-
|
116
|
-
x = NUM2DBL(RARRAY_PTR(self)[index]);
|
117
|
-
|
118
|
-
sum += pow(x, 2);
|
119
|
-
}
|
120
|
-
|
121
|
-
return rb_float_new(sqrt(sum));
|
122
|
-
}
|
123
|
-
|
124
|
-
/*
|
125
|
-
** def binary_union_with(other)
|
126
|
-
** unions = []
|
127
|
-
** self.each_with_index do |n, index|
|
128
|
-
** if n == 1 || other[index] == 1
|
129
|
-
** unions << 1
|
130
|
-
** else
|
131
|
-
** unions << 0
|
132
|
-
** end
|
133
|
-
** end
|
134
|
-
**
|
135
|
-
** unions
|
136
|
-
** end
|
137
|
-
*/
|
138
|
-
|
139
|
-
static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
|
140
|
-
//TODO: check arrays are same size
|
141
|
-
long array_size = c_array_size(self);
|
142
|
-
int index;
|
143
|
-
VALUE results = rb_ary_new();
|
144
|
-
|
145
|
-
for(index = 0; index <= array_size; index++) {
|
146
|
-
int self_attribute = NUM2INT(RARRAY_PTR(self)[index]);
|
147
|
-
int other_array_attribute = NUM2INT(RARRAY_PTR(other_array)[index]);
|
148
|
-
|
149
|
-
if(self_attribute == 1 || other_array_attribute == 1) {
|
150
|
-
rb_ary_push(results, rb_int_new(1));
|
151
|
-
} else {
|
152
|
-
rb_ary_push(results, rb_int_new(0));
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
|
-
return results;
|
157
|
-
}
|
158
|
-
|
159
|
-
/*
|
160
|
-
** def binary_intersection_with(other)
|
161
|
-
** intersects = []
|
162
|
-
** self.each_with_index do |n, index|
|
163
|
-
** if n == 1 && other[index] == 1
|
164
|
-
** intersects << 1
|
165
|
-
** else
|
166
|
-
** intersects << 0
|
167
|
-
** end
|
168
|
-
** end
|
169
|
-
**
|
170
|
-
** intersects
|
171
|
-
** end
|
172
|
-
*/
|
173
|
-
|
174
|
-
static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
|
175
|
-
/* TODO check arrays are same size */
|
176
|
-
long array_size = c_array_size(self);
|
177
|
-
int index;
|
178
|
-
VALUE results = rb_ary_new();
|
179
|
-
|
180
|
-
for(index = 0; index <= array_size; index++) {
|
181
|
-
int self_attribute = NUM2INT(RARRAY_PTR(self)[index]);
|
182
|
-
int other_array_attribute = NUM2INT(RARRAY_PTR(other_array)[index]);
|
183
|
-
|
184
|
-
if(self_attribute == 1 && other_array_attribute == 1) {
|
185
|
-
rb_ary_push(results, rb_int_new(1));
|
186
|
-
} else {
|
187
|
-
rb_ary_push(results, rb_int_new(0));
|
188
|
-
}
|
189
|
-
}
|
190
|
-
|
191
|
-
return results;
|
192
|
-
}
|
193
|
-
|
194
|
-
/* return the size of a Ruby array - 1 */
|
195
|
-
long c_array_size(VALUE array) {
|
196
|
-
return (RARRAY_LEN(array) - 1);
|
197
|
-
}
|
198
|
-
|
199
|
-
void
|
200
|
-
Init_measurable()
|
201
|
-
{
|
202
|
-
VALUE rb_measurable = rb_define_module("Measurable");
|
203
|
-
rb_define_method(rb_measurable, "euclidean", rb_euclidean, 1);
|
204
|
-
rb_define_method(rb_measurable, "dot_product", rb_dot_product, 1);
|
205
|
-
rb_define_method(rb_measurable, "sum_of_squares", rb_sum_of_squares, 0);
|
206
|
-
rb_define_method(rb_measurable, "euclidean_normalize", rb_euclidean_normalize, 0);
|
207
|
-
rb_define_method(rb_measurable, "binary_union_with", rb_binary_union_with, 1);
|
208
|
-
rb_define_method(rb_measurable, "binary_intersection_with", rb_binary_intersection_with, 1);
|
209
|
-
}
|
data/spec/measurable.rb
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
|
3
|
-
describe Measurable do
|
4
|
-
|
5
|
-
let(:array) { [5, 5] }
|
6
|
-
let(:array_2) { [7, 3, 2, 4, 1] }
|
7
|
-
let(:array_3) { [4, 1, 9, 7, 5] }
|
8
|
-
|
9
|
-
describe "Euclidean Distance" do
|
10
|
-
it "should return 0.0" do
|
11
|
-
array.euclidean_distance(array).should == 0.0
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should return 4.0" do
|
15
|
-
[5].euclidean_distance([1]).should == 4.0
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
describe "Cosine Similarity" do
|
20
|
-
it "should return 1.0" do
|
21
|
-
array.cosine_similarity(array).should.to_s == "1.0" # WTF
|
22
|
-
end
|
23
|
-
|
24
|
-
it "should handle NaN's" do
|
25
|
-
[0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
describe "Tanimoto Coefficient" do
|
30
|
-
it "should return 1.0" do
|
31
|
-
array.tanimoto_coefficient(array).should == 1.0
|
32
|
-
end
|
33
|
-
|
34
|
-
it "should handle NaN's" do
|
35
|
-
[0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
describe "Sum of Squares" do
|
40
|
-
it "should return 50" do
|
41
|
-
array.sum_of_squares.should == 50
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
describe "Jaccard" do
|
46
|
-
describe "Jaccard Distance" do
|
47
|
-
it "should return" do
|
48
|
-
array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
describe "Jaccard Index" do
|
53
|
-
it "should return" do
|
54
|
-
array_2.jaccard_index(array_3).should == 3.0/7.0
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
describe "Binary Jaccard Index" do
|
59
|
-
it "should return 1/4" do
|
60
|
-
[1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
describe "Binary Jaccard Distance" do
|
66
|
-
it "should return 0.75" do
|
67
|
-
[1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
describe "Intersection" do
|
72
|
-
it "should return [7,4,1]" do
|
73
|
-
array_2.intersection_with(array_3).should == [7,4,1]
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
describe "Union" do
|
78
|
-
it "should return " do
|
79
|
-
array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
describe "Binary Intersection" do
|
84
|
-
it "should return [0,1,0,0]" do
|
85
|
-
[1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
describe "Binary Union" do
|
90
|
-
it "should return [1,1,1,0]" do
|
91
|
-
[1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
describe "Dot Product" do
|
96
|
-
it "should return 50" do
|
97
|
-
[5, 5].dot_product([5, 5]).should == 50.0
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
describe "Euclidean normalize" do
|
102
|
-
it "should" do
|
103
|
-
[10].euclidean_normalize.should == 10
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|