distance_measures 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = Distance Measures
2
+
3
+ A bunch of distance measures that extend Array.
4
+
5
+ == Install
6
+
7
+ gem sources -a http://gemcutter.org
8
+ sudo gem install distance_measures
9
+
10
+ == How to use
11
+
12
+ a = [1,1]
13
+ b = [2,2]
14
+
15
+ a.euclidean_distance(b)
16
+
17
+ a.cosine_similarity(b)
18
+
19
+ a.jaccard_index(b)
20
+
21
+ a.jaccard_distance(b)
22
+
23
+ a.binary_jaccard_index(b)
24
+
25
+ a.binary_jaccard_distance(b)
26
+
27
+ a.tanimoto_coefficient(b)
28
+
29
+ This may or may not be the complete list; the best thing is to check the source code.
30
+
31
+ There are also a couple of bonus methods:
32
+
33
+ a.dot_product(b)
34
+
35
+ a.sum_of_squares
36
+
37
+ a.intersection_with(b)
38
+
39
+ a.union_with(b)
40
+
41
+ # When you're dealing with 1's and 0's
42
+ a.binary_intersection_with(b)
43
+
44
+ a.binary_union_with(b)
45
+
46
+ == Copyright
47
+
48
+ Copyright (c) 2010 reddavis. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
# Build, test and documentation tasks for the distance_measures gem.
require 'rubygems'
require 'rake'

# Packaging tasks are optional: when jeweler is not installed we only print a
# hint, and the spec/rcov/rdoc tasks below remain usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "distance_measures"
    gem.summary = %Q{A bundle of distance measures}
    gem.description = %Q{A bundle of distance measures}
    gem.email = "reddavis@gmail.com"
    gem.homepage = "http://github.com/reddavis/distance_measure"
    gem.authors = ["reddavis"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new

  # :check_dependencies is one of the tasks jeweler provides, so it is only
  # wired in as a prerequisite when jeweler actually loaded. Declaring it
  # unconditionally would make `rake spec` fail with an unknown-task error
  # on machines without jeweler.
  task :spec => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# RSpec 1.x rake integration.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same examples as :spec, run under rcov for coverage.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # VERSION ends with a newline; strip it so it does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "distance_measures #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,60 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{distance_measures}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["reddavis"]
12
+ s.date = %q{2010-01-25}
13
+ s.description = %q{A bundle of distance measures}
14
+ s.email = %q{reddavis@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "distance_measures.gemspec",
27
+ "lib/distance_measures.rb",
28
+ "lib/distance_measures/core.rb",
29
+ "lib/distance_measures/cosine_similarity.rb",
30
+ "lib/distance_measures/euclidean_distance.rb",
31
+ "lib/distance_measures/jaccard.rb",
32
+ "lib/distance_measures/tanimoto_coefficient.rb",
33
+ "spec/distance_measures_spec.rb",
34
+ "spec/spec.opts",
35
+ "spec/spec_helper.rb"
36
+ ]
37
+ s.homepage = %q{http://github.com/reddavis/distance_measure}
38
+ s.rdoc_options = ["--charset=UTF-8"]
39
+ s.require_paths = ["lib"]
40
+ s.rubygems_version = %q{1.3.5}
41
+ s.summary = %q{A bundle of distance measures}
42
+ s.test_files = [
43
+ "spec/distance_measures_spec.rb",
44
+ "spec/spec_helper.rb"
45
+ ]
46
+
47
+ if s.respond_to? :specification_version then
48
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
55
+ end
56
+ else
57
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
58
+ end
59
+ end
60
+
@@ -0,0 +1,60 @@
1
# Vector and set helpers shared by the individual distance measures.
#
# The module is written to be mixed into Array: every method treats +self+
# as a vector (or set) and +other+ as a second vector indexed with +[]+.
# Elements of +other+ beyond the length of +self+ are ignored.
module DistanceMeasures
  # Sum of the element-wise products of +self+ and +other+.
  # Always returns a Float (0.0 for an empty receiver).
  def dot_product(other)
    each_with_index.inject(0.0) { |sum, (n, index)| sum + n * other[index] }
  end

  # Euclidean (L2) norm of the receiver: the square root of #sum_of_squares.
  def euclidean_normalize
    Math.sqrt(sum_of_squares)
  end

  # Sum of the squares of all elements (0 for an empty receiver).
  def sum_of_squares
    inject(0) { |sum, n| sum + n**2 }
  end

  # http://en.wikipedia.org/wiki/Intersection_(set_theory)
  # Unique elements present in both arrays, in receiver order.
  def intersection_with(other)
    self & other
  end

  # http://en.wikipedia.org/wiki/Union_(set_theory)
  # Unique elements present in either array, receiver's elements first.
  def union_with(other)
    self | other
  end

  # 1's & 0's
  # Position-wise AND: 1 where both vectors hold 1, otherwise 0.
  def binary_intersection_with(other)
    each_with_index.map { |n, index| n == 1 && other[index] == 1 ? 1 : 0 }
  end

  # Position-wise OR: 1 where either vector holds 1, otherwise 0.
  def binary_union_with(other)
    each_with_index.map { |n, index| n == 1 || other[index] == 1 ? 1 : 0 }
  end
end
@@ -0,0 +1,9 @@
1
# http://en.wikipedia.org/wiki/Cosine_similarity
module DistanceMeasures
  # Cosine of the angle between +self+ and +other+: their dot product divided
  # by the product of their Euclidean norms.
  #
  # Relies on #dot_product and #euclidean_normalize, so +other+ must respond
  # to #euclidean_normalize as well. If either vector has zero magnitude the
  # result is Float::NAN (0.0 / 0.0), since the similarity is undefined.
  def cosine_similarity(other)
    # Named so the local does not shadow the #dot_product method.
    magnitudes = euclidean_normalize * other.euclidean_normalize

    dot_product(other) / magnitudes
  end
end
@@ -0,0 +1,10 @@
1
# http://en.wikipedia.org/wiki/Euclidean_distance
module DistanceMeasures
  # Straight-line distance between +self+ and +other+: the square root of the
  # summed squared differences of elements at the same index.
  #
  # Iterates over the receiver's indices, so elements of +other+ beyond the
  # receiver's length are ignored. Returns a Float (0.0 for an empty receiver).
  def euclidean_distance(other)
    squared = each_with_index.inject(0.0) do |sum, (n, i)|
      sum + (n - other[i])**2
    end

    Math.sqrt(squared)
  end
end
@@ -0,0 +1,26 @@
1
# http://en.wikipedia.org/wiki/Jaccard_coefficient
#
# Jaccard measures for arrays treated as sets, plus "binary" variants for
# equal-length vectors of 1's and 0's. All four methods build on the set
# helpers #intersection_with, #union_with, #binary_intersection_with and
# #binary_union_with.
module DistanceMeasures
  # Jaccard distance: 1 minus #jaccard_index.
  def jaccard_distance(other)
    1 - jaccard_index(other)
  end

  # Size of the intersection divided by the size of the union, as a Float.
  #
  # When the union is empty (both arrays empty) the ratio would be 0.0 / 0.0;
  # by the usual convention two empty sets are identical, so 1.0 is returned
  # instead of NaN.
  def jaccard_index(other)
    union_size = union_with(other).size
    return 1.0 if union_size.zero?

    intersection_with(other).size / union_size.to_f
  end

  # Binary Jaccard distance: 1 minus #binary_jaccard_index.
  def binary_jaccard_distance(other)
    1 - binary_jaccard_index(other)
  end

  # Number of positions where both vectors hold 1, divided by the number of
  # positions where at least one of them holds 1, as a Float.
  #
  # Returns 1.0 (rather than NaN) when neither vector contains a 1, matching
  # the empty-set convention used by #jaccard_index.
  def binary_jaccard_index(other)
    union_size = binary_union_with(other).count(1)
    return 1.0 if union_size.zero?

    binary_intersection_with(other).count(1) / union_size.to_f
  end
end
@@ -0,0 +1,7 @@
1
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
module DistanceMeasures
  # Tanimoto (extended Jaccard) coefficient:
  #
  #   dot(a, b) / (|a|^2 + |b|^2 - dot(a, b))
  #
  # Relies on #dot_product and #sum_of_squares, so +other+ must respond to
  # #sum_of_squares. Returns a Float; when both vectors are all zeros the
  # result is Float::NAN (0.0 / 0.0).
  def tanimoto_coefficient(other)
    dot = dot_product(other).to_f

    # Subtracting the Float +dot+ already makes the denominator a Float.
    dot / (sum_of_squares + other.sum_of_squares - dot)
  end
end
@@ -0,0 +1,11 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+
3
+ require 'distance_measures/cosine_similarity'
4
+ require 'distance_measures/euclidean_distance'
5
+ require 'distance_measures/tanimoto_coefficient'
6
+ require 'distance_measures/core'
7
+ require 'distance_measures/jaccard'
8
+
9
# Mix the distance measures into the core Array class so that plain arrays
# can be used directly, e.g. [1, 1].euclidean_distance([2, 2]).
# Note that this extends Array globally for every program requiring the gem.
class Array
  include DistanceMeasures
end
@@ -0,0 +1,93 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Examples for every public measure and helper that DistanceMeasures adds to
# Array. Written against RSpec 1.x (`should` syntax).
describe "DistanceMeasures" do

  describe "Euclidean Distance" do
    it "should return 0.0 for identical arrays" do
      array.euclidean_distance(array).should == 0.0
    end
  end

  describe "Cosine Similarity" do
    # Compared with a threshold rather than == because of float rounding.
    it "should return approximately 1.0 for identical arrays" do
      array.cosine_similarity(array).should > 0.99
    end
  end

  describe "Tanimoto Coefficient" do
    it "should return 1.0 for identical arrays" do
      array.tanimoto_coefficient(array).should == 1.0
    end
  end

  describe "Sum of Squares" do
    it "should return 50" do
      array.sum_of_squares.should == 50
    end
  end

  describe "Jaccard" do
    describe "Jaccard Distance" do
      it "should return 1 - 3/7" do
        array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
      end
    end

    describe "Jaccard Index" do
      it "should return 3/7" do
        array_2.jaccard_index(array_3).should == 3.0/7.0
      end
    end

    describe "Binary Jaccard Index" do
      it "should return 1/4" do
        [1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
      end
    end

    describe "Binary Jaccard Distance" do
      it "should return 0.75" do
        [1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
      end
    end
  end

  describe "Intersection" do
    it "should return [7,4,1]" do
      array_2.intersection_with(array_3).should == [7,4,1]
    end
  end

  describe "Union" do
    it "should return [7,3,2,4,1,9,5]" do
      array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
    end
  end

  describe "Binary Intersection" do
    it "should return [0,1,0,0]" do
      [1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
    end
  end

  describe "Binary Union" do
    it "should return [1,1,1,0]" do
      [1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
    end
  end

  private

  # Fixture: a vector compared against itself in the identity examples.
  def array
    [5, 5]
  end

  # Fixtures: two five-element sets sharing the elements 7, 4 and 1.
  def array_2
    [7, 3, 2, 4, 1]
  end

  def array_3
    [4,1,9,7,5]
  end

end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'distance_measures'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: distance_measures
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-25 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: A bundle of distance measures
26
+ email: reddavis@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - distance_measures.gemspec
42
+ - lib/distance_measures.rb
43
+ - lib/distance_measures/core.rb
44
+ - lib/distance_measures/cosine_similarity.rb
45
+ - lib/distance_measures/euclidean_distance.rb
46
+ - lib/distance_measures/jaccard.rb
47
+ - lib/distance_measures/tanimoto_coefficient.rb
48
+ - spec/distance_measures_spec.rb
49
+ - spec/spec.opts
50
+ - spec/spec_helper.rb
51
+ has_rdoc: true
52
+ homepage: http://github.com/reddavis/distance_measure
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options:
57
+ - --charset=UTF-8
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project:
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: A bundle of distance measures
79
+ test_files:
80
+ - spec/distance_measures_spec.rb
81
+ - spec/spec_helper.rb