distance_measures 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = Distance Measures
2
+
3
+ A bunch of distance measures that extend Array.
4
+
5
+ == Install
6
+
7
+ gem sources -a http://gemcutter.org
8
+ sudo gem install distance_measures
9
+
10
+ == How to use
11
+
12
+ a = [1,1]
13
+ b = [2,2]
14
+
15
+ a.euclidean_distance(b)
16
+
17
+ a.cosine_similarity(b)
18
+
19
+ a.jaccard_index(b)
20
+
21
+ a.jaccard_distance(b)
22
+
23
+ a.binary_jaccard_index(b)
24
+
25
+ a.binary_jaccard_distance(b)
26
+
27
+ a.tanimoto_coefficient(b)
28
+
29
+ This may not be the complete list; the best thing to do is check the source code.
30
+
31
+ There are also a couple of bonus methods:
32
+
33
+ a.dot_product(b)
34
+
35
+ a.sum_of_squares
36
+
37
+ a.intersection_with(b)
38
+
39
+ a.union_with(b)
40
+
41
+ # When you're dealing with 1's and 0's
42
+ a.binary_intersection_with(b)
43
+
44
+ a.binary_union_with(b)
45
+
46
+ == Copyright
47
+
48
+ Copyright (c) 2010 reddavis. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded a hint is
# printed and the remaining tasks in this file are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "distance_measures"
    gem.summary = %Q{A bundle of distance measures}
    gem.description = %Q{A bundle of distance measures}
    gem.email = "reddavis@gmail.com"
    gem.homepage = "http://github.com/reddavis/distance_measure"
    gem.authors = ["reddavis"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every *_spec.rb under spec/ with lib/ and spec/ on the load path.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same specs, with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is not defined anywhere in this Rakefile; presumably it
# is provided by the Jeweler tasks above. Only add it as a prerequisite when
# it actually exists, so `rake spec` still works after the LoadError rescue.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# RDoc generation into rdoc/, titled with the contents of VERSION when present.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "distance_measure #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,60 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{distance_measures}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["reddavis"]
12
+ s.date = %q{2010-01-25}
13
+ s.description = %q{A bundle of distance measures}
14
+ s.email = %q{reddavis@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "distance_measures.gemspec",
27
+ "lib/distance_measures.rb",
28
+ "lib/distance_measures/core.rb",
29
+ "lib/distance_measures/cosine_similarity.rb",
30
+ "lib/distance_measures/euclidean_distance.rb",
31
+ "lib/distance_measures/jaccard.rb",
32
+ "lib/distance_measures/tanimoto_coefficient.rb",
33
+ "spec/distance_measures_spec.rb",
34
+ "spec/spec.opts",
35
+ "spec/spec_helper.rb"
36
+ ]
37
+ s.homepage = %q{http://github.com/reddavis/distance_measure}
38
+ s.rdoc_options = ["--charset=UTF-8"]
39
+ s.require_paths = ["lib"]
40
+ s.rubygems_version = %q{1.3.5}
41
+ s.summary = %q{A bundle of distance measures}
42
+ s.test_files = [
43
+ "spec/distance_measures_spec.rb",
44
+ "spec/spec_helper.rb"
45
+ ]
46
+
47
+ if s.respond_to? :specification_version then
48
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
55
+ end
56
+ else
57
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
58
+ end
59
+ end
60
+
@@ -0,0 +1,60 @@
1
# Vector and set helpers shared by the distance measures. The methods call
# #each_with_index, #inject, #& and #| on the receiver, so the module is
# meant to be mixed into Array.
module DistanceMeasures
  # Dot product of the receiver and +other+, paired up by index.
  #
  # Only the receiver's indices are visited: trailing elements of a longer
  # +other+ are ignored, and a shorter +other+ yields nil for the missing
  # positions, which makes the multiplication fail.
  #
  # The sum is seeded with 0.0, so plain numeric elements give a Float.
  def dot_product(other)
    each_with_index.inject(0.0) { |sum, (n, index)| sum + n * other[index] }
  end

  # Euclidean (L2) norm of the receiver: the square root of the sum of its
  # squared elements. Despite the name nothing is normalized; the magnitude
  # itself is returned.
  def euclidean_normalize
    Math.sqrt(inject(0.0) { |sum, n| sum + n**2 })
  end

  # Sum of the squared elements. The sum is seeded with the Integer 0, so an
  # all-Integer receiver gives an Integer result.
  def sum_of_squares
    inject(0) { |sum, n| sum + n**2 }
  end

  # http://en.wikipedia.org/wiki/Intersection_(set_theory)
  #
  # Elements found in both arrays, without duplicates, in receiver order.
  def intersection_with(other)
    self & other
  end

  # http://en.wikipedia.org/wiki/Union_(set_theory)
  #
  # Distinct elements of both arrays, in order of first appearance.
  def union_with(other)
    self | other
  end

  # 1's & 0's
  #
  # Element-wise AND: position i is 1 when both the receiver and +other+ hold
  # 1 there, otherwise 0. The result has the receiver's length.
  def binary_intersection_with(other)
    each_with_index.map { |n, index| (n == 1 && other[index] == 1) ? 1 : 0 }
  end

  # Element-wise OR: position i is 1 when the receiver or +other+ holds 1
  # there, otherwise 0. The result has the receiver's length.
  def binary_union_with(other)
    each_with_index.map { |n, index| (n == 1 || other[index] == 1) ? 1 : 0 }
  end
end
@@ -0,0 +1,9 @@
1
# http://en.wikipedia.org/wiki/Cosine_similarity
module DistanceMeasures
  # Cosine similarity between the receiver and +other+: their dot product
  # divided by the product of their Euclidean norms.
  #
  # Relies on #dot_product and #euclidean_normalize being available on both
  # operands.
  #
  # Returns 0.0 when either norm is zero (for example an empty or all-zero
  # vector), where the quotient would otherwise be NaN.
  def cosine_similarity(other)
    dot = dot_product(other)
    magnitude = euclidean_normalize * other.euclidean_normalize

    # The angle to a zero-length vector is undefined; report "no similarity"
    # rather than letting 0.0 / 0.0 leak a NaN to the caller.
    return 0.0 if magnitude.zero?

    dot / magnitude
  end
end
@@ -0,0 +1,10 @@
1
# http://en.wikipedia.org/wiki/Euclidean_distance
module DistanceMeasures
  # Euclidean distance between the receiver and +other+: the square root of
  # the summed squared differences of elements at the same index.
  #
  # Only the receiver's indices are visited, so extra trailing elements of
  # +other+ are ignored; a shorter +other+ yields nil for the missing
  # positions and the subtraction fails.
  #
  # Returns a Float (0.0 for an empty receiver).
  def euclidean_distance(other)
    squared = each_with_index.inject(0.0) do |sum, (n, i)|
      sum + (n - other[i])**2
    end

    Math.sqrt(squared)
  end
end
@@ -0,0 +1,26 @@
1
# http://en.wikipedia.org/wiki/Jaccard_coefficient
module DistanceMeasures

  # Jaccard distance: 1 minus the Jaccard index of the receiver and +other+.
  def jaccard_distance(other)
    1 - jaccard_index(other)
  end

  # Jaccard index: the number of distinct shared elements divided by the
  # number of distinct elements overall, as a Float.
  #
  # Relies on #intersection_with and #union_with. When both arrays are empty
  # the union is empty and the index is defined as 1.0 instead of the NaN
  # that 0.0 / 0.0 would give.
  def jaccard_index(other)
    union = union_with(other).size
    return 1.0 if union.zero?

    intersection_with(other).size / union.to_f
  end

  # Binary Jaccard distance: 1 minus the binary Jaccard index.
  def binary_jaccard_distance(other)
    1 - binary_jaccard_index(other)
  end

  # Jaccard index for vectors of 1's and 0's: positions where both hold 1
  # divided by positions where either holds 1, as a Float.
  #
  # Relies on #binary_intersection_with and #binary_union_with. When no
  # position holds a 1 the index is defined as 1.0 instead of NaN.
  def binary_jaccard_index(other)
    union = binary_union_with(other).count(1)
    return 1.0 if union.zero?

    binary_intersection_with(other).count(1) / union.to_f
  end

end
@@ -0,0 +1,7 @@
1
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
module DistanceMeasures
  # Tanimoto coefficient of the receiver and +other+:
  #
  #   dot / (sum_of_squares(self) + sum_of_squares(other) - dot)
  #
  # Relies on #dot_product and #sum_of_squares being available on both
  # operands. Returns a Float.
  #
  # The denominator is zero, for real-valued vectors, only when both vectors
  # are all zeros (or empty). Such vectors are identical, so 1.0 is returned
  # instead of the NaN that 0.0 / 0.0 would give.
  def tanimoto_coefficient(other)
    dot = dot_product(other).to_f
    denominator = (sum_of_squares + other.sum_of_squares - dot).to_f
    return 1.0 if denominator.zero?

    dot / denominator
  end
end
@@ -0,0 +1,11 @@
1
# Library entry point: puts the lib directory on the load path, loads every
# measure and mixes DistanceMeasures into Array.

# Expand the path so the entry is normalized (no "/../" segment), and skip the
# unshift when it is already present so repeated loads do not pile up
# duplicate load-path entries.
lib_dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require 'distance_measures/cosine_similarity'
require 'distance_measures/euclidean_distance'
require 'distance_measures/tanimoto_coefficient'
require 'distance_measures/core'
require 'distance_measures/jaccard'

# Every Array gains the DistanceMeasures methods.
class Array
  include DistanceMeasures
end
@@ -0,0 +1,93 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Examples for the DistanceMeasures methods mixed into Array. The fixture
# arrays come from the helper methods at the bottom of the group.
describe "DistanceMeasures" do

  describe "Euclidean Distance" do
    it "should return 0.0 for identical vectors" do
      array.euclidean_distance(array).should == 0.0
    end
  end

  describe "Cosine Similarity" do
    # Compared against a threshold instead of 1.0 exactly because the result
    # is a floating-point quotient.
    it "should return approximately 1.0 for identical vectors" do
      array.cosine_similarity(array).should > 0.99
    end
  end

  describe "Tanimoto Coefficient" do
    it "should return 1.0 for identical vectors" do
      array.tanimoto_coefficient(array).should == 1.0
    end
  end

  describe "Sum of Squares" do
    it "should return 50" do
      array.sum_of_squares.should == 50
    end
  end

  describe "Jaccard" do
    describe "Jaccard Distance" do
      it "should return 1 - 3/7" do
        array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
      end
    end

    describe "Jaccard Index" do
      it "should return 3/7" do
        array_2.jaccard_index(array_3).should == 3.0/7.0
      end
    end

    describe "Binary Jaccard Index" do
      it "should return 1/4" do
        [1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
      end
    end

    describe "Binary Jaccard Distance" do
      it "should return 0.75" do
        [1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
      end
    end
  end

  describe "Intersection" do
    it "should return [7,4,1]" do
      array_2.intersection_with(array_3).should == [7,4,1]
    end
  end

  describe "Union" do
    it "should return [7,3,2,4,1,9,5]" do
      array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
    end
  end

  describe "Binary Intersection" do
    it "should return [0,1,0,0]" do
      [1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
    end
  end

  describe "Binary Union" do
    it "should return [1,1,1,0]" do
      [1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
    end
  end

  private

  # Two-element vector used for the "identical vectors" examples.
  def array
    [5, 5]
  end

  # array_2 and array_3 share the elements 7, 4 and 1.
  def array_2
    [7, 3, 2, 4, 1]
  end

  def array_3
    [4,1,9,7,5]
  end

end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'distance_measures'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: distance_measures
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-25 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: A bundle of distance measures
26
+ email: reddavis@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - distance_measures.gemspec
42
+ - lib/distance_measures.rb
43
+ - lib/distance_measures/core.rb
44
+ - lib/distance_measures/cosine_similarity.rb
45
+ - lib/distance_measures/euclidean_distance.rb
46
+ - lib/distance_measures/jaccard.rb
47
+ - lib/distance_measures/tanimoto_coefficient.rb
48
+ - spec/distance_measures_spec.rb
49
+ - spec/spec.opts
50
+ - spec/spec_helper.rb
51
+ has_rdoc: true
52
+ homepage: http://github.com/reddavis/distance_measure
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options:
57
+ - --charset=UTF-8
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project:
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: A bundle of distance measures
79
+ test_files:
80
+ - spec/distance_measures_spec.rb
81
+ - spec/spec_helper.rb