distance_measures 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -17,5 +17,7 @@ tmtags
17
17
  coverage
18
18
  rdoc
19
19
  pkg
20
+ tmp/*
21
+ benchmarks/*
20
22
 
21
23
  ## PROJECT::SPECIFIC
data/Rakefile CHANGED
@@ -1,16 +1,18 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'rake/extensiontask'
3
4
 
4
5
  begin
5
6
  require 'jeweler'
6
7
  Jeweler::Tasks.new do |gem|
7
8
  gem.name = "distance_measures"
8
- gem.summary = %Q{A bundle of distance measures}
9
- gem.description = %Q{A bundle of distance measures}
9
+ gem.summary = %Q{A bundle of distance measures with C extensions for the slow bits}
10
+ gem.description = %Q{A bundle of distance measures with C extensions for the slow bits}
10
11
  gem.email = "reddavis@gmail.com"
11
12
  gem.homepage = "http://github.com/reddavis/distance_measure"
12
13
  gem.authors = ["reddavis"]
13
14
  gem.add_development_dependency "rspec", ">= 1.2.9"
15
+ gem.extensions = FileList['ext/**/extconf.rb']
14
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
17
  end
16
18
  Jeweler::GemcutterTasks.new
@@ -30,8 +32,17 @@ Spec::Rake::SpecTask.new(:rcov) do |spec|
30
32
  spec.rcov = true
31
33
  end
32
34
 
33
- task :spec => :check_dependencies
35
+ # Euclidean Distance
36
+ Rake::ExtensionTask.new('euclidean_distance') do |ext|
37
+ ext.lib_dir = File.join('lib', 'distance_measures')
38
+ end
39
+
40
+ # Core
41
+ Rake::ExtensionTask.new('core') do |ext|
42
+ ext.lib_dir = File.join('lib', 'distance_measures')
43
+ end
34
44
 
45
+ task :spec => :check_dependencies
35
46
  task :default => :spec
36
47
 
37
48
  require 'rake/rdoctask'
@@ -43,3 +54,5 @@ Rake::RDocTask.new do |rdoc|
43
54
  rdoc.rdoc_files.include('README*')
44
55
  rdoc.rdoc_files.include('lib/**/*.rb')
45
56
  end
57
+
58
+ Rake::Task[:spec].prerequisites << :compile
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -5,13 +5,14 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{distance_measures}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2010-01-26}
13
- s.description = %q{A bundle of distance measures}
12
+ s.date = %q{2010-07-31}
13
+ s.description = %q{A bundle of distance measures with C extensions for the slow bits}
14
14
  s.email = %q{reddavis@gmail.com}
15
+ s.extensions = ["ext/core/extconf.rb", "ext/euclidean_distance/extconf.rb"]
15
16
  s.extra_rdoc_files = [
16
17
  "LICENSE",
17
18
  "README.rdoc"
@@ -24,10 +25,14 @@ Gem::Specification.new do |s|
24
25
  "Rakefile",
25
26
  "VERSION",
26
27
  "distance_measures.gemspec",
28
+ "ext/core/core.c",
29
+ "ext/core/extconf.rb",
30
+ "ext/euclidean_distance/euclidean_distance.c",
31
+ "ext/euclidean_distance/extconf.rb",
27
32
  "lib/distance_measures.rb",
28
- "lib/distance_measures/core.rb",
33
+ "lib/distance_measures/core.bundle",
29
34
  "lib/distance_measures/cosine_similarity.rb",
30
- "lib/distance_measures/euclidean_distance.rb",
35
+ "lib/distance_measures/euclidean_distance.bundle",
31
36
  "lib/distance_measures/jaccard.rb",
32
37
  "lib/distance_measures/tanimoto_coefficient.rb",
33
38
  "spec/distance_measures_spec.rb",
@@ -37,8 +42,8 @@ Gem::Specification.new do |s|
37
42
  s.homepage = %q{http://github.com/reddavis/distance_measure}
38
43
  s.rdoc_options = ["--charset=UTF-8"]
39
44
  s.require_paths = ["lib"]
40
- s.rubygems_version = %q{1.3.5}
41
- s.summary = %q{A bundle of distance measures}
45
+ s.rubygems_version = %q{1.3.6}
46
+ s.summary = %q{A bundle of distance measures with C extensions for the slow bits}
42
47
  s.test_files = [
43
48
  "spec/distance_measures_spec.rb",
44
49
  "spec/spec_helper.rb"
data/ext/core/core.c ADDED
@@ -0,0 +1,174 @@
1
+ #include <ruby.h>
2
+ #include <math.h>
3
+
4
+ // Prototypes
5
+ long c_array_size(VALUE array);
6
+ // END
7
+
8
+ /*
9
+
10
+ def dot_product(other)
11
+ sum = 0.0
12
+ self.each_with_index do |n, index|
13
+ sum += n * other[index]
14
+ end
15
+
16
+ sum
17
+ end
18
+
19
+ */
20
+ static VALUE rb_dot_product(VALUE self, VALUE other_array) {
21
+ double sum = 0;
22
+
23
+ //TODO: check they're the same size
24
+ long array_size = c_array_size(self);
25
+ int index;
26
+
27
+ for(index = 0; index <= array_size; index++) {
28
+ double x, y;
29
+
30
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
31
+ y = NUM2DBL(RARRAY(other_array)->ptr[index]);
32
+
33
+ sum += x * y;
34
+ }
35
+
36
+ return rb_float_new(sum);
37
+ }
38
+
39
+ /*
40
+
41
+ def sum_of_squares
42
+ inject(0) {|sum, n| sum + n ** 2}
43
+ end
44
+
45
+ */
46
+ static VALUE rb_sum_of_squares(VALUE self) {
47
+ double sum = 0;
48
+ long array_size = c_array_size(self);
49
+ int index;
50
+
51
+ for(index = 0; index <= array_size; index++) {
52
+ double x;
53
+
54
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
55
+
56
+ sum += pow(x, 2);
57
+ }
58
+
59
+ return rb_float_new(sum);
60
+ }
61
+
62
+ /*
63
+
64
+ def euclidean_normalize
65
+ sum = 0.0
66
+ self.each do |n|
67
+ sum += n ** 2
68
+ end
69
+
70
+ Math.sqrt(sum)
71
+ end
72
+
73
+ */
74
+ static VALUE rb_euclidean_normalize(VALUE self) {
75
+ double sum = 0;
76
+ long array_size = c_array_size(self);
77
+ int index;
78
+
79
+ for(index = 0; index <= array_size; index++) {
80
+ double x;
81
+
82
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
83
+
84
+ sum += pow(x, 2);
85
+ }
86
+
87
+ return rb_float_new(sqrt(sum));
88
+ }
89
+
90
+ /*
91
+
92
+ def binary_union_with(other)
93
+ unions = []
94
+ self.each_with_index do |n, index|
95
+ if n == 1 || other[index] == 1
96
+ unions << 1
97
+ else
98
+ unions << 0
99
+ end
100
+ end
101
+
102
+ unions
103
+ end
104
+
105
+ */
106
+ static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
107
+ //TODO: check arrays are same size
108
+ long array_size = c_array_size(self);
109
+ int index;
110
+ VALUE results = rb_ary_new();
111
+
112
+ for(index = 0; index <= array_size; index++) {
113
+ int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
114
+ int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
115
+
116
+ if(self_attribute == 1 || other_array_attribute == 1) {
117
+ rb_ary_push(results, rb_int_new(1));
118
+ } else {
119
+ rb_ary_push(results, rb_int_new(0));
120
+ }
121
+ }
122
+
123
+ return results;
124
+ }
125
+
126
+ /*
127
+
128
+ def binary_intersection_with(other)
129
+ intersects = []
130
+ self.each_with_index do |n, index|
131
+ if n == 1 && other[index] == 1
132
+ intersects << 1
133
+ else
134
+ intersects << 0
135
+ end
136
+ end
137
+
138
+ intersects
139
+ end
140
+
141
+ */
142
+ static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
143
+ //TODO: check arrays are same size
144
+ long array_size = c_array_size(self);
145
+ int index;
146
+ VALUE results = rb_ary_new();
147
+
148
+ for(index = 0; index <= array_size; index++) {
149
+ int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
150
+ int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
151
+
152
+ if(self_attribute == 1 && other_array_attribute == 1) {
153
+ rb_ary_push(results, rb_int_new(1));
154
+ } else {
155
+ rb_ary_push(results, rb_int_new(0));
156
+ }
157
+ }
158
+
159
+ return results;
160
+ }
161
+
162
+ // return the size of a Ruby array - 1
163
+ long c_array_size(VALUE array) {
164
+ return (RARRAY(array)->len - 1);
165
+ }
166
+
167
+ void Init_core() {
168
+ VALUE distance_measures = rb_define_module("DistanceMeasures");
169
+ rb_define_method(distance_measures, "dot_product", rb_dot_product, 1);
170
+ rb_define_method(distance_measures, "sum_of_squares", rb_sum_of_squares, 0);
171
+ rb_define_method(distance_measures, "euclidean_normalize", rb_euclidean_normalize, 0);
172
+ rb_define_method(distance_measures, "binary_union_with", rb_binary_union_with, 1);
173
+ rb_define_method(distance_measures, "binary_intersection_with", rb_binary_intersection_with, 1);
174
+ }
@@ -0,0 +1,2 @@
1
+ require 'mkmf'
2
+ create_makefile('core/core')
@@ -0,0 +1,37 @@
1
+ #include <ruby.h>
2
+ #include <math.h>
3
+
4
+ /*
5
+
6
+ def euclidean_distance(other)
7
+ sum = 0.0
8
+ self.each_index do |i|
9
+ sum += (self[i] - other[i])**2
10
+ end
11
+ Math.sqrt(sum)
12
+ end
13
+
14
+ */
15
+ static VALUE rb_euclidean_distance(VALUE self, VALUE other_array) {
16
+ double value = 0.0;
17
+
18
+ //TODO: check they're the same size
19
+ long vector_length = (RARRAY(self)->len - 1);
20
+ int index;
21
+
22
+ for(index = 0; index <= vector_length; index++) {
23
+ double x, y;
24
+
25
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
26
+ y = NUM2DBL(RARRAY(other_array)->ptr[index]);
27
+
28
+ value += pow(x - y, 2);
29
+ }
30
+
31
+ return rb_float_new(sqrt(value));
32
+ }
33
+
34
+ void Init_euclidean_distance() {
35
+ VALUE distance_measures = rb_define_module("DistanceMeasures");
36
+ rb_define_method(distance_measures, "euclidean_distance", rb_euclidean_distance, 1);
37
+ }
@@ -0,0 +1,2 @@
1
+ require 'mkmf'
2
+ create_makefile('euclidean_distance/euclidean_distance')
Binary file
@@ -1,9 +1,8 @@
1
- # http://en.wikipedia.org/wiki/Cosine_similarity
2
1
  module DistanceMeasures
3
2
  def cosine_similarity(other)
4
3
  dot_product = self.dot_product(other)
5
4
  normalization = self.euclidean_normalize * other.euclidean_normalize
6
-
5
+
7
6
  handle_nan(dot_product / normalization)
8
7
  end
9
8
  end
@@ -8,4 +8,22 @@ require 'distance_measures/jaccard'
8
8
 
9
9
  class Array
10
10
  include DistanceMeasures
11
+
12
+ # http://en.wikipedia.org/wiki/Intersection_(set_theory)
13
+ def intersection_with(other)
14
+ (self & other)
15
+ end
16
+
17
+ # http://en.wikipedia.org/wiki/Union_(set_theory)
18
+ def union_with(other)
19
+ (self + other).uniq
20
+ end
21
+
22
+ private
23
+
24
+ # Checks if we're dealing with NaN's and will return 0.0 unless
25
+ # handle NaN's is set to false
26
+ def handle_nan(result)
27
+ result.nan? ? 0.0 : result
28
+ end
11
29
  end
@@ -1,101 +1,117 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "DistanceMeasures" do
4
-
4
+
5
5
  describe "Euclidean Distance" do
6
- it "should return 1" do
6
+ it "should return 0.0" do
7
7
  array.euclidean_distance(array).should == 0.0
8
8
  end
9
+
10
+ it "should return 4.0" do
11
+ [5].euclidean_distance([1]).should == 4.0
12
+ end
9
13
  end
10
-
14
+
11
15
  describe "Cosine Similarity" do
12
16
  it "should return 1.0" do
13
- array.cosine_similarity(array).should > 0.99
17
+ array.cosine_similarity(array).should.to_s == "1.0" # WTF
14
18
  end
15
-
19
+
16
20
  it "should handle NaN's" do
17
21
  [0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
18
22
  end
19
23
  end
20
-
24
+
21
25
  describe "Tanimoto Coefficient" do
22
26
  it "should return 1.0" do
23
27
  array.tanimoto_coefficient(array).should == 1.0
24
28
  end
25
-
29
+
26
30
  it "should handle NaN's" do
27
31
  [0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
28
32
  end
29
33
  end
30
-
34
+
31
35
  describe "Sum of Squares" do
32
36
  it "should return 50" do
33
37
  array.sum_of_squares.should == 50
34
38
  end
35
39
  end
36
-
40
+
37
41
  describe "Jaccard" do
38
42
  describe "Jaccard Distance" do
39
43
  it "should return" do
40
44
  array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
41
45
  end
42
46
  end
43
-
47
+
44
48
  describe "Jaccard Index" do
45
49
  it "should return" do
46
50
  array_2.jaccard_index(array_3).should == 3.0/7.0
47
51
  end
48
52
  end
49
-
53
+
50
54
  describe "Binary Jaccard Index" do
51
55
  it "should return 1/4" do
52
56
  [1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
53
57
  end
54
58
  end
55
59
  end
56
-
60
+
57
61
  describe "Binary Jaccard Distance" do
58
62
  it "should return 0.75" do
59
63
  [1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
60
64
  end
61
65
  end
62
-
66
+
63
67
  describe "Intersection" do
64
68
  it "should return [7,4,1]" do
65
69
  array_2.intersection_with(array_3).should == [7,4,1]
66
70
  end
67
71
  end
68
-
72
+
69
73
  describe "Union" do
70
74
  it "should return " do
71
- array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
75
+ array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
72
76
  end
73
77
  end
74
-
78
+
75
79
  describe "Binary Intersection" do
76
80
  it "should return [0,1,0,0]" do
77
81
  [1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
78
82
  end
79
83
  end
80
-
84
+
81
85
  describe "Binary Union" do
82
86
  it "should return [1,1,1,0]" do
83
87
  [1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
84
88
  end
85
89
  end
86
-
90
+
91
+ describe "Dot Product" do
92
+ it "should return 50" do
93
+ [5, 5].dot_product([5, 5]).should == 50.0
94
+ end
95
+ end
96
+
97
+ describe "Euclidean normalize" do
98
+ it "should" do
99
+ [10].euclidean_normalize.should == 10
100
+ end
101
+ end
102
+
87
103
  private
88
-
104
+
89
105
  def array
90
106
  [5, 5]
91
107
  end
92
-
108
+
93
109
  def array_2
94
110
  [7, 3, 2, 4, 1]
95
111
  end
96
-
112
+
97
113
  def array_3
98
114
  [4,1,9,7,5]
99
115
  end
100
-
116
+
101
117
  end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: distance_measures
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
5
10
  platform: ruby
6
11
  authors:
7
12
  - reddavis
@@ -9,25 +14,30 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-01-26 00:00:00 +00:00
17
+ date: 2010-07-31 00:00:00 +01:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: rspec
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
20
24
  requirements:
21
25
  - - ">="
22
26
  - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 9
23
31
  version: 1.2.9
24
- version:
25
- description: A bundle of distance measures
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: A bundle of distance measures with C extensions for the slow bits
26
35
  email: reddavis@gmail.com
27
36
  executables: []
28
37
 
29
- extensions: []
30
-
38
+ extensions:
39
+ - ext/core/extconf.rb
40
+ - ext/euclidean_distance/extconf.rb
31
41
  extra_rdoc_files:
32
42
  - LICENSE
33
43
  - README.rdoc
@@ -39,10 +49,14 @@ files:
39
49
  - Rakefile
40
50
  - VERSION
41
51
  - distance_measures.gemspec
52
+ - ext/core/core.c
53
+ - ext/core/extconf.rb
54
+ - ext/euclidean_distance/euclidean_distance.c
55
+ - ext/euclidean_distance/extconf.rb
42
56
  - lib/distance_measures.rb
43
- - lib/distance_measures/core.rb
57
+ - lib/distance_measures/core.bundle
44
58
  - lib/distance_measures/cosine_similarity.rb
45
- - lib/distance_measures/euclidean_distance.rb
59
+ - lib/distance_measures/euclidean_distance.bundle
46
60
  - lib/distance_measures/jaccard.rb
47
61
  - lib/distance_measures/tanimoto_coefficient.rb
48
62
  - spec/distance_measures_spec.rb
@@ -61,21 +75,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
61
75
  requirements:
62
76
  - - ">="
63
77
  - !ruby/object:Gem::Version
78
+ segments:
79
+ - 0
64
80
  version: "0"
65
- version:
66
81
  required_rubygems_version: !ruby/object:Gem::Requirement
67
82
  requirements:
68
83
  - - ">="
69
84
  - !ruby/object:Gem::Version
85
+ segments:
86
+ - 0
70
87
  version: "0"
71
- version:
72
88
  requirements: []
73
89
 
74
90
  rubyforge_project:
75
- rubygems_version: 1.3.5
91
+ rubygems_version: 1.3.6
76
92
  signing_key:
77
93
  specification_version: 3
78
- summary: A bundle of distance measures
94
+ summary: A bundle of distance measures with C extensions for the slow bits
79
95
  test_files:
80
96
  - spec/distance_measures_spec.rb
81
97
  - spec/spec_helper.rb
@@ -1,68 +0,0 @@
1
- module DistanceMeasures
2
- def dot_product(other)
3
- sum = 0.0
4
- self.each_with_index do |n, index|
5
- sum += n * other[index]
6
- end
7
-
8
- sum
9
- end
10
-
11
- def euclidean_normalize
12
- sum = 0.0
13
- self.each do |n|
14
- sum += n ** 2
15
- end
16
-
17
- Math.sqrt(sum)
18
- end
19
-
20
- def sum_of_squares
21
- inject(0) {|sum, n| sum + n ** 2}
22
- end
23
-
24
- # http://en.wikipedia.org/wiki/Intersection_(set_theory)
25
- def intersection_with(other)
26
- (self & other)
27
- end
28
-
29
- # http://en.wikipedia.org/wiki/Union_(set_theory)
30
- def union_with(other)
31
- (self + other).uniq
32
- end
33
-
34
- # 1's & 0's
35
- def binary_intersection_with(other)
36
- intersects = []
37
- self.each_with_index do |n, index|
38
- if n == 1 && other[index] == 1
39
- intersects << 1
40
- else
41
- intersects << 0
42
- end
43
- end
44
-
45
- intersects
46
- end
47
-
48
- def binary_union_with(other)
49
- unions = []
50
- self.each_with_index do |n, index|
51
- if n == 1 || other[index] == 1
52
- unions << 1
53
- else
54
- unions << 0
55
- end
56
- end
57
-
58
- unions
59
- end
60
-
61
- private
62
-
63
- # Checks if we're dealing with NaN's and will return 0.0 unless
64
- # handle NaN's is set to false
65
- def handle_nan(result)
66
- result.nan? ? 0.0 : result
67
- end
68
- end
@@ -1,10 +0,0 @@
1
- # http://en.wikipedia.org/wiki/Euclidean_distance
2
- module DistanceMeasures
3
- def euclidean_distance(other)
4
- sum = 0.0
5
- self.each_index do |i|
6
- sum += (self[i] - other[i])**2
7
- end
8
- Math.sqrt(sum)
9
- end
10
- end