distance_measures 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -17,5 +17,7 @@ tmtags
17
17
  coverage
18
18
  rdoc
19
19
  pkg
20
+ tmp/*
21
+ benchmarks/*
20
22
 
21
23
  ## PROJECT::SPECIFIC
data/Rakefile CHANGED
@@ -1,16 +1,18 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'rake/extensiontask'
3
4
 
4
5
  begin
5
6
  require 'jeweler'
6
7
  Jeweler::Tasks.new do |gem|
7
8
  gem.name = "distance_measures"
8
- gem.summary = %Q{A bundle of distance measures}
9
- gem.description = %Q{A bundle of distance measures}
9
+ gem.summary = %Q{A bundle of distance measures with C extensions for the slow bits}
10
+ gem.description = %Q{A bundle of distance measures with C extensions for the slow bits}
10
11
  gem.email = "reddavis@gmail.com"
11
12
  gem.homepage = "http://github.com/reddavis/distance_measure"
12
13
  gem.authors = ["reddavis"]
13
14
  gem.add_development_dependency "rspec", ">= 1.2.9"
15
+ gem.extensions = FileList['ext/**/extconf.rb']
14
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
17
  end
16
18
  Jeweler::GemcutterTasks.new
@@ -30,8 +32,17 @@ Spec::Rake::SpecTask.new(:rcov) do |spec|
30
32
  spec.rcov = true
31
33
  end
32
34
 
33
- task :spec => :check_dependencies
35
+ # Euclidean Distance
36
+ Rake::ExtensionTask.new('euclidean_distance') do |ext|
37
+ ext.lib_dir = File.join('lib', 'distance_measures')
38
+ end
39
+
40
+ # Core
41
+ Rake::ExtensionTask.new('core') do |ext|
42
+ ext.lib_dir = File.join('lib', 'distance_measures')
43
+ end
34
44
 
45
+ task :spec => :check_dependencies
35
46
  task :default => :spec
36
47
 
37
48
  require 'rake/rdoctask'
@@ -43,3 +54,5 @@ Rake::RDocTask.new do |rdoc|
43
54
  rdoc.rdoc_files.include('README*')
44
55
  rdoc.rdoc_files.include('lib/**/*.rb')
45
56
  end
57
+
58
+ Rake::Task[:spec].prerequisites << :compile
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -5,13 +5,14 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{distance_measures}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2010-01-26}
13
- s.description = %q{A bundle of distance measures}
12
+ s.date = %q{2010-07-31}
13
+ s.description = %q{A bundle of distance measures with C extensions for the slow bits}
14
14
  s.email = %q{reddavis@gmail.com}
15
+ s.extensions = ["ext/core/extconf.rb", "ext/euclidean_distance/extconf.rb"]
15
16
  s.extra_rdoc_files = [
16
17
  "LICENSE",
17
18
  "README.rdoc"
@@ -24,10 +25,14 @@ Gem::Specification.new do |s|
24
25
  "Rakefile",
25
26
  "VERSION",
26
27
  "distance_measures.gemspec",
28
+ "ext/core/core.c",
29
+ "ext/core/extconf.rb",
30
+ "ext/euclidean_distance/euclidean_distance.c",
31
+ "ext/euclidean_distance/extconf.rb",
27
32
  "lib/distance_measures.rb",
28
- "lib/distance_measures/core.rb",
33
+ "lib/distance_measures/core.bundle",
29
34
  "lib/distance_measures/cosine_similarity.rb",
30
- "lib/distance_measures/euclidean_distance.rb",
35
+ "lib/distance_measures/euclidean_distance.bundle",
31
36
  "lib/distance_measures/jaccard.rb",
32
37
  "lib/distance_measures/tanimoto_coefficient.rb",
33
38
  "spec/distance_measures_spec.rb",
@@ -37,8 +42,8 @@ Gem::Specification.new do |s|
37
42
  s.homepage = %q{http://github.com/reddavis/distance_measure}
38
43
  s.rdoc_options = ["--charset=UTF-8"]
39
44
  s.require_paths = ["lib"]
40
- s.rubygems_version = %q{1.3.5}
41
- s.summary = %q{A bundle of distance measures}
45
+ s.rubygems_version = %q{1.3.6}
46
+ s.summary = %q{A bundle of distance measures with C extensions for the slow bits}
42
47
  s.test_files = [
43
48
  "spec/distance_measures_spec.rb",
44
49
  "spec/spec_helper.rb"
data/ext/core/core.c ADDED
@@ -0,0 +1,174 @@
1
+ #include <ruby.h>
2
+ #include <math.h>
3
+
4
+ // Prototypes
5
+ long c_array_size(VALUE array);
6
+ // END
7
+
8
+ /*
9
+
10
+ def dot_product(other)
11
+ sum = 0.0
12
+ self.each_with_index do |n, index|
13
+ sum += n * other[index]
14
+ end
15
+
16
+ sum
17
+ end
18
+
19
+ */
20
+ static VALUE rb_dot_product(VALUE self, VALUE other_array) {
21
+ double sum = 0;
22
+
23
+ //TODO: check they're the same size
24
+ long array_size = c_array_size(self);
25
+ int index;
26
+
27
+ for(index = 0; index <= array_size; index++) {
28
+ double x, y;
29
+
30
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
31
+ y = NUM2DBL(RARRAY(other_array)->ptr[index]);
32
+
33
+ sum += x * y;
34
+ }
35
+
36
+ return rb_float_new(sum);
37
+ }
38
+
39
+ /*
40
+
41
+ def sum_of_squares
42
+ inject(0) {|sum, n| sum + n ** 2}
43
+ end
44
+
45
+ */
46
+ static VALUE rb_sum_of_squares(VALUE self) {
47
+ double sum = 0;
48
+ long array_size = c_array_size(self);
49
+ int index;
50
+
51
+ for(index = 0; index <= array_size; index++) {
52
+ double x;
53
+
54
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
55
+
56
+ sum += pow(x, 2);
57
+ }
58
+
59
+ return rb_float_new(sum);
60
+ }
61
+
62
+ /*
63
+
64
+ def euclidean_normalize
65
+ sum = 0.0
66
+ self.each do |n|
67
+ sum += n ** 2
68
+ end
69
+
70
+ Math.sqrt(sum)
71
+ end
72
+
73
+ */
74
+ static VALUE rb_euclidean_normalize(VALUE self) {
75
+ double sum = 0;
76
+ long array_size = c_array_size(self);
77
+ int index;
78
+
79
+ for(index = 0; index <= array_size; index++) {
80
+ double x;
81
+
82
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
83
+
84
+ sum += pow(x, 2);
85
+ }
86
+
87
+ return rb_float_new(sqrt(sum));
88
+ }
89
+
90
+ /*
91
+
92
+ def binary_union_with(other)
93
+ unions = []
94
+ self.each_with_index do |n, index|
95
+ if n == 1 || other[index] == 1
96
+ unions << 1
97
+ else
98
+ unions << 0
99
+ end
100
+ end
101
+
102
+ unions
103
+ end
104
+
105
+ */
106
+ static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
107
+ //TODO: check arrays are same size
108
+ long array_size = c_array_size(self);
109
+ int index;
110
+ VALUE results = rb_ary_new();
111
+
112
+ for(index = 0; index <= array_size; index++) {
113
+ int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
114
+ int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
115
+
116
+ if(self_attribute == 1 || other_array_attribute == 1) {
117
+ rb_ary_push(results, rb_int_new(1));
118
+ } else {
119
+ rb_ary_push(results, rb_int_new(0));
120
+ }
121
+ }
122
+
123
+ return results;
124
+ }
125
+
126
+ /*
127
+
128
+ def binary_intersection_with(other)
129
+ intersects = []
130
+ self.each_with_index do |n, index|
131
+ if n == 1 && other[index] == 1
132
+ intersects << 1
133
+ else
134
+ intersects << 0
135
+ end
136
+ end
137
+
138
+ intersects
139
+ end
140
+
141
+ */
142
+ static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
143
+ //TODO: check arrays are same size
144
+ long array_size = c_array_size(self);
145
+ int index;
146
+ VALUE results = rb_ary_new();
147
+
148
+ for(index = 0; index <= array_size; index++) {
149
+ int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
150
+ int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
151
+
152
+ if(self_attribute == 1 && other_array_attribute == 1) {
153
+ rb_ary_push(results, rb_int_new(1));
154
+ } else {
155
+ rb_ary_push(results, rb_int_new(0));
156
+ }
157
+ }
158
+
159
+ return results;
160
+ }
161
+
162
+ // return the size of a Ruby array - 1
163
+ long c_array_size(VALUE array) {
164
+ return (RARRAY(array)->len - 1);
165
+ }
166
+
167
+ void Init_core() {
168
+ VALUE distance_measures = rb_define_module("DistanceMeasures");
169
+ rb_define_method(distance_measures, "dot_product", rb_dot_product, 1);
170
+ rb_define_method(distance_measures, "sum_of_squares", rb_sum_of_squares, 0);
171
+ rb_define_method(distance_measures, "euclidean_normalize", rb_euclidean_normalize, 0);
172
+ rb_define_method(distance_measures, "binary_union_with", rb_binary_union_with, 1);
173
+ rb_define_method(distance_measures, "binary_intersection_with", rb_binary_intersection_with, 1);
174
+ }
@@ -0,0 +1,2 @@
1
# Generates the Makefile that builds the `core` C extension.
#
# The target is namespaced under distance_measures/ so that `make install`
# places the compiled library at distance_measures/core.<ext>, alongside the
# gem's other lib/distance_measures/* files. The basename stays `core`
# because it selects the Init_core entry point defined in core.c.
require 'mkmf'
create_makefile('distance_measures/core')
@@ -0,0 +1,37 @@
1
+ #include <ruby.h>
2
+ #include <math.h>
3
+
4
+ /*
5
+
6
+ def euclidean_distance(other)
7
+ sum = 0.0
8
+ self.each_index do |i|
9
+ sum += (self[i] - other[i])**2
10
+ end
11
+ Math.sqrt(sum)
12
+ end
13
+
14
+ */
15
+ static VALUE rb_euclidean_distance(VALUE self, VALUE other_array) {
16
+ double value = 0.0;
17
+
18
+ //TODO: check they're the same size
19
+ long vector_length = (RARRAY(self)->len - 1);
20
+ int index;
21
+
22
+ for(index = 0; index <= vector_length; index++) {
23
+ double x, y;
24
+
25
+ x = NUM2DBL(RARRAY(self)->ptr[index]);
26
+ y = NUM2DBL(RARRAY(other_array)->ptr[index]);
27
+
28
+ value += pow(x - y, 2);
29
+ }
30
+
31
+ return rb_float_new(sqrt(value));
32
+ }
33
+
34
+ void Init_euclidean_distance() {
35
+ VALUE distance_measures = rb_define_module("DistanceMeasures");
36
+ rb_define_method(distance_measures, "euclidean_distance", rb_euclidean_distance, 1);
37
+ }
@@ -0,0 +1,2 @@
1
# Generates the Makefile that builds the `euclidean_distance` C extension.
#
# The target is namespaced under distance_measures/ so that `make install`
# places the compiled library at distance_measures/euclidean_distance.<ext>,
# alongside the gem's other lib/distance_measures/* files. The basename stays
# `euclidean_distance` because it selects the Init_euclidean_distance entry
# point defined in euclidean_distance.c.
require 'mkmf'
create_makefile('distance_measures/euclidean_distance')
Binary file
@@ -1,9 +1,8 @@
1
- # http://en.wikipedia.org/wiki/Cosine_similarity
2
1
  module DistanceMeasures
3
2
  def cosine_similarity(other)
4
3
  dot_product = self.dot_product(other)
5
4
  normalization = self.euclidean_normalize * other.euclidean_normalize
6
-
5
+
7
6
  handle_nan(dot_product / normalization)
8
7
  end
9
8
  end
@@ -8,4 +8,22 @@ require 'distance_measures/jaccard'
8
8
 
9
9
  class Array
10
10
  include DistanceMeasures
11
+
12
+ # http://en.wikipedia.org/wiki/Intersection_(set_theory)
13
+ def intersection_with(other)
14
+ (self & other)
15
+ end
16
+
17
+ # http://en.wikipedia.org/wiki/Union_(set_theory)
18
+ def union_with(other)
19
+ (self + other).uniq
20
+ end
21
+
22
+ private
23
+
24
+ # Checks if we're dealing with NaN's and will return 0.0 unless
25
+ # handle NaN's is set to false
26
+ def handle_nan(result)
27
+ result.nan? ? 0.0 : result
28
+ end
11
29
  end
@@ -1,101 +1,117 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "DistanceMeasures" do
4
-
4
+
5
5
  describe "Euclidean Distance" do
6
- it "should return 1" do
6
+ it "should return 0.0" do
7
7
  array.euclidean_distance(array).should == 0.0
8
8
  end
9
+
10
+ it "should return 4.0" do
11
+ [5].euclidean_distance([1]).should == 4.0
12
+ end
9
13
  end
10
-
14
+
11
15
  describe "Cosine Similarity" do
12
16
  it "should return 1.0" do
13
- array.cosine_similarity(array).should > 0.99
17
+ array.cosine_similarity(array).should.to_s == "1.0" # WTF
14
18
  end
15
-
19
+
16
20
  it "should handle NaN's" do
17
21
  [0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
18
22
  end
19
23
  end
20
-
24
+
21
25
  describe "Tanimoto Coefficient" do
22
26
  it "should return 1.0" do
23
27
  array.tanimoto_coefficient(array).should == 1.0
24
28
  end
25
-
29
+
26
30
  it "should handle NaN's" do
27
31
  [0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
28
32
  end
29
33
  end
30
-
34
+
31
35
  describe "Sum of Squares" do
32
36
  it "should return 50" do
33
37
  array.sum_of_squares.should == 50
34
38
  end
35
39
  end
36
-
40
+
37
41
  describe "Jaccard" do
38
42
  describe "Jaccard Distance" do
39
43
  it "should return" do
40
44
  array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
41
45
  end
42
46
  end
43
-
47
+
44
48
  describe "Jaccard Index" do
45
49
  it "should return" do
46
50
  array_2.jaccard_index(array_3).should == 3.0/7.0
47
51
  end
48
52
  end
49
-
53
+
50
54
  describe "Binary Jaccard Index" do
51
55
  it "should return 1/4" do
52
56
  [1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
53
57
  end
54
58
  end
55
59
  end
56
-
60
+
57
61
  describe "Binary Jaccard Distance" do
58
62
  it "should return 0.75" do
59
63
  [1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
60
64
  end
61
65
  end
62
-
66
+
63
67
  describe "Intersection" do
64
68
  it "should return [7,4,1]" do
65
69
  array_2.intersection_with(array_3).should == [7,4,1]
66
70
  end
67
71
  end
68
-
72
+
69
73
  describe "Union" do
70
74
  it "should return " do
71
- array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
75
+ array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
72
76
  end
73
77
  end
74
-
78
+
75
79
  describe "Binary Intersection" do
76
80
  it "should return [0,1,0,0]" do
77
81
  [1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
78
82
  end
79
83
  end
80
-
84
+
81
85
  describe "Binary Union" do
82
86
  it "should return [1,1,1,0]" do
83
87
  [1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
84
88
  end
85
89
  end
86
-
90
+
91
+ describe "Dot Product" do
92
+ it "should return 50" do
93
+ [5, 5].dot_product([5, 5]).should == 50.0
94
+ end
95
+ end
96
+
97
+ describe "Euclidean normalize" do
98
+ it "should" do
99
+ [10].euclidean_normalize.should == 10
100
+ end
101
+ end
102
+
87
103
  private
88
-
104
+
89
105
  def array
90
106
  [5, 5]
91
107
  end
92
-
108
+
93
109
  def array_2
94
110
  [7, 3, 2, 4, 1]
95
111
  end
96
-
112
+
97
113
  def array_3
98
114
  [4,1,9,7,5]
99
115
  end
100
-
116
+
101
117
  end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: distance_measures
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
5
10
  platform: ruby
6
11
  authors:
7
12
  - reddavis
@@ -9,25 +14,30 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-01-26 00:00:00 +00:00
17
+ date: 2010-07-31 00:00:00 +01:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: rspec
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
20
24
  requirements:
21
25
  - - ">="
22
26
  - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 9
23
31
  version: 1.2.9
24
- version:
25
- description: A bundle of distance measures
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: A bundle of distance measures with C extensions for the slow bits
26
35
  email: reddavis@gmail.com
27
36
  executables: []
28
37
 
29
- extensions: []
30
-
38
+ extensions:
39
+ - ext/core/extconf.rb
40
+ - ext/euclidean_distance/extconf.rb
31
41
  extra_rdoc_files:
32
42
  - LICENSE
33
43
  - README.rdoc
@@ -39,10 +49,14 @@ files:
39
49
  - Rakefile
40
50
  - VERSION
41
51
  - distance_measures.gemspec
52
+ - ext/core/core.c
53
+ - ext/core/extconf.rb
54
+ - ext/euclidean_distance/euclidean_distance.c
55
+ - ext/euclidean_distance/extconf.rb
42
56
  - lib/distance_measures.rb
43
- - lib/distance_measures/core.rb
57
+ - lib/distance_measures/core.bundle
44
58
  - lib/distance_measures/cosine_similarity.rb
45
- - lib/distance_measures/euclidean_distance.rb
59
+ - lib/distance_measures/euclidean_distance.bundle
46
60
  - lib/distance_measures/jaccard.rb
47
61
  - lib/distance_measures/tanimoto_coefficient.rb
48
62
  - spec/distance_measures_spec.rb
@@ -61,21 +75,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
61
75
  requirements:
62
76
  - - ">="
63
77
  - !ruby/object:Gem::Version
78
+ segments:
79
+ - 0
64
80
  version: "0"
65
- version:
66
81
  required_rubygems_version: !ruby/object:Gem::Requirement
67
82
  requirements:
68
83
  - - ">="
69
84
  - !ruby/object:Gem::Version
85
+ segments:
86
+ - 0
70
87
  version: "0"
71
- version:
72
88
  requirements: []
73
89
 
74
90
  rubyforge_project:
75
- rubygems_version: 1.3.5
91
+ rubygems_version: 1.3.6
76
92
  signing_key:
77
93
  specification_version: 3
78
- summary: A bundle of distance measures
94
+ summary: A bundle of distance measures with C extensions for the slow bits
79
95
  test_files:
80
96
  - spec/distance_measures_spec.rb
81
97
  - spec/spec_helper.rb
@@ -1,68 +0,0 @@
1
- module DistanceMeasures
2
- def dot_product(other)
3
- sum = 0.0
4
- self.each_with_index do |n, index|
5
- sum += n * other[index]
6
- end
7
-
8
- sum
9
- end
10
-
11
- def euclidean_normalize
12
- sum = 0.0
13
- self.each do |n|
14
- sum += n ** 2
15
- end
16
-
17
- Math.sqrt(sum)
18
- end
19
-
20
- def sum_of_squares
21
- inject(0) {|sum, n| sum + n ** 2}
22
- end
23
-
24
- # http://en.wikipedia.org/wiki/Intersection_(set_theory)
25
- def intersection_with(other)
26
- (self & other)
27
- end
28
-
29
- # http://en.wikipedia.org/wiki/Union_(set_theory)
30
- def union_with(other)
31
- (self + other).uniq
32
- end
33
-
34
- # 1's & 0's
35
- def binary_intersection_with(other)
36
- intersects = []
37
- self.each_with_index do |n, index|
38
- if n == 1 && other[index] == 1
39
- intersects << 1
40
- else
41
- intersects << 0
42
- end
43
- end
44
-
45
- intersects
46
- end
47
-
48
- def binary_union_with(other)
49
- unions = []
50
- self.each_with_index do |n, index|
51
- if n == 1 || other[index] == 1
52
- unions << 1
53
- else
54
- unions << 0
55
- end
56
- end
57
-
58
- unions
59
- end
60
-
61
- private
62
-
63
- # Checks if we're dealing with NaN's and will return 0.0 unless
64
- # handle NaN's is set to false
65
- def handle_nan(result)
66
- result.nan? ? 0.0 : result
67
- end
68
- end
@@ -1,10 +0,0 @@
1
- # http://en.wikipedia.org/wiki/Euclidean_distance
2
- module DistanceMeasures
3
- def euclidean_distance(other)
4
- sum = 0.0
5
- self.each_index do |i|
6
- sum += (self[i] - other[i])**2
7
- end
8
- Math.sqrt(sum)
9
- end
10
- end