distance_measures 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Rakefile +16 -3
- data/VERSION +1 -1
- data/distance_measures.gemspec +12 -7
- data/ext/core/core.c +174 -0
- data/ext/core/extconf.rb +2 -0
- data/ext/euclidean_distance/euclidean_distance.c +37 -0
- data/ext/euclidean_distance/extconf.rb +2 -0
- data/lib/distance_measures/core.bundle +0 -0
- data/lib/distance_measures/cosine_similarity.rb +1 -2
- data/lib/distance_measures/euclidean_distance.bundle +0 -0
- data/lib/distance_measures.rb +18 -0
- data/spec/distance_measures_spec.rb +38 -22
- metadata +31 -15
- data/lib/distance_measures/core.rb +0 -68
- data/lib/distance_measures/euclidean_distance.rb +0 -10
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
|
+
require 'rake/extensiontask'
|
3
4
|
|
4
5
|
begin
|
5
6
|
require 'jeweler'
|
6
7
|
Jeweler::Tasks.new do |gem|
|
7
8
|
gem.name = "distance_measures"
|
8
|
-
gem.summary = %Q{A bundle of distance measures}
|
9
|
-
gem.description = %Q{A bundle of distance measures}
|
9
|
+
gem.summary = %Q{A bundle of distance measures with C extensions for the slow bits}
|
10
|
+
gem.description = %Q{A bundle of distance measures with C extensions for the slow bits}
|
10
11
|
gem.email = "reddavis@gmail.com"
|
11
12
|
gem.homepage = "http://github.com/reddavis/distance_measure"
|
12
13
|
gem.authors = ["reddavis"]
|
13
14
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
|
+
gem.extensions = FileList['ext/**/extconf.rb']
|
14
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
17
|
end
|
16
18
|
Jeweler::GemcutterTasks.new
|
@@ -30,8 +32,17 @@ Spec::Rake::SpecTask.new(:rcov) do |spec|
|
|
30
32
|
spec.rcov = true
|
31
33
|
end
|
32
34
|
|
33
|
-
|
35
|
+
# Euclidean Distance
|
36
|
+
Rake::ExtensionTask.new('euclidean_distance') do |ext|
|
37
|
+
ext.lib_dir = File.join('lib', 'distance_measures')
|
38
|
+
end
|
39
|
+
|
40
|
+
# Core
|
41
|
+
Rake::ExtensionTask.new('core') do |ext|
|
42
|
+
ext.lib_dir = File.join('lib', 'distance_measures')
|
43
|
+
end
|
34
44
|
|
45
|
+
task :spec => :check_dependencies
|
35
46
|
task :default => :spec
|
36
47
|
|
37
48
|
require 'rake/rdoctask'
|
@@ -43,3 +54,5 @@ Rake::RDocTask.new do |rdoc|
|
|
43
54
|
rdoc.rdoc_files.include('README*')
|
44
55
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
56
|
end
|
57
|
+
|
58
|
+
Rake::Task[:spec].prerequisites << :compile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/distance_measures.gemspec
CHANGED
@@ -5,13 +5,14 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{distance_measures}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2010-
|
13
|
-
s.description = %q{A bundle of distance measures}
|
12
|
+
s.date = %q{2010-07-31}
|
13
|
+
s.description = %q{A bundle of distance measures with C extensions for the slow bits}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
|
+
s.extensions = ["ext/core/extconf.rb", "ext/euclidean_distance/extconf.rb"]
|
15
16
|
s.extra_rdoc_files = [
|
16
17
|
"LICENSE",
|
17
18
|
"README.rdoc"
|
@@ -24,10 +25,14 @@ Gem::Specification.new do |s|
|
|
24
25
|
"Rakefile",
|
25
26
|
"VERSION",
|
26
27
|
"distance_measures.gemspec",
|
28
|
+
"ext/core/core.c",
|
29
|
+
"ext/core/extconf.rb",
|
30
|
+
"ext/euclidean_distance/euclidean_distance.c",
|
31
|
+
"ext/euclidean_distance/extconf.rb",
|
27
32
|
"lib/distance_measures.rb",
|
28
|
-
"lib/distance_measures/core.rb",
|
33
|
+
"lib/distance_measures/core.bundle",
|
29
34
|
"lib/distance_measures/cosine_similarity.rb",
|
30
|
-
"lib/distance_measures/euclidean_distance.rb",
|
35
|
+
"lib/distance_measures/euclidean_distance.bundle",
|
31
36
|
"lib/distance_measures/jaccard.rb",
|
32
37
|
"lib/distance_measures/tanimoto_coefficient.rb",
|
33
38
|
"spec/distance_measures_spec.rb",
|
@@ -37,8 +42,8 @@ Gem::Specification.new do |s|
|
|
37
42
|
s.homepage = %q{http://github.com/reddavis/distance_measure}
|
38
43
|
s.rdoc_options = ["--charset=UTF-8"]
|
39
44
|
s.require_paths = ["lib"]
|
40
|
-
s.rubygems_version = %q{1.3.
|
41
|
-
s.summary = %q{A bundle of distance measures}
|
45
|
+
s.rubygems_version = %q{1.3.6}
|
46
|
+
s.summary = %q{A bundle of distance measures with C extensions for the slow bits}
|
42
47
|
s.test_files = [
|
43
48
|
"spec/distance_measures_spec.rb",
|
44
49
|
"spec/spec_helper.rb"
|
data/ext/core/core.c
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
|
4
|
+
// Prototypes
|
5
|
+
long c_array_size(VALUE array);
|
6
|
+
// END
|
7
|
+
|
8
|
+
/*
|
9
|
+
|
10
|
+
def dot_product(other)
|
11
|
+
sum = 0.0
|
12
|
+
self.each_with_index do |n, index|
|
13
|
+
sum += n * other[index]
|
14
|
+
end
|
15
|
+
|
16
|
+
sum
|
17
|
+
end
|
18
|
+
|
19
|
+
*/
|
20
|
+
static VALUE rb_dot_product(VALUE self, VALUE other_array) {
|
21
|
+
double sum = 0;
|
22
|
+
|
23
|
+
//TODO: check they're the same size
|
24
|
+
long array_size = c_array_size(self);
|
25
|
+
int index;
|
26
|
+
|
27
|
+
for(index = 0; index <= array_size; index++) {
|
28
|
+
double x, y;
|
29
|
+
|
30
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
31
|
+
y = NUM2DBL(RARRAY(other_array)->ptr[index]);
|
32
|
+
|
33
|
+
sum += x * y;
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_float_new(sum);
|
37
|
+
}
|
38
|
+
|
39
|
+
/*
|
40
|
+
|
41
|
+
def sum_of_squares
|
42
|
+
inject(0) {|sum, n| sum + n ** 2}
|
43
|
+
end
|
44
|
+
|
45
|
+
*/
|
46
|
+
static VALUE rb_sum_of_squares(VALUE self) {
|
47
|
+
double sum = 0;
|
48
|
+
long array_size = c_array_size(self);
|
49
|
+
int index;
|
50
|
+
|
51
|
+
for(index = 0; index <= array_size; index++) {
|
52
|
+
double x;
|
53
|
+
|
54
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
55
|
+
|
56
|
+
sum += pow(x, 2);
|
57
|
+
}
|
58
|
+
|
59
|
+
return rb_float_new(sum);
|
60
|
+
}
|
61
|
+
|
62
|
+
/*
|
63
|
+
|
64
|
+
def euclidean_normalize
|
65
|
+
sum = 0.0
|
66
|
+
self.each do |n|
|
67
|
+
sum += n ** 2
|
68
|
+
end
|
69
|
+
|
70
|
+
Math.sqrt(sum)
|
71
|
+
end
|
72
|
+
|
73
|
+
*/
|
74
|
+
static VALUE rb_euclidean_normalize(VALUE self) {
|
75
|
+
double sum = 0;
|
76
|
+
long array_size = c_array_size(self);
|
77
|
+
int index;
|
78
|
+
|
79
|
+
for(index = 0; index <= array_size; index++) {
|
80
|
+
double x;
|
81
|
+
|
82
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
83
|
+
|
84
|
+
sum += pow(x, 2);
|
85
|
+
}
|
86
|
+
|
87
|
+
return rb_float_new(sqrt(sum));
|
88
|
+
}
|
89
|
+
|
90
|
+
/*
|
91
|
+
|
92
|
+
def binary_union_with(other)
|
93
|
+
unions = []
|
94
|
+
self.each_with_index do |n, index|
|
95
|
+
if n == 1 || other[index] == 1
|
96
|
+
unions << 1
|
97
|
+
else
|
98
|
+
unions << 0
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
unions
|
103
|
+
end
|
104
|
+
|
105
|
+
*/
|
106
|
+
static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
|
107
|
+
//TODO: check arrays are same size
|
108
|
+
long array_size = c_array_size(self);
|
109
|
+
int index;
|
110
|
+
VALUE results = rb_ary_new();
|
111
|
+
|
112
|
+
for(index = 0; index <= array_size; index++) {
|
113
|
+
int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
|
114
|
+
int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
|
115
|
+
|
116
|
+
if(self_attribute == 1 || other_array_attribute == 1) {
|
117
|
+
rb_ary_push(results, rb_int_new(1));
|
118
|
+
} else {
|
119
|
+
rb_ary_push(results, rb_int_new(0));
|
120
|
+
}
|
121
|
+
}
|
122
|
+
|
123
|
+
return results;
|
124
|
+
}
|
125
|
+
|
126
|
+
/*
|
127
|
+
|
128
|
+
def binary_intersection_with(other)
|
129
|
+
intersects = []
|
130
|
+
self.each_with_index do |n, index|
|
131
|
+
if n == 1 && other[index] == 1
|
132
|
+
intersects << 1
|
133
|
+
else
|
134
|
+
intersects << 0
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
intersects
|
139
|
+
end
|
140
|
+
|
141
|
+
*/
|
142
|
+
static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
|
143
|
+
//TODO: check arrays are same size
|
144
|
+
long array_size = c_array_size(self);
|
145
|
+
int index;
|
146
|
+
VALUE results = rb_ary_new();
|
147
|
+
|
148
|
+
for(index = 0; index <= array_size; index++) {
|
149
|
+
int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
|
150
|
+
int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
|
151
|
+
|
152
|
+
if(self_attribute == 1 && other_array_attribute == 1) {
|
153
|
+
rb_ary_push(results, rb_int_new(1));
|
154
|
+
} else {
|
155
|
+
rb_ary_push(results, rb_int_new(0));
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
return results;
|
160
|
+
}
|
161
|
+
|
162
|
+
// return the size of a Ruby array - 1
|
163
|
+
long c_array_size(VALUE array) {
|
164
|
+
return (RARRAY(array)->len - 1);
|
165
|
+
}
|
166
|
+
|
167
|
+
void Init_core() {
|
168
|
+
VALUE distance_measures = rb_define_module("DistanceMeasures");
|
169
|
+
rb_define_method(distance_measures, "dot_product", rb_dot_product, 1);
|
170
|
+
rb_define_method(distance_measures, "sum_of_squares", rb_sum_of_squares, 0);
|
171
|
+
rb_define_method(distance_measures, "euclidean_normalize", rb_euclidean_normalize, 0);
|
172
|
+
rb_define_method(distance_measures, "binary_union_with", rb_binary_union_with, 1);
|
173
|
+
rb_define_method(distance_measures, "binary_intersection_with", rb_binary_intersection_with, 1);
|
174
|
+
}
|
data/ext/core/extconf.rb
ADDED
data/ext/euclidean_distance/euclidean_distance.c
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
|
4
|
+
/*
|
5
|
+
|
6
|
+
def euclidean_distance(other)
|
7
|
+
sum = 0.0
|
8
|
+
self.each_index do |i|
|
9
|
+
sum += (self[i] - other[i])**2
|
10
|
+
end
|
11
|
+
Math.sqrt(sum)
|
12
|
+
end
|
13
|
+
|
14
|
+
*/
|
15
|
+
static VALUE rb_euclidean_distance(VALUE self, VALUE other_array) {
|
16
|
+
double value = 0.0;
|
17
|
+
|
18
|
+
//TODO: check they're the same size
|
19
|
+
long vector_length = (RARRAY(self)->len - 1);
|
20
|
+
int index;
|
21
|
+
|
22
|
+
for(index = 0; index <= vector_length; index++) {
|
23
|
+
double x, y;
|
24
|
+
|
25
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
26
|
+
y = NUM2DBL(RARRAY(other_array)->ptr[index]);
|
27
|
+
|
28
|
+
value += pow(x - y, 2);
|
29
|
+
}
|
30
|
+
|
31
|
+
return rb_float_new(sqrt(value));
|
32
|
+
}
|
33
|
+
|
34
|
+
void Init_euclidean_distance() {
|
35
|
+
VALUE distance_measures = rb_define_module("DistanceMeasures");
|
36
|
+
rb_define_method(distance_measures, "euclidean_distance", rb_euclidean_distance, 1);
|
37
|
+
}
|
Binary file
|
data/lib/distance_measures/cosine_similarity.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
|
-
# http://en.wikipedia.org/wiki/Cosine_similarity
|
2
1
|
module DistanceMeasures
|
3
2
|
def cosine_similarity(other)
|
4
3
|
dot_product = self.dot_product(other)
|
5
4
|
normalization = self.euclidean_normalize * other.euclidean_normalize
|
6
|
-
|
5
|
+
|
7
6
|
handle_nan(dot_product / normalization)
|
8
7
|
end
|
9
8
|
end
|
Binary file
|
data/lib/distance_measures.rb
CHANGED
@@ -8,4 +8,22 @@ require 'distance_measures/jaccard'
|
|
8
8
|
|
9
9
|
class Array
|
10
10
|
include DistanceMeasures
|
11
|
+
|
12
|
+
# http://en.wikipedia.org/wiki/Intersection_(set_theory)
|
13
|
+
def intersection_with(other)
|
14
|
+
(self & other)
|
15
|
+
end
|
16
|
+
|
17
|
+
# http://en.wikipedia.org/wiki/Union_(set_theory)
|
18
|
+
def union_with(other)
|
19
|
+
(self + other).uniq
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
# Checks if we're dealing with NaN's and will return 0.0 unless
|
25
|
+
# handle NaN's is set to false
|
26
|
+
def handle_nan(result)
|
27
|
+
result.nan? ? 0.0 : result
|
28
|
+
end
|
11
29
|
end
|
data/spec/distance_measures_spec.rb
CHANGED
@@ -1,101 +1,117 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe "DistanceMeasures" do
|
4
|
-
|
4
|
+
|
5
5
|
describe "Euclidean Distance" do
|
6
|
-
it "should return
|
6
|
+
it "should return 0.0" do
|
7
7
|
array.euclidean_distance(array).should == 0.0
|
8
8
|
end
|
9
|
+
|
10
|
+
it "should return 4.0" do
|
11
|
+
[5].euclidean_distance([1]).should == 4.0
|
12
|
+
end
|
9
13
|
end
|
10
|
-
|
14
|
+
|
11
15
|
describe "Cosine Similarity" do
|
12
16
|
it "should return 1.0" do
|
13
|
-
array.cosine_similarity(array).should
|
17
|
+
array.cosine_similarity(array).should.to_s == "1.0" # WTF
|
14
18
|
end
|
15
|
-
|
19
|
+
|
16
20
|
it "should handle NaN's" do
|
17
21
|
[0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
|
18
22
|
end
|
19
23
|
end
|
20
|
-
|
24
|
+
|
21
25
|
describe "Tanimoto Coefficient" do
|
22
26
|
it "should return 1.0" do
|
23
27
|
array.tanimoto_coefficient(array).should == 1.0
|
24
28
|
end
|
25
|
-
|
29
|
+
|
26
30
|
it "should handle NaN's" do
|
27
31
|
[0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
|
28
32
|
end
|
29
33
|
end
|
30
|
-
|
34
|
+
|
31
35
|
describe "Sum of Squares" do
|
32
36
|
it "should return 50" do
|
33
37
|
array.sum_of_squares.should == 50
|
34
38
|
end
|
35
39
|
end
|
36
|
-
|
40
|
+
|
37
41
|
describe "Jaccard" do
|
38
42
|
describe "Jaccard Distance" do
|
39
43
|
it "should return" do
|
40
44
|
array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
|
41
45
|
end
|
42
46
|
end
|
43
|
-
|
47
|
+
|
44
48
|
describe "Jaccard Index" do
|
45
49
|
it "should return" do
|
46
50
|
array_2.jaccard_index(array_3).should == 3.0/7.0
|
47
51
|
end
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
describe "Binary Jaccard Index" do
|
51
55
|
it "should return 1/4" do
|
52
56
|
[1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
|
53
57
|
end
|
54
58
|
end
|
55
59
|
end
|
56
|
-
|
60
|
+
|
57
61
|
describe "Binary Jaccard Distance" do
|
58
62
|
it "should return 0.75" do
|
59
63
|
[1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
|
60
64
|
end
|
61
65
|
end
|
62
|
-
|
66
|
+
|
63
67
|
describe "Intersection" do
|
64
68
|
it "should return [7,4,1]" do
|
65
69
|
array_2.intersection_with(array_3).should == [7,4,1]
|
66
70
|
end
|
67
71
|
end
|
68
|
-
|
72
|
+
|
69
73
|
describe "Union" do
|
70
74
|
it "should return " do
|
71
|
-
array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
|
75
|
+
array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
|
72
76
|
end
|
73
77
|
end
|
74
|
-
|
78
|
+
|
75
79
|
describe "Binary Intersection" do
|
76
80
|
it "should return [0,1,0,0]" do
|
77
81
|
[1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
|
78
82
|
end
|
79
83
|
end
|
80
|
-
|
84
|
+
|
81
85
|
describe "Binary Union" do
|
82
86
|
it "should return [1,1,1,0]" do
|
83
87
|
[1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
|
84
88
|
end
|
85
89
|
end
|
86
|
-
|
90
|
+
|
91
|
+
describe "Dot Product" do
|
92
|
+
it "should return 50" do
|
93
|
+
[5, 5].dot_product([5, 5]).should == 50.0
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "Euclidean normalize" do
|
98
|
+
it "should" do
|
99
|
+
[10].euclidean_normalize.should == 10
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
87
103
|
private
|
88
|
-
|
104
|
+
|
89
105
|
def array
|
90
106
|
[5, 5]
|
91
107
|
end
|
92
|
-
|
108
|
+
|
93
109
|
def array_2
|
94
110
|
[7, 3, 2, 4, 1]
|
95
111
|
end
|
96
|
-
|
112
|
+
|
97
113
|
def array_3
|
98
114
|
[4,1,9,7,5]
|
99
115
|
end
|
100
|
-
|
116
|
+
|
101
117
|
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: distance_measures
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- reddavis
|
@@ -9,25 +14,30 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-07-31 00:00:00 +01:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: rspec
|
17
|
-
|
18
|
-
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
24
|
requirements:
|
21
25
|
- - ">="
|
22
26
|
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 2
|
30
|
+
- 9
|
23
31
|
version: 1.2.9
|
24
|
-
|
25
|
-
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: A bundle of distance measures with C extensions for the slow bits
|
26
35
|
email: reddavis@gmail.com
|
27
36
|
executables: []
|
28
37
|
|
29
|
-
extensions:
|
30
|
-
|
38
|
+
extensions:
|
39
|
+
- ext/core/extconf.rb
|
40
|
+
- ext/euclidean_distance/extconf.rb
|
31
41
|
extra_rdoc_files:
|
32
42
|
- LICENSE
|
33
43
|
- README.rdoc
|
@@ -39,10 +49,14 @@ files:
|
|
39
49
|
- Rakefile
|
40
50
|
- VERSION
|
41
51
|
- distance_measures.gemspec
|
52
|
+
- ext/core/core.c
|
53
|
+
- ext/core/extconf.rb
|
54
|
+
- ext/euclidean_distance/euclidean_distance.c
|
55
|
+
- ext/euclidean_distance/extconf.rb
|
42
56
|
- lib/distance_measures.rb
|
43
|
-
- lib/distance_measures/core.rb
|
57
|
+
- lib/distance_measures/core.bundle
|
44
58
|
- lib/distance_measures/cosine_similarity.rb
|
45
|
-
- lib/distance_measures/euclidean_distance.rb
|
59
|
+
- lib/distance_measures/euclidean_distance.bundle
|
46
60
|
- lib/distance_measures/jaccard.rb
|
47
61
|
- lib/distance_measures/tanimoto_coefficient.rb
|
48
62
|
- spec/distance_measures_spec.rb
|
@@ -61,21 +75,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
75
|
requirements:
|
62
76
|
- - ">="
|
63
77
|
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
64
80
|
version: "0"
|
65
|
-
version:
|
66
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
82
|
requirements:
|
68
83
|
- - ">="
|
69
84
|
- !ruby/object:Gem::Version
|
85
|
+
segments:
|
86
|
+
- 0
|
70
87
|
version: "0"
|
71
|
-
version:
|
72
88
|
requirements: []
|
73
89
|
|
74
90
|
rubyforge_project:
|
75
|
-
rubygems_version: 1.3.
|
91
|
+
rubygems_version: 1.3.6
|
76
92
|
signing_key:
|
77
93
|
specification_version: 3
|
78
|
-
summary: A bundle of distance measures
|
94
|
+
summary: A bundle of distance measures with C extensions for the slow bits
|
79
95
|
test_files:
|
80
96
|
- spec/distance_measures_spec.rb
|
81
97
|
- spec/spec_helper.rb
|
data/lib/distance_measures/core.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
module DistanceMeasures
|
2
|
-
def dot_product(other)
|
3
|
-
sum = 0.0
|
4
|
-
self.each_with_index do |n, index|
|
5
|
-
sum += n * other[index]
|
6
|
-
end
|
7
|
-
|
8
|
-
sum
|
9
|
-
end
|
10
|
-
|
11
|
-
def euclidean_normalize
|
12
|
-
sum = 0.0
|
13
|
-
self.each do |n|
|
14
|
-
sum += n ** 2
|
15
|
-
end
|
16
|
-
|
17
|
-
Math.sqrt(sum)
|
18
|
-
end
|
19
|
-
|
20
|
-
def sum_of_squares
|
21
|
-
inject(0) {|sum, n| sum + n ** 2}
|
22
|
-
end
|
23
|
-
|
24
|
-
# http://en.wikipedia.org/wiki/Intersection_(set_theory)
|
25
|
-
def intersection_with(other)
|
26
|
-
(self & other)
|
27
|
-
end
|
28
|
-
|
29
|
-
# http://en.wikipedia.org/wiki/Union_(set_theory)
|
30
|
-
def union_with(other)
|
31
|
-
(self + other).uniq
|
32
|
-
end
|
33
|
-
|
34
|
-
# 1's & 0's
|
35
|
-
def binary_intersection_with(other)
|
36
|
-
intersects = []
|
37
|
-
self.each_with_index do |n, index|
|
38
|
-
if n == 1 && other[index] == 1
|
39
|
-
intersects << 1
|
40
|
-
else
|
41
|
-
intersects << 0
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
intersects
|
46
|
-
end
|
47
|
-
|
48
|
-
def binary_union_with(other)
|
49
|
-
unions = []
|
50
|
-
self.each_with_index do |n, index|
|
51
|
-
if n == 1 || other[index] == 1
|
52
|
-
unions << 1
|
53
|
-
else
|
54
|
-
unions << 0
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
unions
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
# Checks if we're dealing with NaN's and will return 0.0 unless
|
64
|
-
# handle NaN's is set to false
|
65
|
-
def handle_nan(result)
|
66
|
-
result.nan? ? 0.0 : result
|
67
|
-
end
|
68
|
-
end
|