distance_measures 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Rakefile +16 -3
- data/VERSION +1 -1
- data/distance_measures.gemspec +12 -7
- data/ext/core/core.c +174 -0
- data/ext/core/extconf.rb +2 -0
- data/ext/euclidean_distance/euclidean_distance.c +37 -0
- data/ext/euclidean_distance/extconf.rb +2 -0
- data/lib/distance_measures/core.bundle +0 -0
- data/lib/distance_measures/cosine_similarity.rb +1 -2
- data/lib/distance_measures/euclidean_distance.bundle +0 -0
- data/lib/distance_measures.rb +18 -0
- data/spec/distance_measures_spec.rb +38 -22
- metadata +31 -15
- data/lib/distance_measures/core.rb +0 -68
- data/lib/distance_measures/euclidean_distance.rb +0 -10
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
|
+
require 'rake/extensiontask'
|
3
4
|
|
4
5
|
begin
|
5
6
|
require 'jeweler'
|
6
7
|
Jeweler::Tasks.new do |gem|
|
7
8
|
gem.name = "distance_measures"
|
8
|
-
gem.summary = %Q{A bundle of distance measures}
|
9
|
-
gem.description = %Q{A bundle of distance measures}
|
9
|
+
gem.summary = %Q{A bundle of distance measures with C extensions for the slow bits}
|
10
|
+
gem.description = %Q{A bundle of distance measures with C extensions for the slow bits}
|
10
11
|
gem.email = "reddavis@gmail.com"
|
11
12
|
gem.homepage = "http://github.com/reddavis/distance_measure"
|
12
13
|
gem.authors = ["reddavis"]
|
13
14
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
|
+
gem.extensions = FileList['ext/**/extconf.rb']
|
14
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
17
|
end
|
16
18
|
Jeweler::GemcutterTasks.new
|
@@ -30,8 +32,17 @@ Spec::Rake::SpecTask.new(:rcov) do |spec|
|
|
30
32
|
spec.rcov = true
|
31
33
|
end
|
32
34
|
|
33
|
-
|
35
|
+
# Euclidean Distance
|
36
|
+
Rake::ExtensionTask.new('euclidean_distance') do |ext|
|
37
|
+
ext.lib_dir = File.join('lib', 'distance_measures')
|
38
|
+
end
|
39
|
+
|
40
|
+
# Core
|
41
|
+
Rake::ExtensionTask.new('core') do |ext|
|
42
|
+
ext.lib_dir = File.join('lib', 'distance_measures')
|
43
|
+
end
|
34
44
|
|
45
|
+
task :spec => :check_dependencies
|
35
46
|
task :default => :spec
|
36
47
|
|
37
48
|
require 'rake/rdoctask'
|
@@ -43,3 +54,5 @@ Rake::RDocTask.new do |rdoc|
|
|
43
54
|
rdoc.rdoc_files.include('README*')
|
44
55
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
56
|
end
|
57
|
+
|
58
|
+
Rake::Task[:spec].prerequisites << :compile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/distance_measures.gemspec
CHANGED
@@ -5,13 +5,14 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{distance_measures}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2010-
|
13
|
-
s.description = %q{A bundle of distance measures}
|
12
|
+
s.date = %q{2010-07-31}
|
13
|
+
s.description = %q{A bundle of distance measures with C extensions for the slow bits}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
|
+
s.extensions = ["ext/core/extconf.rb", "ext/euclidean_distance/extconf.rb"]
|
15
16
|
s.extra_rdoc_files = [
|
16
17
|
"LICENSE",
|
17
18
|
"README.rdoc"
|
@@ -24,10 +25,14 @@ Gem::Specification.new do |s|
|
|
24
25
|
"Rakefile",
|
25
26
|
"VERSION",
|
26
27
|
"distance_measures.gemspec",
|
28
|
+
"ext/core/core.c",
|
29
|
+
"ext/core/extconf.rb",
|
30
|
+
"ext/euclidean_distance/euclidean_distance.c",
|
31
|
+
"ext/euclidean_distance/extconf.rb",
|
27
32
|
"lib/distance_measures.rb",
|
28
|
-
"lib/distance_measures/core.rb",
|
33
|
+
"lib/distance_measures/core.bundle",
|
29
34
|
"lib/distance_measures/cosine_similarity.rb",
|
30
|
-
"lib/distance_measures/euclidean_distance.rb",
|
35
|
+
"lib/distance_measures/euclidean_distance.bundle",
|
31
36
|
"lib/distance_measures/jaccard.rb",
|
32
37
|
"lib/distance_measures/tanimoto_coefficient.rb",
|
33
38
|
"spec/distance_measures_spec.rb",
|
@@ -37,8 +42,8 @@ Gem::Specification.new do |s|
|
|
37
42
|
s.homepage = %q{http://github.com/reddavis/distance_measure}
|
38
43
|
s.rdoc_options = ["--charset=UTF-8"]
|
39
44
|
s.require_paths = ["lib"]
|
40
|
-
s.rubygems_version = %q{1.3.
|
41
|
-
s.summary = %q{A bundle of distance measures}
|
45
|
+
s.rubygems_version = %q{1.3.6}
|
46
|
+
s.summary = %q{A bundle of distance measures with C extensions for the slow bits}
|
42
47
|
s.test_files = [
|
43
48
|
"spec/distance_measures_spec.rb",
|
44
49
|
"spec/spec_helper.rb"
|
data/ext/core/core.c
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
|
4
|
+
// Prototypes
|
5
|
+
long c_array_size(VALUE array);
|
6
|
+
// END
|
7
|
+
|
8
|
+
/*
|
9
|
+
|
10
|
+
def dot_product(other)
|
11
|
+
sum = 0.0
|
12
|
+
self.each_with_index do |n, index|
|
13
|
+
sum += n * other[index]
|
14
|
+
end
|
15
|
+
|
16
|
+
sum
|
17
|
+
end
|
18
|
+
|
19
|
+
*/
|
20
|
+
static VALUE rb_dot_product(VALUE self, VALUE other_array) {
|
21
|
+
double sum = 0;
|
22
|
+
|
23
|
+
//TODO: check they're the same size
|
24
|
+
long array_size = c_array_size(self);
|
25
|
+
int index;
|
26
|
+
|
27
|
+
for(index = 0; index <= array_size; index++) {
|
28
|
+
double x, y;
|
29
|
+
|
30
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
31
|
+
y = NUM2DBL(RARRAY(other_array)->ptr[index]);
|
32
|
+
|
33
|
+
sum += x * y;
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_float_new(sum);
|
37
|
+
}
|
38
|
+
|
39
|
+
/*
|
40
|
+
|
41
|
+
def sum_of_squares
|
42
|
+
inject(0) {|sum, n| sum + n ** 2}
|
43
|
+
end
|
44
|
+
|
45
|
+
*/
|
46
|
+
static VALUE rb_sum_of_squares(VALUE self) {
|
47
|
+
double sum = 0;
|
48
|
+
long array_size = c_array_size(self);
|
49
|
+
int index;
|
50
|
+
|
51
|
+
for(index = 0; index <= array_size; index++) {
|
52
|
+
double x;
|
53
|
+
|
54
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
55
|
+
|
56
|
+
sum += pow(x, 2);
|
57
|
+
}
|
58
|
+
|
59
|
+
return rb_float_new(sum);
|
60
|
+
}
|
61
|
+
|
62
|
+
/*
|
63
|
+
|
64
|
+
def euclidean_normalize
|
65
|
+
sum = 0.0
|
66
|
+
self.each do |n|
|
67
|
+
sum += n ** 2
|
68
|
+
end
|
69
|
+
|
70
|
+
Math.sqrt(sum)
|
71
|
+
end
|
72
|
+
|
73
|
+
*/
|
74
|
+
static VALUE rb_euclidean_normalize(VALUE self) {
|
75
|
+
double sum = 0;
|
76
|
+
long array_size = c_array_size(self);
|
77
|
+
int index;
|
78
|
+
|
79
|
+
for(index = 0; index <= array_size; index++) {
|
80
|
+
double x;
|
81
|
+
|
82
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
83
|
+
|
84
|
+
sum += pow(x, 2);
|
85
|
+
}
|
86
|
+
|
87
|
+
return rb_float_new(sqrt(sum));
|
88
|
+
}
|
89
|
+
|
90
|
+
/*
|
91
|
+
|
92
|
+
def binary_union_with(other)
|
93
|
+
unions = []
|
94
|
+
self.each_with_index do |n, index|
|
95
|
+
if n == 1 || other[index] == 1
|
96
|
+
unions << 1
|
97
|
+
else
|
98
|
+
unions << 0
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
unions
|
103
|
+
end
|
104
|
+
|
105
|
+
*/
|
106
|
+
static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
|
107
|
+
//TODO: check arrays are same size
|
108
|
+
long array_size = c_array_size(self);
|
109
|
+
int index;
|
110
|
+
VALUE results = rb_ary_new();
|
111
|
+
|
112
|
+
for(index = 0; index <= array_size; index++) {
|
113
|
+
int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
|
114
|
+
int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
|
115
|
+
|
116
|
+
if(self_attribute == 1 || other_array_attribute == 1) {
|
117
|
+
rb_ary_push(results, rb_int_new(1));
|
118
|
+
} else {
|
119
|
+
rb_ary_push(results, rb_int_new(0));
|
120
|
+
}
|
121
|
+
}
|
122
|
+
|
123
|
+
return results;
|
124
|
+
}
|
125
|
+
|
126
|
+
/*
|
127
|
+
|
128
|
+
def binary_intersection_with(other)
|
129
|
+
intersects = []
|
130
|
+
self.each_with_index do |n, index|
|
131
|
+
if n == 1 && other[index] == 1
|
132
|
+
intersects << 1
|
133
|
+
else
|
134
|
+
intersects << 0
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
intersects
|
139
|
+
end
|
140
|
+
|
141
|
+
*/
|
142
|
+
static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
|
143
|
+
//TODO: check arrays are same size
|
144
|
+
long array_size = c_array_size(self);
|
145
|
+
int index;
|
146
|
+
VALUE results = rb_ary_new();
|
147
|
+
|
148
|
+
for(index = 0; index <= array_size; index++) {
|
149
|
+
int self_attribute = NUM2INT(RARRAY(self)->ptr[index]);
|
150
|
+
int other_array_attribute = NUM2INT(RARRAY(other_array)->ptr[index]);
|
151
|
+
|
152
|
+
if(self_attribute == 1 && other_array_attribute == 1) {
|
153
|
+
rb_ary_push(results, rb_int_new(1));
|
154
|
+
} else {
|
155
|
+
rb_ary_push(results, rb_int_new(0));
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
return results;
|
160
|
+
}
|
161
|
+
|
162
|
+
// return the size of a Ruby array - 1
|
163
|
+
long c_array_size(VALUE array) {
|
164
|
+
return (RARRAY(array)->len - 1);
|
165
|
+
}
|
166
|
+
|
167
|
+
void Init_core() {
|
168
|
+
VALUE distance_measures = rb_define_module("DistanceMeasures");
|
169
|
+
rb_define_method(distance_measures, "dot_product", rb_dot_product, 1);
|
170
|
+
rb_define_method(distance_measures, "sum_of_squares", rb_sum_of_squares, 0);
|
171
|
+
rb_define_method(distance_measures, "euclidean_normalize", rb_euclidean_normalize, 0);
|
172
|
+
rb_define_method(distance_measures, "binary_union_with", rb_binary_union_with, 1);
|
173
|
+
rb_define_method(distance_measures, "binary_intersection_with", rb_binary_intersection_with, 1);
|
174
|
+
}
|
data/ext/core/extconf.rb
ADDED
data/ext/euclidean_distance/euclidean_distance.c
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
|
4
|
+
/*
|
5
|
+
|
6
|
+
def euclidean_distance(other)
|
7
|
+
sum = 0.0
|
8
|
+
self.each_index do |i|
|
9
|
+
sum += (self[i] - other[i])**2
|
10
|
+
end
|
11
|
+
Math.sqrt(sum)
|
12
|
+
end
|
13
|
+
|
14
|
+
*/
|
15
|
+
static VALUE rb_euclidean_distance(VALUE self, VALUE other_array) {
|
16
|
+
double value = 0.0;
|
17
|
+
|
18
|
+
//TODO: check they're the same size
|
19
|
+
long vector_length = (RARRAY(self)->len - 1);
|
20
|
+
int index;
|
21
|
+
|
22
|
+
for(index = 0; index <= vector_length; index++) {
|
23
|
+
double x, y;
|
24
|
+
|
25
|
+
x = NUM2DBL(RARRAY(self)->ptr[index]);
|
26
|
+
y = NUM2DBL(RARRAY(other_array)->ptr[index]);
|
27
|
+
|
28
|
+
value += pow(x - y, 2);
|
29
|
+
}
|
30
|
+
|
31
|
+
return rb_float_new(sqrt(value));
|
32
|
+
}
|
33
|
+
|
34
|
+
void Init_euclidean_distance() {
|
35
|
+
VALUE distance_measures = rb_define_module("DistanceMeasures");
|
36
|
+
rb_define_method(distance_measures, "euclidean_distance", rb_euclidean_distance, 1);
|
37
|
+
}
|
data/lib/distance_measures/core.bundle
ADDED
Binary file
|
data/lib/distance_measures/cosine_similarity.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
|
-
# http://en.wikipedia.org/wiki/Cosine_similarity
|
2
1
|
module DistanceMeasures
|
3
2
|
def cosine_similarity(other)
|
4
3
|
dot_product = self.dot_product(other)
|
5
4
|
normalization = self.euclidean_normalize * other.euclidean_normalize
|
6
|
-
|
5
|
+
|
7
6
|
handle_nan(dot_product / normalization)
|
8
7
|
end
|
9
8
|
end
|
data/lib/distance_measures/euclidean_distance.bundle
ADDED
Binary file
|
data/lib/distance_measures.rb
CHANGED
@@ -8,4 +8,22 @@ require 'distance_measures/jaccard'
|
|
8
8
|
|
9
9
|
class Array
|
10
10
|
include DistanceMeasures
|
11
|
+
|
12
|
+
# http://en.wikipedia.org/wiki/Intersection_(set_theory)
|
13
|
+
def intersection_with(other)
|
14
|
+
(self & other)
|
15
|
+
end
|
16
|
+
|
17
|
+
# http://en.wikipedia.org/wiki/Union_(set_theory)
|
18
|
+
def union_with(other)
|
19
|
+
(self + other).uniq
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
# Checks if we're dealing with NaN's and will return 0.0 unless
|
25
|
+
# handle NaN's is set to false
|
26
|
+
def handle_nan(result)
|
27
|
+
result.nan? ? 0.0 : result
|
28
|
+
end
|
11
29
|
end
|
data/spec/distance_measures_spec.rb
CHANGED
@@ -1,101 +1,117 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe "DistanceMeasures" do
|
4
|
-
|
4
|
+
|
5
5
|
describe "Euclidean Distance" do
|
6
|
-
it "should return
|
6
|
+
it "should return 0.0" do
|
7
7
|
array.euclidean_distance(array).should == 0.0
|
8
8
|
end
|
9
|
+
|
10
|
+
it "should return 4.0" do
|
11
|
+
[5].euclidean_distance([1]).should == 4.0
|
12
|
+
end
|
9
13
|
end
|
10
|
-
|
14
|
+
|
11
15
|
describe "Cosine Similarity" do
|
12
16
|
it "should return 1.0" do
|
13
|
-
array.cosine_similarity(array).should
|
17
|
+
array.cosine_similarity(array).should.to_s == "1.0" # WTF
|
14
18
|
end
|
15
|
-
|
19
|
+
|
16
20
|
it "should handle NaN's" do
|
17
21
|
[0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
|
18
22
|
end
|
19
23
|
end
|
20
|
-
|
24
|
+
|
21
25
|
describe "Tanimoto Coefficient" do
|
22
26
|
it "should return 1.0" do
|
23
27
|
array.tanimoto_coefficient(array).should == 1.0
|
24
28
|
end
|
25
|
-
|
29
|
+
|
26
30
|
it "should handle NaN's" do
|
27
31
|
[0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
|
28
32
|
end
|
29
33
|
end
|
30
|
-
|
34
|
+
|
31
35
|
describe "Sum of Squares" do
|
32
36
|
it "should return 50" do
|
33
37
|
array.sum_of_squares.should == 50
|
34
38
|
end
|
35
39
|
end
|
36
|
-
|
40
|
+
|
37
41
|
describe "Jaccard" do
|
38
42
|
describe "Jaccard Distance" do
|
39
43
|
it "should return" do
|
40
44
|
array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
|
41
45
|
end
|
42
46
|
end
|
43
|
-
|
47
|
+
|
44
48
|
describe "Jaccard Index" do
|
45
49
|
it "should return" do
|
46
50
|
array_2.jaccard_index(array_3).should == 3.0/7.0
|
47
51
|
end
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
describe "Binary Jaccard Index" do
|
51
55
|
it "should return 1/4" do
|
52
56
|
[1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
|
53
57
|
end
|
54
58
|
end
|
55
59
|
end
|
56
|
-
|
60
|
+
|
57
61
|
describe "Binary Jaccard Distance" do
|
58
62
|
it "should return 0.75" do
|
59
63
|
[1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
|
60
64
|
end
|
61
65
|
end
|
62
|
-
|
66
|
+
|
63
67
|
describe "Intersection" do
|
64
68
|
it "should return [7,4,1]" do
|
65
69
|
array_2.intersection_with(array_3).should == [7,4,1]
|
66
70
|
end
|
67
71
|
end
|
68
|
-
|
72
|
+
|
69
73
|
describe "Union" do
|
70
74
|
it "should return " do
|
71
|
-
array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
|
75
|
+
array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
|
72
76
|
end
|
73
77
|
end
|
74
|
-
|
78
|
+
|
75
79
|
describe "Binary Intersection" do
|
76
80
|
it "should return [0,1,0,0]" do
|
77
81
|
[1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
|
78
82
|
end
|
79
83
|
end
|
80
|
-
|
84
|
+
|
81
85
|
describe "Binary Union" do
|
82
86
|
it "should return [1,1,1,0]" do
|
83
87
|
[1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
|
84
88
|
end
|
85
89
|
end
|
86
|
-
|
90
|
+
|
91
|
+
describe "Dot Product" do
|
92
|
+
it "should return 50" do
|
93
|
+
[5, 5].dot_product([5, 5]).should == 50.0
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "Euclidean normalize" do
|
98
|
+
it "should" do
|
99
|
+
[10].euclidean_normalize.should == 10
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
87
103
|
private
|
88
|
-
|
104
|
+
|
89
105
|
def array
|
90
106
|
[5, 5]
|
91
107
|
end
|
92
|
-
|
108
|
+
|
93
109
|
def array_2
|
94
110
|
[7, 3, 2, 4, 1]
|
95
111
|
end
|
96
|
-
|
112
|
+
|
97
113
|
def array_3
|
98
114
|
[4,1,9,7,5]
|
99
115
|
end
|
100
|
-
|
116
|
+
|
101
117
|
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: distance_measures
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- reddavis
|
@@ -9,25 +14,30 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-07-31 00:00:00 +01:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: rspec
|
17
|
-
|
18
|
-
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
24
|
requirements:
|
21
25
|
- - ">="
|
22
26
|
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 2
|
30
|
+
- 9
|
23
31
|
version: 1.2.9
|
24
|
-
|
25
|
-
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: A bundle of distance measures with C extensions for the slow bits
|
26
35
|
email: reddavis@gmail.com
|
27
36
|
executables: []
|
28
37
|
|
29
|
-
extensions:
|
30
|
-
|
38
|
+
extensions:
|
39
|
+
- ext/core/extconf.rb
|
40
|
+
- ext/euclidean_distance/extconf.rb
|
31
41
|
extra_rdoc_files:
|
32
42
|
- LICENSE
|
33
43
|
- README.rdoc
|
@@ -39,10 +49,14 @@ files:
|
|
39
49
|
- Rakefile
|
40
50
|
- VERSION
|
41
51
|
- distance_measures.gemspec
|
52
|
+
- ext/core/core.c
|
53
|
+
- ext/core/extconf.rb
|
54
|
+
- ext/euclidean_distance/euclidean_distance.c
|
55
|
+
- ext/euclidean_distance/extconf.rb
|
42
56
|
- lib/distance_measures.rb
|
43
|
-
- lib/distance_measures/core.rb
|
57
|
+
- lib/distance_measures/core.bundle
|
44
58
|
- lib/distance_measures/cosine_similarity.rb
|
45
|
-
- lib/distance_measures/euclidean_distance.rb
|
59
|
+
- lib/distance_measures/euclidean_distance.bundle
|
46
60
|
- lib/distance_measures/jaccard.rb
|
47
61
|
- lib/distance_measures/tanimoto_coefficient.rb
|
48
62
|
- spec/distance_measures_spec.rb
|
@@ -61,21 +75,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
75
|
requirements:
|
62
76
|
- - ">="
|
63
77
|
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
64
80
|
version: "0"
|
65
|
-
version:
|
66
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
82
|
requirements:
|
68
83
|
- - ">="
|
69
84
|
- !ruby/object:Gem::Version
|
85
|
+
segments:
|
86
|
+
- 0
|
70
87
|
version: "0"
|
71
|
-
version:
|
72
88
|
requirements: []
|
73
89
|
|
74
90
|
rubyforge_project:
|
75
|
-
rubygems_version: 1.3.
|
91
|
+
rubygems_version: 1.3.6
|
76
92
|
signing_key:
|
77
93
|
specification_version: 3
|
78
|
-
summary: A bundle of distance measures
|
94
|
+
summary: A bundle of distance measures with C extensions for the slow bits
|
79
95
|
test_files:
|
80
96
|
- spec/distance_measures_spec.rb
|
81
97
|
- spec/spec_helper.rb
|
data/lib/distance_measures/core.rb
@@ -1,68 +0,0 @@
|
|
1
|
-
module DistanceMeasures
|
2
|
-
def dot_product(other)
|
3
|
-
sum = 0.0
|
4
|
-
self.each_with_index do |n, index|
|
5
|
-
sum += n * other[index]
|
6
|
-
end
|
7
|
-
|
8
|
-
sum
|
9
|
-
end
|
10
|
-
|
11
|
-
def euclidean_normalize
|
12
|
-
sum = 0.0
|
13
|
-
self.each do |n|
|
14
|
-
sum += n ** 2
|
15
|
-
end
|
16
|
-
|
17
|
-
Math.sqrt(sum)
|
18
|
-
end
|
19
|
-
|
20
|
-
def sum_of_squares
|
21
|
-
inject(0) {|sum, n| sum + n ** 2}
|
22
|
-
end
|
23
|
-
|
24
|
-
# http://en.wikipedia.org/wiki/Intersection_(set_theory)
|
25
|
-
def intersection_with(other)
|
26
|
-
(self & other)
|
27
|
-
end
|
28
|
-
|
29
|
-
# http://en.wikipedia.org/wiki/Union_(set_theory)
|
30
|
-
def union_with(other)
|
31
|
-
(self + other).uniq
|
32
|
-
end
|
33
|
-
|
34
|
-
# 1's & 0's
|
35
|
-
def binary_intersection_with(other)
|
36
|
-
intersects = []
|
37
|
-
self.each_with_index do |n, index|
|
38
|
-
if n == 1 && other[index] == 1
|
39
|
-
intersects << 1
|
40
|
-
else
|
41
|
-
intersects << 0
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
intersects
|
46
|
-
end
|
47
|
-
|
48
|
-
def binary_union_with(other)
|
49
|
-
unions = []
|
50
|
-
self.each_with_index do |n, index|
|
51
|
-
if n == 1 || other[index] == 1
|
52
|
-
unions << 1
|
53
|
-
else
|
54
|
-
unions << 0
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
unions
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
# Checks if we're dealing with NaN's and will return 0.0 unless
|
64
|
-
# handle NaN's is set to false
|
65
|
-
def handle_nan(result)
|
66
|
-
result.nan? ? 0.0 : result
|
67
|
-
end
|
68
|
-
end
|