measurable 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/LICENSE +20 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/ext/measurable/extconf.rb +5 -0
- data/ext/measurable/measurable.c +209 -0
- data/lib/measurable.rb +32 -0
- data/lib/measurable/cosine_similarity.rb +8 -0
- data/lib/measurable/haversine.rb +46 -0
- data/lib/measurable/jaccard.rb +26 -0
- data/lib/measurable/tanimoto_coefficient.rb +9 -0
- data/lib/measurable/version.rb +3 -0
- data/measurable.gemspec +30 -0
- data/spec/measurable.rb +106 -0
- data/spec/spec_helper.rb +9 -0
- metadata +128 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
measurables (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.3)
|
10
|
+
rake (0.9.2.2)
|
11
|
+
rake-compiler (0.8.1)
|
12
|
+
rake
|
13
|
+
rspec (2.9.0)
|
14
|
+
rspec-core (~> 2.9.0)
|
15
|
+
rspec-expectations (~> 2.9.0)
|
16
|
+
rspec-mocks (~> 2.9.0)
|
17
|
+
rspec-core (2.9.0)
|
18
|
+
rspec-expectations (2.9.1)
|
19
|
+
diff-lcs (~> 1.1.3)
|
20
|
+
rspec-mocks (2.9.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
bundler
|
27
|
+
measurables!
|
28
|
+
rake (~> 0.9)
|
29
|
+
rake-compiler (~> 0.8.1)
|
30
|
+
rspec (~> 2.9.0)
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Carlos Agarie
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# Measurable
|
2
|
+
|
3
|
+
This (soon to be) gem encompasses various distance measures to be used in different projects. I want to support both the built-in `Array` class and [NMatrix](http://github.com/sciruby/nmatrix)'s `NVector`.
|
4
|
+
|
5
|
+
This is a fork of the gem [Distance Measure](https://github.com/reddavis/Distance-Measures), which has a similar objective, but isn't actively maintained and doesn't support NMatrix. Thank you, [reddavis](https://github.com/reddavis). :)
|
6
|
+
|
7
|
+
# Install
|
8
|
+
|
9
|
+
I'll update this section when I publish the gem. For now... wait.
|
10
|
+
|
11
|
+
## How to use
|
12
|
+
|
13
|
+
This list will be updated as I have time. I'll refactor the existing measures and add some that I'll need in a project.
|
14
|
+
|
15
|
+
The API I intend to support is something like this:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require "measurable"
|
19
|
+
|
20
|
+
u = NVector.ones(2)
|
21
|
+
v = NVector.zeros(2)
|
22
|
+
w = [1, 0]
|
23
|
+
|
24
|
+
Measurable::euclidean(u, v) # => 1.41421
|
25
|
+
Measurable::euclidean(w, v) # => 1.00000
|
26
|
+
Measurable::euclidean(w, w) # => 0.00000
|
27
|
+
```
|
28
|
+
|
29
|
+
Maybe add some support for some of NMatrix's dtypes, like `:float32`, `:float64`, `:complex64`, `:complex128`, etc.
|
30
|
+
|
31
|
+
## How to use, the old way:
|
32
|
+
|
33
|
+
a = [1,1]
|
34
|
+
b = [2,2]
|
35
|
+
|
36
|
+
a.euclidean(b)
|
37
|
+
|
38
|
+
a.cosine_similarity(b)
|
39
|
+
|
40
|
+
a.jaccard_index(b)
|
41
|
+
|
42
|
+
a.jaccard_distance(b)
|
43
|
+
|
44
|
+
a.binary_jaccard_index(b)
|
45
|
+
|
46
|
+
a.binary_jaccard_distance(b)
|
47
|
+
|
48
|
+
a.tanimoto_coefficient(b)
|
49
|
+
|
50
|
+
a.haversine_distance(b)
|
51
|
+
|
52
|
+
This may or may not be the complete list; the best thing to do is check the source code.
|
53
|
+
|
54
|
+
There are also a couple bonus methods:
|
55
|
+
|
56
|
+
a.dot_product(b)
|
57
|
+
|
58
|
+
a.sum_of_squares
|
59
|
+
|
60
|
+
a.intersection_with(b)
|
61
|
+
|
62
|
+
a.union_with(b)
|
63
|
+
|
64
|
+
# When you're dealing with 1's and 0's
|
65
|
+
a.binary_intersection_with(b)
|
66
|
+
|
67
|
+
a.binary_union_with(b)
|
68
|
+
|
69
|
+
## License
|
70
|
+
|
71
|
+
Copyright (c) 2012 Carlos Agarie. See LICENSE for details.
|
72
|
+
|
73
|
+
The original `Distance Measure` gem is copyrighted by reddavis 2010.
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Build tasks for the measurable gem: loads the bundle and defines the
# rake-compiler task that builds the C extension.
require 'rake'
require "rake/extensiontask"

# Setup the necessary gems, specified in the gemspec.
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

# Compile task.
Rake::ExtensionTask.new do |ext|
  ext.name = 'measurable'
  ext.ext_dir = 'ext/measurable'
  ext.lib_dir = 'lib/'
  # Glob brace alternatives are matched literally, so there must be no spaces
  # after the commas; "{c, cpp, h}" would look for files ending in ". cpp" and
  # ". h" and therefore only ever match the .c sources.
  ext.source_pattern = "**/*.{c,cpp,h}"
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
|
4
|
+
#ifndef RUBY_19
|
5
|
+
#ifndef RARRAY_LEN
|
6
|
+
#define RARRAY_LEN(v) (RARRAY(v)->len)
|
7
|
+
#endif
|
8
|
+
#ifndef RARRAY_PTR
|
9
|
+
#define RARRAY_PTR(v) (RARRAY(v)->ptr)
|
10
|
+
#endif
|
11
|
+
#endif
|
12
|
+
|
13
|
+
/*
|
14
|
+
** def euclidean_distance(other)
|
15
|
+
** sum = 0.0
|
16
|
+
** self.each_index do |i|
|
17
|
+
** sum += (self[i] - other[i])**2
|
18
|
+
** end
|
19
|
+
** Math.sqrt(sum)
|
20
|
+
** end
|
21
|
+
*/
|
22
|
+
|
23
|
+
/*
 * Euclidean distance between the receiver and other_array:
 * sqrt(sum((self[i] - other[i])^2)).
 *
 * Both operands must be Arrays of the same length; a TypeError is raised for
 * a non-Array operand and an ArgumentError for mismatched lengths, so the
 * loop can never read past the end of the shorter array.
 * Returns a Float.
 */
static VALUE rb_euclidean(VALUE self, VALUE other_array) {
    double sum = 0.0;
    long length, i;

    /* The method lives in a module, so the receiver is not guaranteed to be an Array. */
    Check_Type(self, T_ARRAY);
    Check_Type(other_array, T_ARRAY);

    length = RARRAY_LEN(self);
    if (RARRAY_LEN(other_array) != length) {
        rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
                 length, (long)RARRAY_LEN(other_array));
    }

    /* long index: array lengths are long, an int counter could overflow. */
    for (i = 0; i < length; i++) {
        double diff = NUM2DBL(rb_ary_entry(self, i)) - NUM2DBL(rb_ary_entry(other_array, i));

        sum += diff * diff;
    }

    return rb_float_new(sqrt(sum));
}
|
41
|
+
|
42
|
+
/* Prototypes */
|
43
|
+
long c_array_size(VALUE array);
|
44
|
+
|
45
|
+
/*
|
46
|
+
** def dot_product(other)
|
47
|
+
** sum = 0.0
|
48
|
+
** self.each_with_index do |n, index|
|
49
|
+
** sum += n * other[index]
|
50
|
+
** end
|
51
|
+
**
|
52
|
+
** sum
|
53
|
+
** end
|
54
|
+
*/
|
55
|
+
|
56
|
+
/*
 * Dot product of the receiver and other_array: sum(self[i] * other[i]).
 *
 * Both operands must be Arrays of the same length; a TypeError is raised for
 * a non-Array operand and an ArgumentError for mismatched lengths, so the
 * loop can never read past the end of the shorter array.
 * Returns a Float.
 */
static VALUE rb_dot_product(VALUE self, VALUE other_array) {
    double sum = 0.0;
    long length, i;

    /* The method lives in a module, so the receiver is not guaranteed to be an Array. */
    Check_Type(self, T_ARRAY);
    Check_Type(other_array, T_ARRAY);

    length = RARRAY_LEN(self);
    if (RARRAY_LEN(other_array) != length) {
        rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
                 length, (long)RARRAY_LEN(other_array));
    }

    /* long index: array lengths are long, an int counter could overflow. */
    for (i = 0; i < length; i++) {
        sum += NUM2DBL(rb_ary_entry(self, i)) * NUM2DBL(rb_ary_entry(other_array, i));
    }

    return rb_float_new(sum);
}
|
74
|
+
|
75
|
+
/*
|
76
|
+
** def sum_of_squares
|
77
|
+
** inject(0) {|sum, n| sum + n ** 2}
|
78
|
+
** end
|
79
|
+
*/
|
80
|
+
|
81
|
+
/*
 * Sum of the squares of the receiver's elements: sum(self[i]^2).
 *
 * Raises TypeError when the receiver is not an Array.
 * Returns a Float (0.0 for an empty array).
 */
static VALUE rb_sum_of_squares(VALUE self) {
    double sum = 0.0;
    long length, i;

    /* The method lives in a module, so the receiver is not guaranteed to be an Array. */
    Check_Type(self, T_ARRAY);
    length = RARRAY_LEN(self);

    /* long index: array lengths are long, an int counter could overflow. */
    for (i = 0; i < length; i++) {
        double x = NUM2DBL(rb_ary_entry(self, i));

        sum += x * x;
    }

    return rb_float_new(sum);
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
** def euclidean_normalize
|
99
|
+
** sum = 0.0
|
100
|
+
** self.each do |n|
|
101
|
+
** sum += n ** 2
|
102
|
+
** end
|
103
|
+
**
|
104
|
+
** Math.sqrt(sum)
|
105
|
+
** end
|
106
|
+
*/
|
107
|
+
|
108
|
+
/*
 * Euclidean (L2) norm of the receiver: sqrt(sum(self[i]^2)).
 *
 * Despite the name, this returns the norm itself as a Float; it does not
 * return a normalized copy of the array.
 * Raises TypeError when the receiver is not an Array.
 */
static VALUE rb_euclidean_normalize(VALUE self) {
    double sum = 0.0;
    long length, i;

    /* The method lives in a module, so the receiver is not guaranteed to be an Array. */
    Check_Type(self, T_ARRAY);
    length = RARRAY_LEN(self);

    /* long index: array lengths are long, an int counter could overflow. */
    for (i = 0; i < length; i++) {
        double x = NUM2DBL(rb_ary_entry(self, i));

        sum += x * x;
    }

    return rb_float_new(sqrt(sum));
}
|
123
|
+
|
124
|
+
/*
|
125
|
+
** def binary_union_with(other)
|
126
|
+
** unions = []
|
127
|
+
** self.each_with_index do |n, index|
|
128
|
+
** if n == 1 || other[index] == 1
|
129
|
+
** unions << 1
|
130
|
+
** else
|
131
|
+
** unions << 0
|
132
|
+
** end
|
133
|
+
** end
|
134
|
+
**
|
135
|
+
** unions
|
136
|
+
** end
|
137
|
+
*/
|
138
|
+
|
139
|
+
/*
 * Element-wise binary union: result[i] is 1 when self[i] or other[i]
 * converts to the integer 1, and 0 otherwise.
 *
 * Both operands must be Arrays of the same length; a TypeError is raised for
 * a non-Array operand and an ArgumentError for mismatched lengths, so the
 * loop can never read past the end of the shorter array.
 * Returns a new Array of 0/1 Integers with the same length as the receiver.
 */
static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
    long length, i;
    VALUE results;

    /* The method lives in a module, so the receiver is not guaranteed to be an Array. */
    Check_Type(self, T_ARRAY);
    Check_Type(other_array, T_ARRAY);

    length = RARRAY_LEN(self);
    if (RARRAY_LEN(other_array) != length) {
        rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
                 length, (long)RARRAY_LEN(other_array));
    }

    results = rb_ary_new();

    /* long index: array lengths are long, an int counter could overflow. */
    for (i = 0; i < length; i++) {
        int self_attribute = NUM2INT(rb_ary_entry(self, i));
        int other_array_attribute = NUM2INT(rb_ary_entry(other_array, i));

        if (self_attribute == 1 || other_array_attribute == 1) {
            rb_ary_push(results, INT2FIX(1));
        } else {
            rb_ary_push(results, INT2FIX(0));
        }
    }

    return results;
}
|
158
|
+
|
159
|
+
/*
|
160
|
+
** def binary_intersection_with(other)
|
161
|
+
** intersects = []
|
162
|
+
** self.each_with_index do |n, index|
|
163
|
+
** if n == 1 && other[index] == 1
|
164
|
+
** intersects << 1
|
165
|
+
** else
|
166
|
+
** intersects << 0
|
167
|
+
** end
|
168
|
+
** end
|
169
|
+
**
|
170
|
+
** intersects
|
171
|
+
** end
|
172
|
+
*/
|
173
|
+
|
174
|
+
/*
 * Element-wise binary intersection: result[i] is 1 when both self[i] and
 * other[i] convert to the integer 1, and 0 otherwise.
 *
 * Both operands must be Arrays of the same length; a TypeError is raised for
 * a non-Array operand and an ArgumentError for mismatched lengths, so the
 * loop can never read past the end of the shorter array.
 * Returns a new Array of 0/1 Integers with the same length as the receiver.
 */
static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
    long length, i;
    VALUE results;

    /* The method lives in a module, so the receiver is not guaranteed to be an Array. */
    Check_Type(self, T_ARRAY);
    Check_Type(other_array, T_ARRAY);

    length = RARRAY_LEN(self);
    if (RARRAY_LEN(other_array) != length) {
        rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
                 length, (long)RARRAY_LEN(other_array));
    }

    results = rb_ary_new();

    /* long index: array lengths are long, an int counter could overflow. */
    for (i = 0; i < length; i++) {
        int self_attribute = NUM2INT(rb_ary_entry(self, i));
        int other_array_attribute = NUM2INT(rb_ary_entry(other_array, i));

        if (self_attribute == 1 && other_array_attribute == 1) {
            rb_ary_push(results, INT2FIX(1));
        } else {
            rb_ary_push(results, INT2FIX(0));
        }
    }

    return results;
}
|
193
|
+
|
194
|
+
/* return the size of a Ruby array - 1 */
|
195
|
+
long c_array_size(VALUE array) {
|
196
|
+
return (RARRAY_LEN(array) - 1);
|
197
|
+
}
|
198
|
+
|
199
|
+
void
|
200
|
+
Init_measurable()
|
201
|
+
{
|
202
|
+
VALUE rb_measurable = rb_define_module("Measurable");
|
203
|
+
rb_define_method(rb_measurable, "euclidean", rb_euclidean, 1);
|
204
|
+
rb_define_method(rb_measurable, "dot_product", rb_dot_product, 1);
|
205
|
+
rb_define_method(rb_measurable, "sum_of_squares", rb_sum_of_squares, 0);
|
206
|
+
rb_define_method(rb_measurable, "euclidean_normalize", rb_euclidean_normalize, 0);
|
207
|
+
rb_define_method(rb_measurable, "binary_union_with", rb_binary_union_with, 1);
|
208
|
+
rb_define_method(rb_measurable, "binary_intersection_with", rb_binary_intersection_with, 1);
|
209
|
+
}
|
data/lib/measurable.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + "/../lib")
|
2
|
+
|
3
|
+
require "measurable/version.rb"
|
4
|
+
|
5
|
+
require "measurable/cosine_similarity"
|
6
|
+
require "measurable/tanimoto_coefficient"
|
7
|
+
require "measurable/jaccard"
|
8
|
+
require "measurable/haversine"
|
9
|
+
|
10
|
+
require "measurable.so"
|
11
|
+
|
12
|
+
# Mixes the Measurable distance measures into Array and adds the set helpers
# they rely on.
class Array
  include Measurable

  # Set intersection: the distinct elements present in both arrays.
  # http://en.wikipedia.org/wiki/Intersection_(set_theory)
  def intersection_with(other)
    (self & other)
  end

  # Set union: the distinct elements present in either array.
  # http://en.wikipedia.org/wiki/Union_(set_theory)
  def union_with(other)
    (self + other).uniq
  end

  private

  # Replaces a NaN result (e.g. the outcome of 0.0 / 0.0) with 0.0 and returns
  # every other value unchanged. Values that do not respond to #nan? (such as
  # Integers) are passed through instead of raising NoMethodError.
  def handle_nan(result)
    (result.respond_to?(:nan?) && result.nan?) ? 0.0 : result
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#
|
2
|
+
# Notes:
|
3
|
+
#
|
4
|
+
# translated into Ruby based on information contained in:
|
5
|
+
# http://mathforum.org/library/drmath/view/51879.html Doctors Rick and Peterson - 4/20/99
|
6
|
+
# http://www.movable-type.co.uk/scripts/latlong.html
|
7
|
+
# http://en.wikipedia.org/wiki/Haversine_formula
|
8
|
+
#
|
9
|
+
# This formula can compute accurate distances between two points given latitude and longitude, even for
|
10
|
+
# short distances.
|
11
|
+
|
12
|
+
module Measurable

  # Conversion factor from degrees to radians (PI/180), at full Float precision.
  RAD_PER_DEG = Math::PI / 180

  R_MILES = 3956 # radius of the great circle in miles
  R_KM = 6371 # radius in kilometers...some algorithms use 6367

  # Great-circle radius for each supported unit; the distance is returned in
  # whatever unit the chosen radius is expressed in.
  R = {
    :miles => R_MILES,
    :km => R_KM,
    :feet => R_MILES * 5280, # 5280 feet in a statute mile
    :meters => R_KM * 1000
  }.freeze

  # Great-circle distance between two points using the haversine formula.
  #
  # The receiver and +other+ are indexable as [latitude, longitude], both in
  # degrees. +um+ selects the unit of the result and must be a key of R
  # (:miles, :km, :feet or :meters); the default is :meters.
  #
  # Returns the distance as a Float.
  # Raises ArgumentError when +um+ is not a known unit.
  def haversine_distance(other, um = :meters)
    radius = R[um]
    if radius.nil?
      raise ArgumentError, "unknown unit #{um.inspect} (expected one of #{R.keys.inspect})"
    end

    lat1_rad = self[0] * RAD_PER_DEG
    lat2_rad = other[0] * RAD_PER_DEG
    dlat_rad = (other[0] - self[0]) * RAD_PER_DEG
    dlon_rad = (other[1] - self[1]) * RAD_PER_DEG

    a = (Math.sin(dlat_rad / 2))**2 +
        Math.cos(lat1_rad) * Math.cos(lat2_rad) * (Math.sin(dlon_rad / 2))**2

    # Rounding can push a marginally outside [0, 1] for (nearly) identical or
    # antipodal points, which would make Math.sqrt raise Math::DomainError.
    a = 0.0 if a < 0.0
    a = 1.0 if a > 1.0

    c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a))

    radius * c
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# http://en.wikipedia.org/wiki/Jaccard_coefficient
|
2
|
+
# Jaccard similarity measures, for arrays treated as sets and for binary
# (0/1) attribute vectors.
module Measurable

  # Jaccard distance: 1 - jaccard_index(other).
  def jaccard_distance(other)
    1 - jaccard_index(other)
  end

  # Jaccard index of the two arrays treated as sets: |A ∩ B| / |A ∪ B|.
  #
  # Returns a Float in [0, 1]. When both arrays are empty the ratio is 0/0;
  # following the usual convention, two empty sets are considered identical
  # and 1.0 is returned instead of NaN.
  def jaccard_index(other)
    union = union_with(other).size
    return 1.0 if union.zero?

    intersection_with(other).size.to_f / union
  end

  # Binary Jaccard distance: 1 - binary_jaccard_index(other).
  def binary_jaccard_distance(other)
    1 - binary_jaccard_index(other)
  end

  # Jaccard index of two binary (0/1) vectors: the number of positions where
  # both are 1 divided by the number of positions where at least one is 1.
  #
  # Returns a Float in [0, 1]. When no position is 1 in either vector the
  # ratio is 0/0; the vectors are considered identical and 1.0 is returned
  # instead of NaN.
  def binary_jaccard_index(other)
    union = binary_union_with(other).count(1)
    return 1.0 if union.zero?

    binary_intersection_with(other).count(1).to_f / union
  end

end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
|
2
|
+
module Measurable
  # Tanimoto coefficient (extended Jaccard) of two numeric vectors:
  # a.b / (|a|^2 + |b|^2 - a.b). The quotient is passed through handle_nan
  # before being returned.
  def tanimoto_coefficient(other)
    shared = dot_product(other).to_f
    denominator = (sum_of_squares + other.sum_of_squares - shared).to_f

    handle_nan(shared / denominator)
  end
end
|
data/measurable.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Gem specification for measurable.
lib = File.expand_path('../lib/', __FILE__)
$:.unshift lib unless $:.include?(lib)

# Date.today is used below; require it explicitly rather than relying on
# RubyGems having loaded it already.
require 'date'
require 'measurable/version'

Gem::Specification.new do |gem|
  gem.name = "measurable"
  gem.version = Measurable::VERSION
  gem.date = Date.today.to_s
  gem.summary = %Q{A Ruby module with a lot of distance measures for your projects.}
  gem.description = %Q{A Ruby module with a lot of distance measures for your projects.}
  # Matches the MIT text shipped in the LICENSE file.
  gem.license = 'MIT'

  gem.authors = ["Carlos Agarie"]
  gem.email = "carlos@onox.com.br"
  gem.homepage = "http://github.com/agarie/measurable"

  # The packaged file lists are taken from git, so the gem must be built
  # from inside a git checkout.
  gem.files = `git ls-files`.split("\n")
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }

  gem.require_paths = ["lib"]
  # Native extension, compiled at install time.
  gem.extensions = ['ext/measurable/extconf.rb']

  gem.required_ruby_version = '>= 1.9.2'

  gem.add_development_dependency 'bundler'
  gem.add_development_dependency 'rake', '~> 0.9'
  gem.add_development_dependency 'rake-compiler', '~> 0.8.1'
  gem.add_development_dependency 'rspec', '~> 2.9.0'
end
|
data/spec/measurable.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Exercises the Measurable methods through Array, which includes the module.
describe Measurable do

  let(:array) { [5, 5] }
  let(:array_2) { [7, 3, 2, 4, 1] }
  let(:array_3) { [4, 1, 9, 7, 5] }

  # The C extension registers this measure as #euclidean.
  describe "Euclidean Distance" do
    it "should return 0.0" do
      array.euclidean(array).should == 0.0
    end

    it "should return 4.0" do
      [5].euclidean([1]).should == 4.0
    end
  end

  describe "Cosine Similarity" do
    it "should return 1.0" do
      # Floating-point rounding can leave the result a few ulps away from 1.0,
      # so compare with a tolerance instead of exact equality.
      array.cosine_similarity(array).should be_within(1e-9).of(1.0)
    end

    it "should handle NaN's" do
      [0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
    end
  end

  describe "Tanimoto Coefficient" do
    it "should return 1.0" do
      array.tanimoto_coefficient(array).should == 1.0
    end

    it "should handle NaN's" do
      [0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
    end
  end

  describe "Sum of Squares" do
    it "should return 50" do
      array.sum_of_squares.should == 50
    end
  end

  describe "Jaccard" do
    describe "Jaccard Distance" do
      it "should return" do
        array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
      end
    end

    describe "Jaccard Index" do
      it "should return" do
        array_2.jaccard_index(array_3).should == 3.0/7.0
      end
    end

    describe "Binary Jaccard Index" do
      it "should return 1/4" do
        [1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
      end
    end
  end

  describe "Binary Jaccard Distance" do
    it "should return 0.75" do
      [1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
    end
  end

  describe "Intersection" do
    it "should return [7,4,1]" do
      array_2.intersection_with(array_3).should == [7,4,1]
    end
  end

  describe "Union" do
    it "should return " do
      array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
    end
  end

  describe "Binary Intersection" do
    it "should return [0,1,0,0]" do
      [1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
    end
  end

  describe "Binary Union" do
    it "should return [1,1,1,0]" do
      [1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
    end
  end

  describe "Dot Product" do
    it "should return 50" do
      [5, 5].dot_product([5, 5]).should == 50.0
    end
  end

  describe "Euclidean normalize" do
    it "should" do
      [10].euclidean_normalize.should == 10
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: measurable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Carlos Agarie
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.9'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.9'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake-compiler
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.8.1
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.1
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rspec
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 2.9.0
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 2.9.0
|
78
|
+
description: A Ruby module with a lot of distance measures for your projects.
|
79
|
+
email: carlos@onox.com.br
|
80
|
+
executables: []
|
81
|
+
extensions:
|
82
|
+
- ext/measurable/extconf.rb
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- Gemfile.lock
|
88
|
+
- LICENSE
|
89
|
+
- README.md
|
90
|
+
- Rakefile
|
91
|
+
- ext/measurable/extconf.rb
|
92
|
+
- ext/measurable/measurable.c
|
93
|
+
- lib/measurable.rb
|
94
|
+
- lib/measurable/cosine_similarity.rb
|
95
|
+
- lib/measurable/haversine.rb
|
96
|
+
- lib/measurable/jaccard.rb
|
97
|
+
- lib/measurable/tanimoto_coefficient.rb
|
98
|
+
- lib/measurable/version.rb
|
99
|
+
- measurable.gemspec
|
100
|
+
- spec/measurable.rb
|
101
|
+
- spec/spec_helper.rb
|
102
|
+
homepage: http://github.com/agarie/measurable
|
103
|
+
licenses: []
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: 1.9.2
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 1.8.24
|
123
|
+
signing_key:
|
124
|
+
specification_version: 3
|
125
|
+
summary: A Ruby module with a lot of distance measures for your projects.
|
126
|
+
test_files:
|
127
|
+
- spec/measurable.rb
|
128
|
+
- spec/spec_helper.rb
|