measurable 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/LICENSE +20 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/ext/measurable/extconf.rb +5 -0
- data/ext/measurable/measurable.c +209 -0
- data/lib/measurable.rb +32 -0
- data/lib/measurable/cosine_similarity.rb +8 -0
- data/lib/measurable/haversine.rb +46 -0
- data/lib/measurable/jaccard.rb +26 -0
- data/lib/measurable/tanimoto_coefficient.rb +9 -0
- data/lib/measurable/version.rb +3 -0
- data/measurable.gemspec +30 -0
- data/spec/measurable.rb +106 -0
- data/spec/spec_helper.rb +9 -0
- metadata +128 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
measurables (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.3)
|
10
|
+
rake (0.9.2.2)
|
11
|
+
rake-compiler (0.8.1)
|
12
|
+
rake
|
13
|
+
rspec (2.9.0)
|
14
|
+
rspec-core (~> 2.9.0)
|
15
|
+
rspec-expectations (~> 2.9.0)
|
16
|
+
rspec-mocks (~> 2.9.0)
|
17
|
+
rspec-core (2.9.0)
|
18
|
+
rspec-expectations (2.9.1)
|
19
|
+
diff-lcs (~> 1.1.3)
|
20
|
+
rspec-mocks (2.9.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
bundler
|
27
|
+
measurables!
|
28
|
+
rake (~> 0.9)
|
29
|
+
rake-compiler (~> 0.8.1)
|
30
|
+
rspec (~> 2.9.0)
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Carlos Agarie
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# Measurable
|
2
|
+
|
3
|
+
This (soon to be) gem encompasses various distance measures to be used in different projects. I want to support both the built-in `Array` class and [NMatrix](http://github.com/sciruby/nmatrix)'s `NVector`.
|
4
|
+
|
5
|
+
This is a fork of the gem [Distance Measure](https://github.com/reddavis/Distance-Measures), which has a similar objective, but isn't actively maintained and doesn't support NMatrix. Thank you, [reddavis](https://github.com/reddavis). :)
|
6
|
+
|
7
|
+
# Install
|
8
|
+
|
9
|
+
I'll update this section when I publish the gem. For now... wait.
|
10
|
+
|
11
|
+
## How to use
|
12
|
+
|
13
|
+
This list will be updated as I have time. I'll refactor the existing measures and add some that I'll need in a project.
|
14
|
+
|
15
|
+
The API I intend to support is something like this:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require "measurable"
|
19
|
+
|
20
|
+
u = NVector.ones(2)
|
21
|
+
v = NVector.zeros(2)
|
22
|
+
w = [1, 0]
|
23
|
+
|
24
|
+
Measurable::euclidean(u, v) # => 1.41421
|
25
|
+
Measurable::euclidean(w, v) # => 1.00000
|
26
|
+
Measurable::euclidean(w, w) # => 0.00000
|
27
|
+
```
|
28
|
+
|
29
|
+
Maybe add some support for some of NMatrix's dtypes, like `:float32`, `:float64`, `:complex64`, `:complex128`, etc.
|
30
|
+
|
31
|
+
## How to use, the old way:
|
32
|
+
|
33
|
+
a = [1,1]
|
34
|
+
b = [2,2]
|
35
|
+
|
36
|
+
a.euclidean_distance(b)
|
37
|
+
|
38
|
+
a.cosine_similarity(b)
|
39
|
+
|
40
|
+
a.jaccard_index(b)
|
41
|
+
|
42
|
+
a.jaccard_distance(b)
|
43
|
+
|
44
|
+
a.binary_jaccard_index(b)
|
45
|
+
|
46
|
+
a.binary_jaccard_distance(b)
|
47
|
+
|
48
|
+
a.tanimoto_coefficient(b)
|
49
|
+
|
50
|
+
a.haversine_distance(b)
|
51
|
+
|
52
|
+
This may or may not be the complete list; the best thing to do is check the source code.
|
53
|
+
|
54
|
+
There are also a couple of bonus methods:
|
55
|
+
|
56
|
+
a.dot_product(b)
|
57
|
+
|
58
|
+
a.sum_of_squares
|
59
|
+
|
60
|
+
a.intersection_with(b)
|
61
|
+
|
62
|
+
a.union_with(b)
|
63
|
+
|
64
|
+
# When you're dealing with 1's and 0's
|
65
|
+
a.binary_intersection_with(b)
|
66
|
+
|
67
|
+
a.binary_union_with(b)
|
68
|
+
|
69
|
+
## License
|
70
|
+
|
71
|
+
Copyright (c) 2012 Carlos Agarie. See LICENSE for details.
|
72
|
+
|
73
|
+
The original `Distance Measure` gem is copyrighted by reddavis 2010.
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rake'
require 'rake/extensiontask'

# Activate the gems declared in the gemspec (default and development groups)
# so the tasks below run against the bundled versions.
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

# Compile task for the C extension in ext/measurable; the built library is
# placed under lib/.
Rake::ExtensionTask.new do |ext|
  ext.name = 'measurable'
  ext.ext_dir = 'ext/measurable'
  ext.lib_dir = 'lib/'
  # No spaces inside the braces: a glob alternative is matched literally, so
  # "{c, cpp, h}" would look for files ending in ". cpp" / ". h".
  ext.source_pattern = "**/*.{c,cpp,h}"
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
|
4
|
+
#ifndef RUBY_19
|
5
|
+
#ifndef RARRAY_LEN
|
6
|
+
#define RARRAY_LEN(v) (RARRAY(v)->len)
|
7
|
+
#endif
|
8
|
+
#ifndef RARRAY_PTR
|
9
|
+
#define RARRAY_PTR(v) (RARRAY(v)->ptr)
|
10
|
+
#endif
|
11
|
+
#endif
|
12
|
+
|
13
|
+
/*
|
14
|
+
** def euclidean_distance(other)
|
15
|
+
** sum = 0.0
|
16
|
+
** self.each_index do |i|
|
17
|
+
** sum += (self[i] - other[i])**2
|
18
|
+
** end
|
19
|
+
** Math.sqrt(sum)
|
20
|
+
** end
|
21
|
+
*/
|
22
|
+
|
23
|
+
/*
 * Euclidean distance between `self` and `other_array`.
 *
 * Both values must be Ruby Arrays of the same length; each element is
 * converted with NUM2DBL. Raises TypeError when either value is not an Array
 * and ArgumentError when the lengths differ. Returns a Ruby Float.
 */
static VALUE rb_euclidean(VALUE self, VALUE other_array) {
  double value = 0.0;
  long vector_length;
  long index;

  Check_Type(self, T_ARRAY);
  Check_Type(other_array, T_ARRAY);

  vector_length = RARRAY_LEN(self);
  if (RARRAY_LEN(other_array) != vector_length) {
    rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
             vector_length, RARRAY_LEN(other_array));
  }

  for (index = 0; index < vector_length; index++) {
    /* rb_ary_entry is bounds-checked, so a conversion that runs Ruby code
       and shrinks an array cannot cause an out-of-bounds read. */
    double x = NUM2DBL(rb_ary_entry(self, index));
    double y = NUM2DBL(rb_ary_entry(other_array, index));
    double diff = x - y;

    value += diff * diff;
  }

  return rb_float_new(sqrt(value));
}
|
41
|
+
|
42
|
+
/* Prototypes */
|
43
|
+
long c_array_size(VALUE array);
|
44
|
+
|
45
|
+
/*
|
46
|
+
** def dot_product(other)
|
47
|
+
** sum = 0.0
|
48
|
+
** self.each_with_index do |n, index|
|
49
|
+
** sum += n * other[index]
|
50
|
+
** end
|
51
|
+
**
|
52
|
+
** sum
|
53
|
+
** end
|
54
|
+
*/
|
55
|
+
|
56
|
+
/*
 * Dot product of `self` and `other_array`.
 *
 * Both values must be Ruby Arrays of the same length; each element is
 * converted with NUM2DBL. Raises TypeError when either value is not an Array
 * and ArgumentError when the lengths differ. Returns a Ruby Float.
 */
static VALUE rb_dot_product(VALUE self, VALUE other_array) {
  double sum = 0.0;
  long array_size;
  long index;

  Check_Type(self, T_ARRAY);
  Check_Type(other_array, T_ARRAY);

  array_size = RARRAY_LEN(self);
  if (RARRAY_LEN(other_array) != array_size) {
    rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
             array_size, RARRAY_LEN(other_array));
  }

  for (index = 0; index < array_size; index++) {
    /* Bounds-checked element access; see rb_ary_entry. */
    double x = NUM2DBL(rb_ary_entry(self, index));
    double y = NUM2DBL(rb_ary_entry(other_array, index));

    sum += x * y;
  }

  return rb_float_new(sum);
}
|
74
|
+
|
75
|
+
/*
|
76
|
+
** def sum_of_squares
|
77
|
+
** inject(0) {|sum, n| sum + n ** 2}
|
78
|
+
** end
|
79
|
+
*/
|
80
|
+
|
81
|
+
/*
 * Sum of the squares of every element of `self`.
 *
 * `self` must be a Ruby Array (TypeError otherwise); each element is
 * converted with NUM2DBL. An empty array yields 0.0. Returns a Ruby Float.
 */
static VALUE rb_sum_of_squares(VALUE self) {
  double sum = 0.0;
  long array_size;
  long index;

  Check_Type(self, T_ARRAY);
  array_size = RARRAY_LEN(self);

  for (index = 0; index < array_size; index++) {
    /* Bounds-checked element access; see rb_ary_entry. */
    double x = NUM2DBL(rb_ary_entry(self, index));

    sum += x * x;
  }

  return rb_float_new(sum);
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
** def euclidean_normalize
|
99
|
+
** sum = 0.0
|
100
|
+
** self.each do |n|
|
101
|
+
** sum += n ** 2
|
102
|
+
** end
|
103
|
+
**
|
104
|
+
** Math.sqrt(sum)
|
105
|
+
** end
|
106
|
+
*/
|
107
|
+
|
108
|
+
/*
 * Euclidean norm of `self`: the square root of the sum of squared elements.
 *
 * `self` must be a Ruby Array (TypeError otherwise); each element is
 * converted with NUM2DBL. An empty array yields 0.0. Returns a Ruby Float.
 */
static VALUE rb_euclidean_normalize(VALUE self) {
  double sum = 0.0;
  long array_size;
  long index;

  Check_Type(self, T_ARRAY);
  array_size = RARRAY_LEN(self);

  for (index = 0; index < array_size; index++) {
    /* Bounds-checked element access; see rb_ary_entry. */
    double x = NUM2DBL(rb_ary_entry(self, index));

    sum += x * x;
  }

  return rb_float_new(sqrt(sum));
}
|
123
|
+
|
124
|
+
/*
|
125
|
+
** def binary_union_with(other)
|
126
|
+
** unions = []
|
127
|
+
** self.each_with_index do |n, index|
|
128
|
+
** if n == 1 || other[index] == 1
|
129
|
+
** unions << 1
|
130
|
+
** else
|
131
|
+
** unions << 0
|
132
|
+
** end
|
133
|
+
** end
|
134
|
+
**
|
135
|
+
** unions
|
136
|
+
** end
|
137
|
+
*/
|
138
|
+
|
139
|
+
/*
 * Element-wise binary union of `self` and `other_array`.
 *
 * For every position the result holds 1 when either input element equals 1,
 * and 0 otherwise. Both values must be Ruby Arrays of the same length whose
 * elements convert with NUM2INT. Raises TypeError for non-Array values and
 * ArgumentError when the lengths differ. Returns a new Ruby Array.
 */
static VALUE rb_binary_union_with(VALUE self, VALUE other_array) {
  long array_size;
  long index;
  VALUE results;

  Check_Type(self, T_ARRAY);
  Check_Type(other_array, T_ARRAY);

  array_size = RARRAY_LEN(self);
  if (RARRAY_LEN(other_array) != array_size) {
    rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
             array_size, RARRAY_LEN(other_array));
  }

  results = rb_ary_new();

  for (index = 0; index < array_size; index++) {
    /* Bounds-checked element access; see rb_ary_entry. */
    int self_attribute = NUM2INT(rb_ary_entry(self, index));
    int other_array_attribute = NUM2INT(rb_ary_entry(other_array, index));
    int bit = (self_attribute == 1 || other_array_attribute == 1) ? 1 : 0;

    rb_ary_push(results, rb_int_new(bit));
  }

  return results;
}
|
158
|
+
|
159
|
+
/*
|
160
|
+
** def binary_intersection_with(other)
|
161
|
+
** intersects = []
|
162
|
+
** self.each_with_index do |n, index|
|
163
|
+
** if n == 1 && other[index] == 1
|
164
|
+
** intersects << 1
|
165
|
+
** else
|
166
|
+
** intersects << 0
|
167
|
+
** end
|
168
|
+
** end
|
169
|
+
**
|
170
|
+
** intersects
|
171
|
+
** end
|
172
|
+
*/
|
173
|
+
|
174
|
+
/*
 * Element-wise binary intersection of `self` and `other_array`.
 *
 * For every position the result holds 1 when both input elements equal 1,
 * and 0 otherwise. Both values must be Ruby Arrays of the same length whose
 * elements convert with NUM2INT. Raises TypeError for non-Array values and
 * ArgumentError when the lengths differ. Returns a new Ruby Array.
 */
static VALUE rb_binary_intersection_with(VALUE self, VALUE other_array) {
  long array_size;
  long index;
  VALUE results;

  Check_Type(self, T_ARRAY);
  Check_Type(other_array, T_ARRAY);

  array_size = RARRAY_LEN(self);
  if (RARRAY_LEN(other_array) != array_size) {
    rb_raise(rb_eArgError, "arrays must have the same size (%ld and %ld)",
             array_size, RARRAY_LEN(other_array));
  }

  results = rb_ary_new();

  for (index = 0; index < array_size; index++) {
    /* Bounds-checked element access; see rb_ary_entry. */
    int self_attribute = NUM2INT(rb_ary_entry(self, index));
    int other_array_attribute = NUM2INT(rb_ary_entry(other_array, index));
    int bit = (self_attribute == 1 && other_array_attribute == 1) ? 1 : 0;

    rb_ary_push(results, rb_int_new(bit));
  }

  return results;
}
|
193
|
+
|
194
|
+
/* return the size of a Ruby array - 1 */
|
195
|
+
/*
 * Index of the last element of a Ruby array, i.e. its length minus one.
 * An empty array therefore yields -1.
 */
long c_array_size(VALUE array) {
  long last_index = RARRAY_LEN(array);

  last_index -= 1;
  return last_index;
}
|
198
|
+
|
199
|
+
void
|
200
|
+
Init_measurable()
|
201
|
+
{
|
202
|
+
VALUE rb_measurable = rb_define_module("Measurable");
|
203
|
+
rb_define_method(rb_measurable, "euclidean", rb_euclidean, 1);
|
204
|
+
rb_define_method(rb_measurable, "dot_product", rb_dot_product, 1);
|
205
|
+
rb_define_method(rb_measurable, "sum_of_squares", rb_sum_of_squares, 0);
|
206
|
+
rb_define_method(rb_measurable, "euclidean_normalize", rb_euclidean_normalize, 0);
|
207
|
+
rb_define_method(rb_measurable, "binary_union_with", rb_binary_union_with, 1);
|
208
|
+
rb_define_method(rb_measurable, "binary_intersection_with", rb_binary_intersection_with, 1);
|
209
|
+
}
|
data/lib/measurable.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + "/../lib")
|
2
|
+
|
3
|
+
require "measurable/version.rb"
|
4
|
+
|
5
|
+
require "measurable/cosine_similarity"
|
6
|
+
require "measurable/tanimoto_coefficient"
|
7
|
+
require "measurable/jaccard"
|
8
|
+
require "measurable/haversine"
|
9
|
+
|
10
|
+
require "measurable.so"
|
11
|
+
|
12
|
+
# Reopens Array so that every array responds to the Measurable measures, and
# adds two small set helpers used by them.
class Array
  include Measurable

  # Elements present in both arrays, without duplicates.
  # http://en.wikipedia.org/wiki/Intersection_(set_theory)
  def intersection_with(other)
    self & other
  end

  # Elements present in either array, without duplicates.
  # http://en.wikipedia.org/wiki/Union_(set_theory)
  def union_with(other)
    self | other
  end

  private

  # Returns 0.0 when +result+ is NaN; any other value is returned unchanged.
  def handle_nan(result)
    return 0.0 if result.nan?

    result
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#
|
2
|
+
# Notes:
|
3
|
+
#
|
4
|
+
# translated into Ruby based on information contained in:
|
5
|
+
# http://mathforum.org/library/drmath/view/51879.html Doctors Rick and Peterson - 4/20/99
|
6
|
+
# http://www.movable-type.co.uk/scripts/latlong.html
|
7
|
+
# http://en.wikipedia.org/wiki/Haversine_formula
|
8
|
+
#
|
9
|
+
# This formula can compute accurate distances between two points given latitude and longitude, even for
|
10
|
+
# short distances.
|
11
|
+
|
12
|
+
module Measurable

  # Degrees-to-radians conversion factor (PI / 180), at full Float precision.
  RAD_PER_DEG = Math::PI / 180

  R_MILES = 3956 # radius of the great circle in miles
  R_KM = 6371 # radius in kilometers...some algorithms use 6367

  # The great circle distance will be in whatever unit the radius is in.
  R = {
    :miles => R_MILES,
    :km => R_KM,
    :feet => R_MILES * 5280, # 5280 feet per mile
    :meters => R_KM * 1000
  }

  # Great-circle distance between two points using the haversine formula.
  #
  # The receiver and +other+ are indexed as [latitude, longitude], both in
  # degrees. +um+ selects the unit of the result and must be a key of R
  # (:miles, :km, :feet or :meters).
  #
  # Returns the distance as a Float.
  # Raises ArgumentError when +um+ is not a known unit.
  def haversine_distance(other, um = :meters)
    radius = R.fetch(um) do
      raise ArgumentError, "unknown unit #{um.inspect}, expected one of #{R.keys.inspect}"
    end

    lat1_rad = self[0] * RAD_PER_DEG
    lat2_rad = other[0] * RAD_PER_DEG
    dlat_rad = (other[0] - self[0]) * RAD_PER_DEG
    dlon_rad = (other[1] - self[1]) * RAD_PER_DEG

    a = Math.sin(dlat_rad / 2)**2 +
        Math.cos(lat1_rad) * Math.cos(lat2_rad) * Math.sin(dlon_rad / 2)**2

    # Rounding can push `a` marginally outside [0, 1], which would make the
    # square roots below raise Math::DomainError.
    a = [[a, 0.0].max, 1.0].min

    c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a))

    radius * c
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# http://en.wikipedia.org/wiki/Jaccard_coefficient
|
2
|
+
module Measurable

  # Jaccard distance: 1 minus the Jaccard index of the two arrays.
  def jaccard_distance(other)
    1 - jaccard_index(other)
  end

  # Jaccard index: size of the intersection divided by size of the union,
  # with both arrays treated as sets.
  #
  # Returns a Float. When the union is empty (both arrays empty) the ratio
  # would be 0.0 / 0.0 = NaN; 0.0 is returned instead.
  def jaccard_index(other)
    union = (self + other).uniq.size
    return 0.0 if union.zero?

    intersection_with(other).size.to_f / union
  end

  # Binary Jaccard distance: 1 minus the binary Jaccard index.
  def binary_jaccard_distance(other)
    1 - binary_jaccard_index(other)
  end

  # Binary Jaccard index for arrays of 0s and 1s: the number of non-zero
  # entries in the element-wise intersection divided by the number of
  # non-zero entries in the element-wise union.
  #
  # Returns a Float; 0.0 when the union has no non-zero entry, which would
  # otherwise give NaN.
  def binary_jaccard_index(other)
    union = binary_union_with(other).count { |x| x != 0 }
    return 0.0 if union.zero?

    binary_intersection_with(other).count { |x| x != 0 }.to_f / union
  end

end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29
|
2
|
+
module Measurable
  # Tanimoto coefficient (extended Jaccard coefficient) of two vectors:
  # dot(a, b) / (|a|^2 + |b|^2 - dot(a, b)).
  #
  # The ratio is passed through handle_nan before being returned.
  def tanimoto_coefficient(other)
    shared = dot_product(other).to_f
    denominator = (sum_of_squares + other.sum_of_squares - shared).to_f

    handle_nan(shared / denominator)
  end
end
|
data/measurable.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
lib = File.expand_path('../lib/', __FILE__)
|
2
|
+
$:.unshift lib unless $:.include?(lib)
|
3
|
+
|
4
|
+
require 'measurable/version'
|
5
|
+
|
6
|
+
# Gem specification for measurable: identity, packaged files, the native
# extension to build, and development dependencies.
Gem::Specification.new do |gem|
  gem.name = "measurable"
  gem.version = Measurable::VERSION
  # Time needs no extra require, unlike Date (`require 'date'`), and yields
  # the same YYYY-MM-DD string.
  gem.date = Time.now.strftime('%Y-%m-%d')
  gem.summary = %Q{A Ruby module with a lot of distance measures for your projects.}
  gem.description = %Q{A Ruby module with a lot of distance measures for your projects.}

  gem.authors = ["Carlos Agarie"]
  gem.email = "carlos@onox.com.br"
  gem.homepage = "http://github.com/agarie/measurable"
  # Matches the terms of the bundled LICENSE file.
  gem.license = "MIT"

  # File lists come from git, so the gem must be built from a git checkout.
  gem.files = `git ls-files`.split("\n")
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }

  gem.require_paths = ["lib"]
  gem.extensions = ['ext/measurable/extconf.rb']

  gem.required_ruby_version = '>= 1.9.2'

  gem.add_development_dependency 'bundler'
  gem.add_development_dependency 'rake', '~> 0.9'
  gem.add_development_dependency 'rake-compiler', '~> 0.8.1'
  gem.add_development_dependency 'rspec', '~> 2.9.0'
end
|
data/spec/measurable.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Examples for the measures mixed into Array by Measurable.
describe Measurable do

  let(:array) { [5, 5] }
  let(:array_2) { [7, 3, 2, 4, 1] }
  let(:array_3) { [4, 1, 9, 7, 5] }

  # The C extension registers this measure under the name `euclidean`.
  describe "Euclidean Distance" do
    it "should return 0.0" do
      array.euclidean(array).should == 0.0
    end

    it "should return 4.0" do
      [5].euclidean([1]).should == 4.0
    end
  end

  describe "Cosine Similarity" do
    it "should return 1.0" do
      # Compare with a tolerance: the result is a Float and may differ from
      # 1.0 by rounding error.
      array.cosine_similarity(array).should be_within(1e-12).of(1.0)
    end

    it "should handle NaN's" do
      [0.0, 0.0].cosine_similarity([0.0, 0.0]).nan?.should be_false
    end
  end

  describe "Tanimoto Coefficient" do
    it "should return 1.0" do
      array.tanimoto_coefficient(array).should == 1.0
    end

    it "should handle NaN's" do
      [0.0, 0.0].tanimoto_coefficient([0.0, 0.0]).nan?.should be_false
    end
  end

  describe "Sum of Squares" do
    it "should return 50" do
      array.sum_of_squares.should == 50
    end
  end

  describe "Jaccard" do
    describe "Jaccard Distance" do
      it "should return" do
        array_2.jaccard_distance(array_3).should == (1 - 3.0/7.0)
      end
    end

    describe "Jaccard Index" do
      it "should return" do
        array_2.jaccard_index(array_3).should == 3.0/7.0
      end
    end

    describe "Binary Jaccard Index" do
      it "should return 1/4" do
        [1,1,1,1].binary_jaccard_index([0,1,0,0]).should == 1/4.0
      end
    end
  end

  describe "Binary Jaccard Distance" do
    it "should return 0.75" do
      [1,1,1,1].binary_jaccard_distance([0,1,0,0]).should == 1 - (1/4.0)
    end
  end

  describe "Intersection" do
    it "should return [7,4,1]" do
      array_2.intersection_with(array_3).should == [7,4,1]
    end
  end

  describe "Union" do
    it "should return " do
      array_2.union_with(array_3).should == [7,3,2,4,1,9,5]
    end
  end

  describe "Binary Intersection" do
    it "should return [0,1,0,0]" do
      [1,1,1,1].binary_intersection_with([0,1,0,0]).should == [0,1,0,0]
    end
  end

  describe "Binary Union" do
    it "should return [1,1,1,0]" do
      [1,1,1,0].binary_union_with([0,0,0,0]).should == [1,1,1,0]
    end
  end

  describe "Dot Product" do
    it "should return 50" do
      [5, 5].dot_product([5, 5]).should == 50.0
    end
  end

  describe "Euclidean normalize" do
    it "should" do
      [10].euclidean_normalize.should == 10
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: measurable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Carlos Agarie
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.9'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.9'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake-compiler
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.8.1
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.1
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rspec
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 2.9.0
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 2.9.0
|
78
|
+
description: A Ruby module with a lot of distance measures for your projects.
|
79
|
+
email: carlos@onox.com.br
|
80
|
+
executables: []
|
81
|
+
extensions:
|
82
|
+
- ext/measurable/extconf.rb
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- Gemfile.lock
|
88
|
+
- LICENSE
|
89
|
+
- README.md
|
90
|
+
- Rakefile
|
91
|
+
- ext/measurable/extconf.rb
|
92
|
+
- ext/measurable/measurable.c
|
93
|
+
- lib/measurable.rb
|
94
|
+
- lib/measurable/cosine_similarity.rb
|
95
|
+
- lib/measurable/haversine.rb
|
96
|
+
- lib/measurable/jaccard.rb
|
97
|
+
- lib/measurable/tanimoto_coefficient.rb
|
98
|
+
- lib/measurable/version.rb
|
99
|
+
- measurable.gemspec
|
100
|
+
- spec/measurable.rb
|
101
|
+
- spec/spec_helper.rb
|
102
|
+
homepage: http://github.com/agarie/measurable
|
103
|
+
licenses: []
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: 1.9.2
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 1.8.24
|
123
|
+
signing_key:
|
124
|
+
specification_version: 3
|
125
|
+
summary: A Ruby module with a lot of distance measures for your projects.
|
126
|
+
test_files:
|
127
|
+
- spec/measurable.rb
|
128
|
+
- spec/spec_helper.rb
|