lsh 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/lsh/index.rb +13 -17
- data/lib/lsh/math_util_gsl.rb +12 -8
- data/lib/lsh/math_util_jblas.rb +10 -6
- data/lib/lsh/storage/redis_backend.rb +5 -11
- data/lib/lsh/web.rb +3 -3
- metadata +2 -2
data/lib/lsh/index.rb
CHANGED
@@ -111,9 +111,9 @@ module LSH
|
|
111
111
|
|
112
112
|
def hash(vector, projection, bias = true)
|
113
113
|
hash = []
|
114
|
-
projection.
|
115
|
-
|
116
|
-
|
114
|
+
dot_products = (vector * projection).row(0).to_a
|
115
|
+
window = storage.parameters[:window]
|
116
|
+
dot_products.each do |dot_product|
|
117
117
|
if window == Float::INFINITY # Binary LSH
|
118
118
|
if dot_product >= 0
|
119
119
|
hash << 1
|
@@ -128,6 +128,15 @@ module LSH
|
|
128
128
|
hash
|
129
129
|
end
|
130
130
|
|
131
|
+
def random_vector(dim)
|
132
|
+
MathUtil.random_gaussian_matrix(1, dim)
|
133
|
+
end
|
134
|
+
|
135
|
+
def random_vector_unit(dim)
|
136
|
+
r = random_vector(dim)
|
137
|
+
r /= MathUtil.norm(r)
|
138
|
+
end
|
139
|
+
|
131
140
|
def array_to_hash(array)
|
132
141
|
return array.hash
|
133
142
|
# Derives a 28 bit hash value from an array of integers
|
@@ -150,20 +159,7 @@ module LSH
|
|
150
159
|
end
|
151
160
|
|
152
161
|
def generate_projection(dim, k)
|
153
|
-
|
154
|
-
k.times do |i|
|
155
|
-
vectors << random_vector(dim)
|
156
|
-
end
|
157
|
-
vectors
|
158
|
-
end
|
159
|
-
|
160
|
-
def random_vector_unit(dim)
|
161
|
-
r = random_vector(dim)
|
162
|
-
r /= MathUtil.norm(r)
|
163
|
-
end
|
164
|
-
|
165
|
-
def random_vector(dim)
|
166
|
-
MathUtil.random_gaussian_vector(dim)
|
162
|
+
MathUtil.random_gaussian_matrix(dim, k)
|
167
163
|
end
|
168
164
|
|
169
165
|
def similarity(v1, v2)
|
data/lib/lsh/math_util_gsl.rb
CHANGED
@@ -27,20 +27,24 @@ module LSH
|
|
27
27
|
@@gsl_random.uniform
|
28
28
|
end
|
29
29
|
|
30
|
-
def self.
|
31
|
-
|
30
|
+
def self.zeros(k, l)
|
31
|
+
GSL::Matrix.alloc(k, l)
|
32
32
|
end
|
33
33
|
|
34
|
-
def self.
|
35
|
-
|
34
|
+
def self.random_gaussian_vector(dim)
|
35
|
+
@@gsl_random.gaussian(1, dim)
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.random_gaussian_matrix(k, l)
|
39
|
-
|
39
|
+
matrix = zeros(k, l)
|
40
|
+
(0..(k - 1)).each do |i|
|
41
|
+
matrix.set_row(i, random_gaussian_vector(l))
|
42
|
+
end
|
43
|
+
matrix
|
40
44
|
end
|
41
45
|
|
42
46
|
def self.dot(v1, v2)
|
43
|
-
v1 * v2.
|
47
|
+
(v1 * v2.transpose)[0,0]
|
44
48
|
end
|
45
49
|
|
46
50
|
def self.norm(v)
|
@@ -49,7 +53,7 @@ module LSH
|
|
49
53
|
|
50
54
|
def self.uniq(vs)
|
51
55
|
# Can't use uniq as
|
52
|
-
# [ v, JSON.parse(v.to_json) ].uniq.size == 2 with GSL
|
56
|
+
# [ v, JSON.parse(v.to_json, :create_additions => true) ].uniq.size == 2 with GSL
|
53
57
|
results = []
|
54
58
|
vs.each { |v| results << v unless results.member? v }
|
55
59
|
results
|
@@ -61,7 +65,7 @@ end
|
|
61
65
|
|
62
66
|
module GSL
|
63
67
|
|
64
|
-
class
|
68
|
+
class Matrix
|
65
69
|
|
66
70
|
def to_json(*a)
|
67
71
|
{
|
data/lib/lsh/math_util_jblas.rb
CHANGED
@@ -24,16 +24,16 @@ module LSH
|
|
24
24
|
JBLAS.rand[0,0]
|
25
25
|
end
|
26
26
|
|
27
|
-
def self.random_gaussian_vector(dim)
|
28
|
-
JBLAS.randn(1, dim)
|
29
|
-
end
|
30
|
-
|
31
27
|
def self.random_gaussian_matrix(k, l)
|
32
28
|
JBLAS.randn(k, l)
|
33
29
|
end
|
34
30
|
|
35
|
-
def self.
|
36
|
-
|
31
|
+
def self.random_gaussian_vector(l)
|
32
|
+
random_gaussian_matrix(1, l)
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.zeros(k, l)
|
36
|
+
JBLAS::DoubleMatrix.new(k, l)
|
37
37
|
end
|
38
38
|
|
39
39
|
def self.dot(v1, v2)
|
@@ -68,6 +68,10 @@ module JBLAS
|
|
68
68
|
from_array(o['data']).t
|
69
69
|
end
|
70
70
|
|
71
|
+
def size
|
72
|
+
[ rows, columns ]
|
73
|
+
end
|
74
|
+
|
71
75
|
end
|
72
76
|
|
73
77
|
end
|
data/lib/lsh/storage/redis_backend.rb
CHANGED
@@ -68,9 +68,7 @@ module LSH
|
|
68
68
|
# (too slow to serialize and store in Redis for
|
69
69
|
# large number of dimensions/projections)
|
70
70
|
projections.each_with_index do |projection, i|
|
71
|
-
projection.
|
72
|
-
vector.save(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
|
73
|
-
end
|
71
|
+
projection.save(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
|
74
72
|
end
|
75
73
|
end
|
76
74
|
|
@@ -79,13 +77,9 @@ module LSH
|
|
79
77
|
@projections ||= (
|
80
78
|
projections = []
|
81
79
|
parameters[:number_of_independent_projections].times do |i|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
v.load(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
|
86
|
-
vectors << v
|
87
|
-
end
|
88
|
-
projections << vectors
|
80
|
+
m = MathUtil.zeros(parameters[:dim], parameters[:number_of_random_vectors])
|
81
|
+
m.load(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
|
82
|
+
projections << m
|
89
83
|
end
|
90
84
|
projections
|
91
85
|
)
|
@@ -119,7 +113,7 @@ module LSH
|
|
119
113
|
end
|
120
114
|
|
121
115
|
def load_vector(hash)
|
122
|
-
vector = MathUtil.zeros(parameters[:dim])
|
116
|
+
vector = MathUtil.zeros(1, parameters[:dim])
|
123
117
|
vector.load(File.join(@data_dir, hash+'.dat'))
|
124
118
|
vector
|
125
119
|
end
|
data/lib/lsh/web.rb
CHANGED
@@ -23,7 +23,7 @@ module LSH
|
|
23
23
|
mime_type = (params[:mime_type] || 'application/json')
|
24
24
|
if mime_type == 'application/json'
|
25
25
|
t0 = Time.now
|
26
|
-
vector = JSON.parse(params[:data])
|
26
|
+
vector = JSON.parse(params[:data], :create_additions => true)
|
27
27
|
result_vectors = index.query(vector, params[:radius] || 0)
|
28
28
|
results = []
|
29
29
|
if params[:include] == 'id'
|
@@ -43,7 +43,7 @@ module LSH
|
|
43
43
|
mime_type = (params[:mime_type] || 'application/json')
|
44
44
|
if mime_type == 'application/json'
|
45
45
|
t0 = Time.now
|
46
|
-
vector = JSON.parse(params[:data])
|
46
|
+
vector = JSON.parse(params[:data], :create_additions => true)
|
47
47
|
results = index.query_ids_by_vector(vector, params[:radius] || 0)
|
48
48
|
content_type :json
|
49
49
|
{ "time" => Time.now - t0, "results" => results }.to_json
|
@@ -66,7 +66,7 @@ module LSH
|
|
66
66
|
mime_type = (params[:mime_type] || 'application/json')
|
67
67
|
if mime_type == 'application/json'
|
68
68
|
t0 = Time.now
|
69
|
-
vector = JSON.parse(params[:data])
|
69
|
+
vector = JSON.parse(params[:data], :create_additions => true)
|
70
70
|
index.add(vector, params[:id])
|
71
71
|
content_type :json
|
72
72
|
{ "time" => Time.now - t0, "status" => "indexed" }.to_json
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lsh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: gsl
|