lsh 0.3.2 → 0.4.0
This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/lib/lsh/index.rb +13 -17
- data/lib/lsh/math_util_gsl.rb +12 -8
- data/lib/lsh/math_util_jblas.rb +10 -6
- data/lib/lsh/storage/redis_backend.rb +5 -11
- data/lib/lsh/web.rb +3 -3
- metadata +2 -2
data/lib/lsh/index.rb
CHANGED
@@ -111,9 +111,9 @@ module LSH
|
|
111
111
|
|
112
112
|
def hash(vector, projection, bias = true)
|
113
113
|
hash = []
|
114
|
-
projection.
|
115
|
-
|
116
|
-
|
114
|
+
dot_products = (vector * projection).row(0).to_a
|
115
|
+
window = storage.parameters[:window]
|
116
|
+
dot_products.each do |dot_product|
|
117
117
|
if window == Float::INFINITY # Binary LSH
|
118
118
|
if dot_product >= 0
|
119
119
|
hash << 1
|
@@ -128,6 +128,15 @@ module LSH
|
|
128
128
|
hash
|
129
129
|
end
|
130
130
|
|
131
|
+
def random_vector(dim)
|
132
|
+
MathUtil.random_gaussian_matrix(1, dim)
|
133
|
+
end
|
134
|
+
|
135
|
+
def random_vector_unit(dim)
|
136
|
+
r = random_vector(dim)
|
137
|
+
r /= MathUtil.norm(r)
|
138
|
+
end
|
139
|
+
|
131
140
|
def array_to_hash(array)
|
132
141
|
return array.hash
|
133
142
|
# Derives a 28 bit hash value from an array of integers
|
@@ -150,20 +159,7 @@ module LSH
|
|
150
159
|
end
|
151
160
|
|
152
161
|
def generate_projection(dim, k)
|
153
|
-
|
154
|
-
k.times do |i|
|
155
|
-
vectors << random_vector(dim)
|
156
|
-
end
|
157
|
-
vectors
|
158
|
-
end
|
159
|
-
|
160
|
-
def random_vector_unit(dim)
|
161
|
-
r = random_vector(dim)
|
162
|
-
r /= MathUtil.norm(r)
|
163
|
-
end
|
164
|
-
|
165
|
-
def random_vector(dim)
|
166
|
-
MathUtil.random_gaussian_vector(dim)
|
162
|
+
MathUtil.random_gaussian_matrix(dim, k)
|
167
163
|
end
|
168
164
|
|
169
165
|
def similarity(v1, v2)
|
data/lib/lsh/math_util_gsl.rb
CHANGED
@@ -27,20 +27,24 @@ module LSH
|
|
27
27
|
@@gsl_random.uniform
|
28
28
|
end
|
29
29
|
|
30
|
-
def self.
|
31
|
-
|
30
|
+
def self.zeros(k, l)
|
31
|
+
GSL::Matrix.alloc(k, l)
|
32
32
|
end
|
33
33
|
|
34
|
-
def self.
|
35
|
-
|
34
|
+
def self.random_gaussian_vector(dim)
|
35
|
+
@@gsl_random.gaussian(1, dim)
|
36
36
|
end
|
37
37
|
|
38
38
|
def self.random_gaussian_matrix(k, l)
|
39
|
-
|
39
|
+
matrix = zeros(k, l)
|
40
|
+
(0..(k - 1)).each do |i|
|
41
|
+
matrix.set_row(i, random_gaussian_vector(l))
|
42
|
+
end
|
43
|
+
matrix
|
40
44
|
end
|
41
45
|
|
42
46
|
def self.dot(v1, v2)
|
43
|
-
v1 * v2.
|
47
|
+
(v1 * v2.transpose)[0,0]
|
44
48
|
end
|
45
49
|
|
46
50
|
def self.norm(v)
|
@@ -49,7 +53,7 @@ module LSH
|
|
49
53
|
|
50
54
|
def self.uniq(vs)
|
51
55
|
# Can't use uniq as
|
52
|
-
# [ v, JSON.parse(v.to_json) ].uniq.size == 2 with GSL
|
56
|
+
# [ v, JSON.parse(v.to_json, :create_additions => true) ].uniq.size == 2 with GSL
|
53
57
|
results = []
|
54
58
|
vs.each { |v| results << v unless results.member? v }
|
55
59
|
results
|
@@ -61,7 +65,7 @@ end
|
|
61
65
|
|
62
66
|
module GSL
|
63
67
|
|
64
|
-
class
|
68
|
+
class Matrix
|
65
69
|
|
66
70
|
def to_json(*a)
|
67
71
|
{
|
data/lib/lsh/math_util_jblas.rb
CHANGED
@@ -24,16 +24,16 @@ module LSH
|
|
24
24
|
JBLAS.rand[0,0]
|
25
25
|
end
|
26
26
|
|
27
|
-
def self.random_gaussian_vector(dim)
|
28
|
-
JBLAS.randn(1, dim)
|
29
|
-
end
|
30
|
-
|
31
27
|
def self.random_gaussian_matrix(k, l)
|
32
28
|
JBLAS.randn(k, l)
|
33
29
|
end
|
34
30
|
|
35
|
-
def self.
|
36
|
-
|
31
|
+
def self.random_gaussian_vector(l)
|
32
|
+
random_gaussian_matrix(1, l)
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.zeros(k, l)
|
36
|
+
JBLAS::DoubleMatrix.new(k, l)
|
37
37
|
end
|
38
38
|
|
39
39
|
def self.dot(v1, v2)
|
@@ -68,6 +68,10 @@ module JBLAS
|
|
68
68
|
from_array(o['data']).t
|
69
69
|
end
|
70
70
|
|
71
|
+
def size
|
72
|
+
[ rows, columns ]
|
73
|
+
end
|
74
|
+
|
71
75
|
end
|
72
76
|
|
73
77
|
end
|
data/lib/lsh/storage/redis_backend.rb
CHANGED
@@ -68,9 +68,7 @@ module LSH
|
|
68
68
|
# (too slow to serialize and store in Redis for
|
69
69
|
# large number of dimensions/projections)
|
70
70
|
projections.each_with_index do |projection, i|
|
71
|
-
projection.
|
72
|
-
vector.save(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
|
73
|
-
end
|
71
|
+
projection.save(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
|
74
72
|
end
|
75
73
|
end
|
76
74
|
|
@@ -79,13 +77,9 @@ module LSH
|
|
79
77
|
@projections ||= (
|
80
78
|
projections = []
|
81
79
|
parameters[:number_of_independent_projections].times do |i|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
v.load(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
|
86
|
-
vectors << v
|
87
|
-
end
|
88
|
-
projections << vectors
|
80
|
+
m = MathUtil.zeros(parameters[:dim], parameters[:number_of_random_vectors])
|
81
|
+
m.load(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
|
82
|
+
projections << m
|
89
83
|
end
|
90
84
|
projections
|
91
85
|
)
|
@@ -119,7 +113,7 @@ module LSH
|
|
119
113
|
end
|
120
114
|
|
121
115
|
def load_vector(hash)
|
122
|
-
vector = MathUtil.zeros(parameters[:dim])
|
116
|
+
vector = MathUtil.zeros(1, parameters[:dim])
|
123
117
|
vector.load(File.join(@data_dir, hash+'.dat'))
|
124
118
|
vector
|
125
119
|
end
|
data/lib/lsh/web.rb
CHANGED
@@ -23,7 +23,7 @@ module LSH
|
|
23
23
|
mime_type = (params[:mime_type] || 'application/json')
|
24
24
|
if mime_type == 'application/json'
|
25
25
|
t0 = Time.now
|
26
|
-
vector = JSON.parse(params[:data])
|
26
|
+
vector = JSON.parse(params[:data], :create_additions => true)
|
27
27
|
result_vectors = index.query(vector, params[:radius] || 0)
|
28
28
|
results = []
|
29
29
|
if params[:include] == 'id'
|
@@ -43,7 +43,7 @@ module LSH
|
|
43
43
|
mime_type = (params[:mime_type] || 'application/json')
|
44
44
|
if mime_type == 'application/json'
|
45
45
|
t0 = Time.now
|
46
|
-
vector = JSON.parse(params[:data])
|
46
|
+
vector = JSON.parse(params[:data], :create_additions => true)
|
47
47
|
results = index.query_ids_by_vector(vector, params[:radius] || 0)
|
48
48
|
content_type :json
|
49
49
|
{ "time" => Time.now - t0, "results" => results }.to_json
|
@@ -66,7 +66,7 @@ module LSH
|
|
66
66
|
mime_type = (params[:mime_type] || 'application/json')
|
67
67
|
if mime_type == 'application/json'
|
68
68
|
t0 = Time.now
|
69
|
-
vector = JSON.parse(params[:data])
|
69
|
+
vector = JSON.parse(params[:data], :create_additions => true)
|
70
70
|
index.add(vector, params[:id])
|
71
71
|
content_type :json
|
72
72
|
{ "time" => Time.now - t0, "status" => "indexed" }.to_json
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lsh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: gsl
|