lsh 0.2.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/lsh/index.rb +22 -1
- data/lib/lsh/storage/memory.rb +15 -0
- data/lib/lsh/storage/redis_backend.rb +28 -3
- data/lib/lsh/web.rb +31 -2
- metadata +2 -2
data/lib/lsh/index.rb
CHANGED
@@ -38,7 +38,8 @@ module LSH
|
|
38
38
|
Index.new(storage.parameters, storage) if storage.has_index?
|
39
39
|
end
|
40
40
|
|
41
|
-
def add(vector)
|
41
|
+
def add(vector, id = nil)
|
42
|
+
storage.add_vector_id(vector, id) if id
|
42
43
|
hashes(vector).each_with_index do |hash, i|
|
43
44
|
hash_i = array_to_hash(hash)
|
44
45
|
bucket = storage.find_bucket(i)
|
@@ -46,6 +47,14 @@ module LSH
|
|
46
47
|
end
|
47
48
|
end
|
48
49
|
|
50
|
+
# Returns whatever the storage backend's vector_to_id reports for +vector+.
def vector_to_id(vector)
|
51
|
+
storage.vector_to_id(vector)
|
52
|
+
end
|
53
|
+
|
54
|
+
# Returns whatever the storage backend's id_to_vector reports for +id+.
def id_to_vector(id)
|
55
|
+
storage.id_to_vector(id)
|
56
|
+
end
|
57
|
+
|
49
58
|
def query(vector, multiprobe_radius = 0)
|
50
59
|
hash_arrays = hashes(vector)
|
51
60
|
hashes = hash_arrays.map { |a| array_to_hash(a) }
|
@@ -64,6 +73,18 @@ module LSH
|
|
64
73
|
order_vectors_by_similarity(vector, results)
|
65
74
|
end
|
66
75
|
|
76
|
+
# Queries the index using the vector registered under +id+ and returns the
# ids of the matching vectors.
#
# id                - identifier previously passed to #add together with a vector.
# multiprobe_radius - forwarded unchanged to #query_ids_by_vector (default 0).
#
# Returns the result of #query_ids_by_vector for the stored vector.
# Raises ArgumentError when no vector is registered under +id+, rather than
# passing nil on to the query.
def query_ids(id, multiprobe_radius = 0)
  vector = id_to_vector(id)
  raise ArgumentError, "Unknown id: #{id}" if vector.nil?

  query_ids_by_vector(vector, multiprobe_radius)
end
|
80
|
+
|
81
|
+
# Runs #query for +vector+ and translates every returned vector into its id.
#
# vector            - query vector, passed unchanged to #query.
# multiprobe_radius - passed unchanged to #query (default 0).
#
# Returns an Array with one entry per result of #query, in the same order;
# each entry is whatever #vector_to_id returns for that result.
def query_ids_by_vector(vector, multiprobe_radius = 0)
  query(vector, multiprobe_radius).map { |v| vector_to_id(v) }
end
|
87
|
+
|
67
88
|
def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
|
68
89
|
mp_arrays = []
|
69
90
|
(1..multiprobe_radius).to_a.each do |radius|
|
data/lib/lsh/storage/memory.rb
CHANGED
@@ -45,6 +45,21 @@ module LSH
|
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
+
# Records the association between +vector+ and +id+ in two in-memory hashes
# (created on first use): @vector_to_id, keyed by vector.hash, and
# @id_to_vector, keyed by id and holding the vector itself.
def add_vector_id(vector, id)
|
49
|
+
@vector_to_id ||= {}
|
50
|
+
@vector_to_id[vector.hash] = id
|
51
|
+
@id_to_vector ||= {}
|
52
|
+
@id_to_vector[id] = vector
|
53
|
+
end
|
54
|
+
|
55
|
+
# Returns the id stored under vector.hash, or nil when @vector_to_id has not
# been created yet or holds no entry for that hash.
def vector_to_id(vector)
|
56
|
+
@vector_to_id[vector.hash] if @vector_to_id
|
57
|
+
end
|
58
|
+
|
59
|
+
# Returns the vector stored under +id+, or nil when @id_to_vector has not
# been created yet or holds no entry for +id+.
def id_to_vector(id)
|
60
|
+
@id_to_vector[id] if @id_to_vector
|
61
|
+
end
|
62
|
+
|
48
63
|
# Returns the entry of @buckets at index +i+.
def find_bucket(i)
|
49
64
|
@buckets[i]
|
50
65
|
end
|
data/lib/lsh/storage/redis_backend.rb
CHANGED
@@ -112,11 +112,37 @@ module LSH
|
|
112
112
|
@redis.incr "lsh:buckets"
|
113
113
|
end
|
114
114
|
|
115
|
+
# Writes +vector+ to "<@data_dir>/<vector.hash>.dat" unless a file already
# exists at that path, so a given vector is written at most once.
#
# vector - object responding to #hash and #save(path).
def save_vector(vector)
  path = File.join(@data_dir, vector.hash.to_s + '.dat')
  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? is the
  # supported spelling on every Ruby version.
  vector.save(path) unless File.exist?(path)
end
|
119
|
+
|
120
|
+
# Builds a vector with MathUtil.zeros(parameters[:dim]), calls #load on it
# with the path "<@data_dir>/<hash>.dat", and returns it.
# +hash+ is concatenated with '.dat', so it is expected to be a String.
def load_vector(hash)
|
121
|
+
vector = MathUtil.zeros(parameters[:dim])
|
122
|
+
vector.load(File.join(@data_dir, hash+'.dat'))
|
123
|
+
vector
|
124
|
+
end
|
125
|
+
|
115
126
|
# Saves +vector+ through save_vector, then adds vector.hash (as a string) to
# the Redis set named "<bucket>:<hash>".
def add_vector_to_bucket(bucket, hash, vector)
|
116
|
-
|
127
|
+
save_vector(vector) # Writing vector to disk if not already there
|
117
128
|
@redis.sadd "#{bucket}:#{hash}", vector.hash.to_s # Only storing vector's hash in Redis
|
118
129
|
end
|
119
130
|
|
131
|
+
# Saves +vector+ through save_vector, then records the association with +id+
# under two Redis keys: "lsh:vector_to_id:<vector.hash>" holds the id and
# "lsh:id_to_vector:<id>" holds vector.hash as a string.
def add_vector_id(vector, id)
|
132
|
+
save_vector(vector) # Writing vector to disk if not already there
|
133
|
+
@redis.set "lsh:vector_to_id:#{vector.hash}", id
|
134
|
+
@redis.set "lsh:id_to_vector:#{id}", vector.hash.to_s
|
135
|
+
end
|
136
|
+
|
137
|
+
# Returns the value Redis holds under "lsh:vector_to_id:<vector.hash>".
def vector_to_id(vector)
|
138
|
+
@redis.get "lsh:vector_to_id:#{vector.hash}"
|
139
|
+
end
|
140
|
+
|
141
|
+
# Looks up the vector registered under +id+.
#
# Reads the vector's hash from the Redis key "lsh:id_to_vector:<id>" and
# loads the matching vector through load_vector.
#
# Returns the loaded vector, or nil when Redis holds no entry for +id+
# (load_vector would otherwise fail on a nil hash).
def id_to_vector(id)
  vector_hash = @redis.get "lsh:id_to_vector:#{id}"
  load_vector(vector_hash) if vector_hash
end
|
145
|
+
|
120
146
|
# Returns the string "lsh:bucket:<i>" identifying bucket +i+.
def find_bucket(i)
|
121
147
|
"lsh:bucket:#{i}"
|
122
148
|
end
|
@@ -133,8 +159,7 @@ module LSH
|
|
133
159
|
vector_hashes.uniq!
|
134
160
|
results = []
|
135
161
|
vector_hashes.each do |vector_hash|
|
136
|
-
vector = MathUtil.zeros(parameters[:dim])
|
137
|
-
vector.load(File.join(@data_dir, vector_hash+'.dat'))
|
162
|
+
vector = load_vector(vector_hash)
|
138
163
|
results << vector
|
139
164
|
end
|
140
165
|
results
|
data/lib/lsh/web.rb
CHANGED
@@ -24,7 +24,13 @@ module LSH
|
|
24
24
|
if mime_type == 'application/json'
|
25
25
|
t0 = Time.now
|
26
26
|
vector = JSON.parse(params[:data])
|
27
|
-
|
27
|
+
result_vectors = index.query(vector, params[:radius] || 0)
|
28
|
+
results = []
|
29
|
+
if params[:include] == 'id'
|
30
|
+
result_vectors.each { |v| results << { :id => index.vector_to_id(v), :data => v } }
|
31
|
+
else
|
32
|
+
result_vectors.each { |v| results << { :data => v } }
|
33
|
+
end
|
28
34
|
content_type :json
|
29
35
|
{ "time" => Time.now - t0, "results" => results }.to_json
|
30
36
|
else
|
@@ -32,13 +38,36 @@ module LSH
|
|
32
38
|
end
|
33
39
|
end
|
34
40
|
|
41
|
+
# POST /query-ids
#
# Returns the ids of indexed vectors similar to the query, as JSON of the
# form { "time" => elapsed, "results" => ids }.
#
# The query is given either as params[:data] (a JSON-encoded vector, with
# optional params[:mime_type], which must be 'application/json') or as
# params[:id] (the id of an already indexed vector). params[:radius] is the
# optional multiprobe radius.
#
# Raises "Unrecognised mime-type", "Unknown id" or "Missing query" for
# requests that cannot be served.
post '/query-ids' do
  # A supplied radius arrives as a String, while the index uses it as the end
  # of an integer range, so coerce it here (0 when absent).
  radius = (params[:radius] || 0).to_i

  if params[:data] # We're querying with a vector
    mime_type = (params[:mime_type] || 'application/json')
    raise "Unrecognised mime-type" unless mime_type == 'application/json'
    t0 = Time.now
    vector = JSON.parse(params[:data])
    results = index.query_ids_by_vector(vector, radius)
  elsif params[:id] # We're querying with an id
    raise "Unknown id" unless index.id_to_vector(params[:id])
    t0 = Time.now
    results = index.query_ids(params[:id], radius)
  else
    raise "Missing query"
  end

  content_type :json
  { "time" => Time.now - t0, "results" => results }.to_json
end
|
63
|
+
|
35
64
|
post '/index' do
|
36
65
|
raise "Missing data" unless params[:data]
|
37
66
|
mime_type = (params[:mime_type] || 'application/json')
|
38
67
|
if mime_type == 'application/json'
|
39
68
|
t0 = Time.now
|
40
69
|
vector = JSON.parse(params[:data])
|
41
|
-
index.add(vector)
|
70
|
+
index.add(vector, params[:id])
|
42
71
|
content_type :json
|
43
72
|
{ "time" => Time.now - t0, "status" => "indexed" }.to_json
|
44
73
|
else
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lsh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: gsl
|