lsh 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/lsh/index.rb +23 -19
- data/lib/lsh/storage/memory.rb +8 -2
- data/lib/lsh/storage/redis_backend.rb +8 -2
- metadata +1 -1
data/lib/lsh/index.rb
CHANGED
@@ -47,31 +47,35 @@ module LSH
|
|
47
47
|
end
|
48
48
|
|
49
49
|
# Looks up the stored vectors that share a bucket with +vector+ and returns
# them ordered by similarity to it.
#
# The vector is hashed with +hashes+, each resulting hash array is converted
# with +array_to_hash+, and the converted hashes are passed to
# +storage.query_buckets+ in one call.
#
# Multiprobe LSH: when +multiprobe_radius+ is greater than zero, the query
# hashes are moved around at every radius up to +multiprobe_radius+ (see
# +multiprobe_hashes_arrays+) and each resulting probe set is used for
# another bucket query.
# TODO: multiprobe only works for binary LSH at the moment.
#
# @param vector the query vector
# @param multiprobe_radius [Integer] maximum number of bits to flip when
#   probing neighbouring buckets; 0 (the default) disables multiprobe
# @return the de-duplicated results (via +MathUtil.uniq+), as ordered by
#   +order_vectors_by_similarity+
def query(vector, multiprobe_radius = 0)
  hash_arrays = hashes(vector)
  # No local named `hashes` here: it would shadow the `hashes` method above.
  results = storage.query_buckets(hash_arrays.map { |bits| array_to_hash(bits) })

  if multiprobe_radius > 0
    multiprobe_hashes_arrays(hash_arrays, multiprobe_radius).each do |probe_arrays|
      probe_hashes = probe_arrays.map { |bits| array_to_hash(bits) }
      results += storage.query_buckets(probe_hashes)
    end
  end

  order_vectors_by_similarity(vector, MathUtil.uniq(results))
end
|
74
66
|
|
67
|
+
# Builds the probe sets used by multiprobe queries.
#
# For every radius from 1 to +multiprobe_radius+, and for every combination
# of that many bit positions (positions range over
# 0...storage.parameters[:number_of_random_vectors]), a copy of
# +hash_arrays+ is produced in which those positions are flipped in every
# hash array. A position holding 1 becomes 0; any other value becomes 1.
# The input arrays are not modified.
#
# @param hash_arrays [Array<Array>] one hash array per bucket
#   (assumed to be flat arrays of bits, so a per-array +dup+ is a sufficient
#   copy - confirm against +hashes+)
# @param multiprobe_radius [Integer] largest number of bits flipped at once
# @return [Array<Array<Array>>] one probe set per flip combination, ordered
#   by increasing radius; empty when +multiprobe_radius+ is below 1
def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
  # The set of flippable positions does not depend on the radius.
  bit_positions = (0...storage.parameters[:number_of_random_vectors]).to_a

  (1..multiprobe_radius).flat_map do |radius|
    bit_positions.combination(radius).map do |flips|
      hash_arrays.map do |hash_array|
        probe = hash_array.dup
        flips.each { |d| probe[d] = probe[d] == 1 ? 0 : 1 }
        probe
      end
    end
  end
end
|
78
|
+
|
75
79
|
# Returns +vectors+ ordered by their +similarity+ to +vector+, highest
# similarity first.
#
# @param vector the reference vector passed to +similarity+
# @param vectors [Array] candidate vectors to rank
# @return [Array] the candidates, sorted by ascending similarity and then
#   reversed
def order_vectors_by_similarity(vector, vectors)
  scored = vectors.map { |candidate| [candidate, similarity(vector, candidate)] }
  ascending = scored.sort_by { |_candidate, score| score }
  ascending.reverse.map { |pair| pair[0] }
end
|
data/lib/lsh/storage/memory.rb
CHANGED
@@ -49,8 +49,14 @@ module LSH
|
|
49
49
|
@buckets[i]
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
53
|
-
|
52
|
+
# Collects the entries stored under each hash in its corresponding bucket.
#
# The i-th element of +hashes+ is looked up in the bucket returned by
# +find_bucket(i)+; hashes with no entry in their bucket contribute nothing.
#
# @param hashes [Array] one hash per bucket, in bucket order
# @return [Array] all matching entries, concatenated in bucket order
#   (duplicates are not removed here)
def query_buckets(hashes)
  results = []
  hashes.each_with_index do |hash, i|
    in_bucket = find_bucket(i)[hash]
    # concat appends to the local accumulator in place; the stored bucket
    # array itself is only read.
    results.concat(in_bucket) if in_bucket
  end
  results
end
|
55
61
|
|
56
62
|
end
|
@@ -111,9 +111,15 @@ module LSH
|
|
111
111
|
"bucket:#{i}" if @redis.get("buckets").to_i > i
|
112
112
|
end
|
113
113
|
|
114
|
-
def
|
114
|
+
def query_buckets(hashes)
|
115
|
+
vector_hashes = []
|
116
|
+
hashes.each_with_index do |hash, i|
|
117
|
+
bucket = find_bucket(i)
|
118
|
+
vector_hashes += @redis.smembers("#{bucket}:#{hash}")
|
119
|
+
end
|
120
|
+
vector_hashes.uniq!
|
115
121
|
results = []
|
116
|
-
|
122
|
+
vector_hashes.each do |vector_hash|
|
117
123
|
vector = MathUtil.zeros(parameters[:dim])
|
118
124
|
vector.load(File.join(@data_dir, vector_hash+'.dat'))
|
119
125
|
results << vector
|