lsh 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lsh/index.rb CHANGED
@@ -47,31 +47,35 @@ module LSH
47
47
  end
48
48
 
49
49
  def query(vector, multiprobe_radius = 0)
50
- results = []
51
- hashes(vector).each_with_index do |hash, i|
52
- hash_i = array_to_hash(hash)
53
- bucket = storage.find_bucket(i)
54
- # Multiprobe LSH
55
- # Take query hash, move it around at radius r, hash it and use the result as a query
56
- # TODO: only works for binary LSH atm
57
- bucket_results = storage.query_bucket(bucket, hash_i)
58
- results += bucket_results if bucket_results
59
- if multiprobe_radius > 0
60
- (1..multiprobe_radius).to_a.each do |radius|
61
- (0..(storage.parameters[:number_of_random_vectors] - 1)).to_a.combination(radius).each do |flips|
62
- probe = hash.clone
63
- flips.each { |d| probe[d] = (probe[d] == 1) ? 0 : 1 }
64
- probe_hash = array_to_hash(probe)
65
- probe_bucket_results = storage.query_bucket(bucket, probe_hash)
66
- results += probe_bucket_results if probe_bucket_results
67
- end
68
- end
50
+ hash_arrays = hashes(vector)
51
+ hashes = hash_arrays.map { |a| array_to_hash(a) }
52
+ results = storage.query_buckets(hashes)
53
+ # Multiprobe LSH
54
+ # Take query hashes, move them around at radius r, and use them to do another query
55
+ # TODO: only works for binary LSH atm
56
+ if multiprobe_radius > 0
57
+ mp_arrays = multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
58
+ mp_arrays.each do |probes_arrays|
59
+ probes_hashes = probes_arrays.map { |a| array_to_hash(a) }
60
+ results += storage.query_buckets(probes_hashes)
69
61
  end
70
62
  end
71
63
  results = MathUtil.uniq(results)
72
64
  order_vectors_by_similarity(vector, results)
73
65
  end
74
66
 
67
+ def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
68
+ mp_arrays = []
69
+ (1..multiprobe_radius).to_a.each do |radius|
70
+ (0..(storage.parameters[:number_of_random_vectors] - 1)).to_a.combination(radius).each do |flips|
71
+ probes = Marshal.load(Marshal.dump(hash_arrays))
72
+ probes.each { |probe| flips.each { |d| probe[d] = (probe[d] == 1) ? 0 : 1 } }
73
+ mp_arrays << probes
74
+ end
75
+ end
76
+ mp_arrays
77
+ end
78
+
75
79
  def order_vectors_by_similarity(vector, vectors)
76
80
  vectors.map { |v| [ v, similarity(vector, v) ] } .sort_by { |v, sim| sim } .reverse .map { |vs| vs[0] }
77
81
  end
@@ -49,8 +49,14 @@ module LSH
49
49
  @buckets[i]
50
50
  end
51
51
 
52
- def query_bucket(bucket, hash)
53
- bucket[hash]
52
+ def query_buckets(hashes)
53
+ results = []
54
+ hashes.each_with_index do |hash, i|
55
+ bucket = find_bucket(i)
56
+ in_bucket = bucket[hash]
57
+ results += in_bucket if in_bucket
58
+ end
59
+ results
54
60
  end
55
61
 
56
62
  end
@@ -111,9 +111,15 @@ module LSH
111
111
  "bucket:#{i}" if @redis.get("buckets").to_i > i
112
112
  end
113
113
 
114
- def query_bucket(bucket, hash)
114
+ def query_buckets(hashes)
115
+ vector_hashes = []
116
+ hashes.each_with_index do |hash, i|
117
+ bucket = find_bucket(i)
118
+ vector_hashes += @redis.smembers("#{bucket}:#{hash}")
119
+ end
120
+ vector_hashes.uniq!
115
121
  results = []
116
- @redis.smembers("#{bucket}:#{hash}").map do |vector_hash|
122
+ vector_hashes.each do |vector_hash|
117
123
  vector = MathUtil.zeros(parameters[:dim])
118
124
  vector.load(File.join(@data_dir, vector_hash+'.dat'))
119
125
  results << vector
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: