lsh 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/lsh/index.rb CHANGED
@@ -47,31 +47,35 @@ module LSH
47
47
  end
48
48
 
49
49
  def query(vector, multiprobe_radius = 0)
50
- results = []
51
- hashes(vector).each_with_index do |hash, i|
52
- hash_i = array_to_hash(hash)
53
- bucket = storage.find_bucket(i)
54
- # Multiprobe LSH
55
- # Take query hash, move it around at radius r, hash it and use the result as a query
56
- # TODO: only works for binary LSH atm
57
- bucket_results = storage.query_bucket(bucket, hash_i)
58
- results += bucket_results if bucket_results
59
- if multiprobe_radius > 0
60
- (1..multiprobe_radius).to_a.each do |radius|
61
- (0..(storage.parameters[:number_of_random_vectors] - 1)).to_a.combination(radius).each do |flips|
62
- probe = hash.clone
63
- flips.each { |d| probe[d] = (probe[d] == 1) ? 0 : 1 }
64
- probe_hash = array_to_hash(probe)
65
- probe_bucket_results = storage.query_bucket(bucket, probe_hash)
66
- results += probe_bucket_results if probe_bucket_results
67
- end
68
- end
50
+ hash_arrays = hashes(vector)
51
+ hashes = hash_arrays.map { |a| array_to_hash(a) }
52
+ results = storage.query_buckets(hashes)
53
+ # Multiprobe LSH
54
+ # Take query hashes, move them around at radius r, and use them to do another query
55
+ # TODO: only works for binary LSH atm
56
+ if multiprobe_radius > 0
57
+ mp_arrays = multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
58
+ mp_arrays.each do |probes_arrays|
59
+ probes_hashes = probes_arrays.map { |a| array_to_hash(a) }
60
+ results += storage.query_buckets(probes_hashes)
69
61
  end
70
62
  end
71
63
  results = MathUtil.uniq(results)
72
64
  order_vectors_by_similarity(vector, results)
73
65
  end
74
66
 
67
+ def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
68
+ mp_arrays = []
69
+ (1..multiprobe_radius).to_a.each do |radius|
70
+ (0..(storage.parameters[:number_of_random_vectors] - 1)).to_a.combination(radius).each do |flips|
71
+ probes = Marshal.load(Marshal.dump(hash_arrays))
72
+ probes.each { |probe| flips.each { |d| probe[d] = (probe[d] == 1) ? 0 : 1 } }
73
+ mp_arrays << probes
74
+ end
75
+ end
76
+ mp_arrays
77
+ end
78
+
75
79
  def order_vectors_by_similarity(vector, vectors)
76
80
  vectors.map { |v| [ v, similarity(vector, v) ] } .sort_by { |v, sim| sim } .reverse .map { |vs| vs[0] }
77
81
  end
@@ -49,8 +49,14 @@ module LSH
49
49
  @buckets[i]
50
50
  end
51
51
 
52
- def query_bucket(bucket, hash)
53
- bucket[hash]
52
+ def query_buckets(hashes)
53
+ results = []
54
+ hashes.each_with_index do |hash, i|
55
+ bucket = find_bucket(i)
56
+ in_bucket = bucket[hash]
57
+ results += in_bucket if in_bucket
58
+ end
59
+ results
54
60
  end
55
61
 
56
62
  end
@@ -111,9 +111,15 @@ module LSH
111
111
  "bucket:#{i}" if @redis.get("buckets").to_i > i
112
112
  end
113
113
 
114
- def query_bucket(bucket, hash)
114
+ def query_buckets(hashes)
115
+ vector_hashes = []
116
+ hashes.each_with_index do |hash, i|
117
+ bucket = find_bucket(i)
118
+ vector_hashes += @redis.smembers("#{bucket}:#{hash}")
119
+ end
120
+ vector_hashes.uniq!
115
121
  results = []
116
- @redis.smembers("#{bucket}:#{hash}").map do |vector_hash|
122
+ vector_hashes.each do |vector_hash|
117
123
  vector = MathUtil.zeros(parameters[:dim])
118
124
  vector.load(File.join(@data_dir, vector_hash+'.dat'))
119
125
  results << vector
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: