lsh 0.2.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -38,7 +38,8 @@ module LSH
38
38
  Index.new(storage.parameters, storage) if storage.has_index?
39
39
  end
40
40
 
41
- def add(vector)
41
+ def add(vector, id = nil)
42
+ storage.add_vector_id(vector, id) if id
42
43
  hashes(vector).each_with_index do |hash, i|
43
44
  hash_i = array_to_hash(hash)
44
45
  bucket = storage.find_bucket(i)
@@ -46,6 +47,14 @@ module LSH
46
47
  end
47
48
  end
48
49
 
50
# Resolves a vector to the id it was registered under.
# The lookup is delegated entirely to the storage backend.
#
# @param vector the vector to look up
# @return whatever the backend's +vector_to_id+ returns for +vector+
def vector_to_id(vector)
  backend = storage
  backend.vector_to_id(vector)
end
53
+
54
# Resolves an id to the vector that was registered under it.
# The lookup is delegated entirely to the storage backend.
#
# @param id the identifier to look up
# @return whatever the backend's +id_to_vector+ returns for +id+
def id_to_vector(id)
  backend = storage
  backend.id_to_vector(id)
end
57
+
49
58
  def query(vector, multiprobe_radius = 0)
50
59
  hash_arrays = hashes(vector)
51
60
  hashes = hash_arrays.map { |a| array_to_hash(a) }
@@ -64,6 +73,18 @@ module LSH
64
73
  order_vectors_by_similarity(vector, results)
65
74
  end
66
75
 
76
# Queries the index using a previously registered id as the query point.
# The id is first resolved to its vector, then the search is handed to
# +query_ids_by_vector+.
#
# @param id the identifier of the query vector
# @param multiprobe_radius forwarded unchanged to +query_ids_by_vector+
# @return the result of +query_ids_by_vector+ for the resolved vector
def query_ids(id, multiprobe_radius = 0)
  query_ids_by_vector(id_to_vector(id), multiprobe_radius)
end
80
+
81
# Runs +query+ for the given vector and translates every returned vector
# into its id via +vector_to_id+.
#
# @param vector the query vector
# @param multiprobe_radius forwarded unchanged to +query+
# @return [Array] one entry per vector returned by +query+, in the same
#   order; an entry is whatever +vector_to_id+ yields for that vector
def query_ids_by_vector(vector, multiprobe_radius = 0)
  query(vector, multiprobe_radius).map { |neighbour| vector_to_id(neighbour) }
end
87
+
67
88
  def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
68
89
  mp_arrays = []
69
90
  (1..multiprobe_radius).to_a.each do |radius|
@@ -45,6 +45,21 @@ module LSH
45
45
  end
46
46
  end
47
47
 
48
# Records a two-way association between a vector and an id in memory.
# Both lookup tables are created lazily on first use. The forward table is
# keyed by the vector's +hash+, the reverse table by the id itself.
#
# @param vector the vector being registered
# @param id the identifier to associate with it
# @return the vector that was stored
def add_vector_id(vector, id)
  (@vector_to_id ||= {})[vector.hash] = id
  (@id_to_vector ||= {})[id] = vector
end
54
+
55
# Returns the id registered for +vector+, looked up by the vector's +hash+.
#
# @param vector the vector to look up
# @return the stored id, or nil when no id has ever been registered or
#   the vector is unknown
def vector_to_id(vector)
  return nil unless @vector_to_id

  @vector_to_id[vector.hash]
end
58
+
59
# Returns the vector registered under +id+.
#
# @param id the identifier to look up
# @return the stored vector, or nil when no id has ever been registered or
#   the id is unknown
def id_to_vector(id)
  @id_to_vector && @id_to_vector[id]
end
62
+
48
63
  def find_bucket(i)
49
64
  @buckets[i]
50
65
  end
@@ -112,11 +112,37 @@ module LSH
112
112
  @redis.incr "lsh:buckets"
113
113
  end
114
114
 
115
# Writes +vector+ to "<data_dir>/<vector.hash>.dat" unless that file is
# already present, so a vector is written at most once per hash.
#
# Uses File.exist?: the File.exists? alias is deprecated and was removed
# in Ruby 3.2.
#
# @param vector an object responding to +hash+ and +save(path)+
# @return the result of +vector.save+, or nil when the file already existed
def save_vector(vector)
  path = File.join(@data_dir, "#{vector.hash}.dat")
  vector.save(path) unless File.exist?(path)
end
119
+
120
# Reads the vector stored under +hash+ back from the data directory.
#
# A fresh vector is obtained from MathUtil.zeros(parameters[:dim]) and its
# +load+ method is called with "<data_dir>/<hash>.dat".
#
# @param hash [String, Integer] the vector hash used as the file name; the
#   value is interpolated, so a numeric hash works as well as a string
# @return the vector after +load+ has been called on it
# @raise [ArgumentError] when +hash+ is nil (previously this surfaced as a
#   NoMethodError from nil + String)
def load_vector(hash)
  raise ArgumentError, 'vector hash is required' if hash.nil?

  vector = MathUtil.zeros(parameters[:dim])
  vector.load(File.join(@data_dir, "#{hash}.dat"))
  vector
end
125
+
115
126
  def add_vector_to_bucket(bucket, hash, vector)
116
- vector.save(File.join(@data_dir, vector.hash.to_s+'.dat')) # Writing vector to disk
127
+ save_vector(vector) # Writing vector to disk if not already there
117
128
  @redis.sadd "#{bucket}:#{hash}", vector.hash.to_s # Only storing vector's hash in Redis
118
129
  end
119
130
 
131
# Records a two-way association between a vector and an id in Redis.
# The vector is written to disk through +save_vector+. Two keys are then
# set: "lsh:vector_to_id:<vector.hash>" holds the id, and
# "lsh:id_to_vector:<id>" holds the vector's hash as a string.
#
# @param vector the vector being registered
# @param id the identifier to associate with it
# @return the result of the second Redis +set+ call
def add_vector_id(vector, id)
  save_vector(vector) # Writing vector to disk if not already there
  forward_key = "lsh:vector_to_id:#{vector.hash}"
  @redis.set(forward_key, id)
  reverse_key = "lsh:id_to_vector:#{id}"
  @redis.set(reverse_key, vector.hash.to_s)
end
136
+
137
# Fetches the id stored in Redis under "lsh:vector_to_id:<vector.hash>".
#
# @param vector the vector to look up
# @return whatever Redis +get+ returns for that key
def vector_to_id(vector)
  key = "lsh:vector_to_id:#{vector.hash}"
  @redis.get(key)
end
140
+
141
# Resolves an id to its vector using the hash stored in Redis under
# "lsh:id_to_vector:<id>", then loads that vector through +load_vector+.
#
# Returns nil when Redis has no entry for the id. Passing the missing hash
# on to +load_vector+ used to blow up instead of signalling "unknown id".
#
# @param id the identifier to look up
# @return the loaded vector, or nil when the id is unknown
def id_to_vector(id)
  vector_hash = @redis.get("lsh:id_to_vector:#{id}")
  return nil unless vector_hash

  load_vector(vector_hash)
end
145
+
120
146
  def find_bucket(i)
121
147
  "lsh:bucket:#{i}"
122
148
  end
@@ -133,8 +159,7 @@ module LSH
133
159
  vector_hashes.uniq!
134
160
  results = []
135
161
  vector_hashes.each do |vector_hash|
136
- vector = MathUtil.zeros(parameters[:dim])
137
- vector.load(File.join(@data_dir, vector_hash+'.dat'))
162
+ vector = load_vector(vector_hash)
138
163
  results << vector
139
164
  end
140
165
  results
@@ -24,7 +24,13 @@ module LSH
24
24
  if mime_type == 'application/json'
25
25
  t0 = Time.now
26
26
  vector = JSON.parse(params[:data])
27
- results = index.query(vector)
27
+ result_vectors = index.query(vector, params[:radius] || 0)
28
+ results = []
29
+ if params[:include] == 'id'
30
+ result_vectors.each { |v| results << { :id => index.vector_to_id(v), :data => v } }
31
+ else
32
+ result_vectors.each { |v| results << { :data => v } }
33
+ end
28
34
  content_type :json
29
35
  { "time" => Time.now - t0, "results" => results }.to_json
30
36
  else
@@ -32,13 +38,36 @@ module LSH
32
38
  end
33
39
  end
34
40
 
41
# POST /query-ids
#
# Returns the ids of the vectors matching a query, as JSON of the form
# { "time" => <elapsed seconds>, "results" => <ids> }.
#
# The query is given either as params[:data] (a JSON-encoded vector, with
# optional params[:mime_type], which must be 'application/json') or as
# params[:id] (an id already known to the index). params[:radius] is an
# optional multiprobe radius.
#
# Raises a RuntimeError for an unrecognised mime type, an unknown id, or a
# request carrying neither :data nor :id.
post '/query-ids' do
  # Request parameters arrive as strings, while the index uses the radius as
  # an integer range bound; to_i also maps a missing radius (nil) to 0.
  radius = params[:radius].to_i

  if params[:data] # We're querying with a vector
    mime_type = params[:mime_type] || 'application/json'
    raise "Unrecognised mime-type" unless mime_type == 'application/json'

    t0 = Time.now
    vector = JSON.parse(params[:data])
    results = index.query_ids_by_vector(vector, radius)
  elsif params[:id] # We're querying with an id
    raise "Unknown id" unless index.id_to_vector(params[:id])

    t0 = Time.now
    results = index.query_ids(params[:id], radius)
  else
    raise "Missing query"
  end

  content_type :json
  { "time" => Time.now - t0, "results" => results }.to_json
end
63
+
35
64
  post '/index' do
36
65
  raise "Missing data" unless params[:data]
37
66
  mime_type = (params[:mime_type] || 'application/json')
38
67
  if mime_type == 'application/json'
39
68
  t0 = Time.now
40
69
  vector = JSON.parse(params[:data])
41
- index.add(vector)
70
+ index.add(vector, params[:id])
42
71
  content_type :json
43
72
  { "time" => Time.now - t0, "status" => "indexed" }.to_json
44
73
  else
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-03 00:00:00.000000000 Z
12
+ date: 2013-01-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: gsl