lsh 0.2.0-java → 0.3.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lsh/index.rb CHANGED
@@ -38,7 +38,8 @@ module LSH
38
38
  Index.new(storage.parameters, storage) if storage.has_index?
39
39
  end
40
40
 
41
- def add(vector)
41
+ def add(vector, id = nil)
42
+ storage.add_vector_id(vector, id) if id
42
43
  hashes(vector).each_with_index do |hash, i|
43
44
  hash_i = array_to_hash(hash)
44
45
  bucket = storage.find_bucket(i)
@@ -46,6 +47,14 @@ module LSH
46
47
  end
47
48
  end
48
49
 
50
+ def vector_to_id(vector)
51
+ storage.vector_to_id(vector)
52
+ end
53
+
54
+ def id_to_vector(id)
55
+ storage.id_to_vector(id)
56
+ end
57
+
49
58
  def query(vector, multiprobe_radius = 0)
50
59
  hash_arrays = hashes(vector)
51
60
  hashes = hash_arrays.map { |a| array_to_hash(a) }
@@ -64,6 +73,18 @@ module LSH
64
73
  order_vectors_by_similarity(vector, results)
65
74
  end
66
75
 
76
+ def query_ids(id, multiprobe_radius = 0)
77
+ vector = id_to_vector(id)
78
+ query_ids_by_vector(vector, multiprobe_radius)
79
+ end
80
+
81
+ def query_ids_by_vector(vector, multiprobe_radius = 0)
82
+ vectors = query(vector, multiprobe_radius)
83
+ results = []
84
+ vectors.each { |v| results << vector_to_id(v) }
85
+ results
86
+ end
87
+
67
88
  def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
68
89
  mp_arrays = []
69
90
  (1..multiprobe_radius).to_a.each do |radius|
@@ -45,6 +45,21 @@ module LSH
45
45
  end
46
46
  end
47
47
 
48
+ def add_vector_id(vector, id)
49
+ @vector_to_id ||= {}
50
+ @vector_to_id[vector.hash] = id
51
+ @id_to_vector ||= {}
52
+ @id_to_vector[id] = vector
53
+ end
54
+
55
+ def vector_to_id(vector)
56
+ @vector_to_id[vector.hash] if @vector_to_id
57
+ end
58
+
59
+ def id_to_vector(id)
60
+ @id_to_vector[id] if @id_to_vector
61
+ end
62
+
48
63
  def find_bucket(i)
49
64
  @buckets[i]
50
65
  end
@@ -112,11 +112,37 @@ module LSH
112
112
  @redis.incr "lsh:buckets"
113
113
  end
114
114
 
115
+ def save_vector(vector)
116
+ path = File.join(@data_dir, vector.hash.to_s+'.dat')
117
+ vector.save(path) unless File.exists?(path)
118
+ end
119
+
120
+ def load_vector(hash)
121
+ vector = MathUtil.zeros(parameters[:dim])
122
+ vector.load(File.join(@data_dir, hash+'.dat'))
123
+ vector
124
+ end
125
+
115
126
  def add_vector_to_bucket(bucket, hash, vector)
116
- vector.save(File.join(@data_dir, vector.hash.to_s+'.dat')) # Writing vector to disk
127
+ save_vector(vector) # Writing vector to disk if not already there
117
128
  @redis.sadd "#{bucket}:#{hash}", vector.hash.to_s # Only storing vector's hash in Redis
118
129
  end
119
130
 
131
+ def add_vector_id(vector, id)
132
+ save_vector(vector) # Writing vector to disk if not already there
133
+ @redis.set "lsh:vector_to_id:#{vector.hash}", id
134
+ @redis.set "lsh:id_to_vector:#{id}", vector.hash.to_s
135
+ end
136
+
137
+ def vector_to_id(vector)
138
+ @redis.get "lsh:vector_to_id:#{vector.hash}"
139
+ end
140
+
141
+ def id_to_vector(id)
142
+ vector_hash = @redis.get "lsh:id_to_vector:#{id}"
143
+ load_vector(vector_hash)
144
+ end
145
+
120
146
  def find_bucket(i)
121
147
  "lsh:bucket:#{i}"
122
148
  end
@@ -133,8 +159,7 @@ module LSH
133
159
  vector_hashes.uniq!
134
160
  results = []
135
161
  vector_hashes.each do |vector_hash|
136
- vector = MathUtil.zeros(parameters[:dim])
137
- vector.load(File.join(@data_dir, vector_hash+'.dat'))
162
+ vector = load_vector(vector_hash)
138
163
  results << vector
139
164
  end
140
165
  results
data/lib/lsh/web.rb CHANGED
@@ -24,7 +24,7 @@ module LSH
24
24
  if mime_type == 'application/json'
25
25
  t0 = Time.now
26
26
  vector = JSON.parse(params[:data])
27
- results = index.query(vector)
27
+ results = index.query(vector, params[:radius] || 0)
28
28
  content_type :json
29
29
  { "time" => Time.now - t0, "results" => results }.to_json
30
30
  else
@@ -32,13 +32,36 @@ module LSH
32
32
  end
33
33
  end
34
34
 
35
+ post '/query-ids' do
36
+ if params[:data] # We're querying with a vector
37
+ mime_type = (params[:mime_type] || 'application/json')
38
+ if mime_type == 'application/json'
39
+ t0 = Time.now
40
+ vector = JSON.parse(params[:data])
41
+ results = index.query_ids_by_vector(vector, params[:radius] || 0)
42
+ content_type :json
43
+ { "time" => Time.now - t0, "results" => results }.to_json
44
+ else
45
+ raise "Unrecognised mime-type"
46
+ end
47
+ elsif params[:id] # We're querying with an id
48
+ raise "Unknown id" unless index.id_to_vector(params[:id])
49
+ t0 = Time.now
50
+ results = index.query_ids(params[:id], params[:radius] || 0)
51
+ content_type :json
52
+ { "time" => Time.now - t0, "results" => results }.to_json
53
+ else
54
+ raise "Missing query"
55
+ end
56
+ end
57
+
35
58
  post '/index' do
36
59
  raise "Missing data" unless params[:data]
37
60
  mime_type = (params[:mime_type] || 'application/json')
38
61
  if mime_type == 'application/json'
39
62
  t0 = Time.now
40
63
  vector = JSON.parse(params[:data])
41
- index.add(vector)
64
+ index.add(vector, params[:id])
42
65
  content_type :json
43
66
  { "time" => Time.now - t0, "status" => "indexed" }.to_json
44
67
  else
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.0
5
+ version: 0.3.0
6
6
  platform: java
7
7
  authors:
8
8
  - Yves Raimond
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-03 00:00:00.000000000 Z
12
+ date: 2013-01-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: jblas-ruby