lsh 0.2.0-java → 0.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/lsh/index.rb CHANGED
@@ -38,7 +38,8 @@ module LSH
38
38
  Index.new(storage.parameters, storage) if storage.has_index?
39
39
  end
40
40
 
41
- def add(vector)
41
+ def add(vector, id = nil)
42
+ storage.add_vector_id(vector, id) if id
42
43
  hashes(vector).each_with_index do |hash, i|
43
44
  hash_i = array_to_hash(hash)
44
45
  bucket = storage.find_bucket(i)
@@ -46,6 +47,14 @@ module LSH
46
47
  end
47
48
  end
48
49
 
50
+ def vector_to_id(vector)
51
+ storage.vector_to_id(vector)
52
+ end
53
+
54
+ def id_to_vector(id)
55
+ storage.id_to_vector(id)
56
+ end
57
+
49
58
  def query(vector, multiprobe_radius = 0)
50
59
  hash_arrays = hashes(vector)
51
60
  hashes = hash_arrays.map { |a| array_to_hash(a) }
@@ -64,6 +73,18 @@ module LSH
64
73
  order_vectors_by_similarity(vector, results)
65
74
  end
66
75
 
76
+ def query_ids(id, multiprobe_radius = 0)
77
+ vector = id_to_vector(id)
78
+ query_ids_by_vector(vector, multiprobe_radius)
79
+ end
80
+
81
+ def query_ids_by_vector(vector, multiprobe_radius = 0)
82
+ vectors = query(vector, multiprobe_radius)
83
+ results = []
84
+ vectors.each { |v| results << vector_to_id(v) }
85
+ results
86
+ end
87
+
67
88
  def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
68
89
  mp_arrays = []
69
90
  (1..multiprobe_radius).to_a.each do |radius|
@@ -45,6 +45,21 @@ module LSH
45
45
  end
46
46
  end
47
47
 
48
+ def add_vector_id(vector, id)
49
+ @vector_to_id ||= {}
50
+ @vector_to_id[vector.hash] = id
51
+ @id_to_vector ||= {}
52
+ @id_to_vector[id] = vector
53
+ end
54
+
55
+ def vector_to_id(vector)
56
+ @vector_to_id[vector.hash] if @vector_to_id
57
+ end
58
+
59
+ def id_to_vector(id)
60
+ @id_to_vector[id] if @id_to_vector
61
+ end
62
+
48
63
  def find_bucket(i)
49
64
  @buckets[i]
50
65
  end
@@ -112,11 +112,37 @@ module LSH
112
112
  @redis.incr "lsh:buckets"
113
113
  end
114
114
 
115
+ def save_vector(vector)
116
+ path = File.join(@data_dir, vector.hash.to_s+'.dat')
117
+ vector.save(path) unless File.exists?(path)
118
+ end
119
+
120
+ def load_vector(hash)
121
+ vector = MathUtil.zeros(parameters[:dim])
122
+ vector.load(File.join(@data_dir, hash+'.dat'))
123
+ vector
124
+ end
125
+
115
126
  def add_vector_to_bucket(bucket, hash, vector)
116
- vector.save(File.join(@data_dir, vector.hash.to_s+'.dat')) # Writing vector to disk
127
+ save_vector(vector) # Writing vector to disk if not already there
117
128
  @redis.sadd "#{bucket}:#{hash}", vector.hash.to_s # Only storing vector's hash in Redis
118
129
  end
119
130
 
131
+ def add_vector_id(vector, id)
132
+ save_vector(vector) # Writing vector to disk if not already there
133
+ @redis.set "lsh:vector_to_id:#{vector.hash}", id
134
+ @redis.set "lsh:id_to_vector:#{id}", vector.hash.to_s
135
+ end
136
+
137
+ def vector_to_id(vector)
138
+ @redis.get "lsh:vector_to_id:#{vector.hash}"
139
+ end
140
+
141
+ def id_to_vector(id)
142
+ vector_hash = @redis.get "lsh:id_to_vector:#{id}"
143
+ load_vector(vector_hash)
144
+ end
145
+
120
146
  def find_bucket(i)
121
147
  "lsh:bucket:#{i}"
122
148
  end
@@ -133,8 +159,7 @@ module LSH
133
159
  vector_hashes.uniq!
134
160
  results = []
135
161
  vector_hashes.each do |vector_hash|
136
- vector = MathUtil.zeros(parameters[:dim])
137
- vector.load(File.join(@data_dir, vector_hash+'.dat'))
162
+ vector = load_vector(vector_hash)
138
163
  results << vector
139
164
  end
140
165
  results
data/lib/lsh/web.rb CHANGED
@@ -24,7 +24,7 @@ module LSH
24
24
  if mime_type == 'application/json'
25
25
  t0 = Time.now
26
26
  vector = JSON.parse(params[:data])
27
- results = index.query(vector)
27
+ results = index.query(vector, params[:radius] || 0)
28
28
  content_type :json
29
29
  { "time" => Time.now - t0, "results" => results }.to_json
30
30
  else
@@ -32,13 +32,36 @@ module LSH
32
32
  end
33
33
  end
34
34
 
35
+ post '/query-ids' do
36
+ if params[:data] # We're querying with a vector
37
+ mime_type = (params[:mime_type] || 'application/json')
38
+ if mime_type == 'application/json'
39
+ t0 = Time.now
40
+ vector = JSON.parse(params[:data])
41
+ results = index.query_ids_by_vector(vector, params[:radius] || 0)
42
+ content_type :json
43
+ { "time" => Time.now - t0, "results" => results }.to_json
44
+ else
45
+ raise "Unrecognised mime-type"
46
+ end
47
+ elsif params[:id] # We're querying with an id
48
+ raise "Unknown id" unless index.id_to_vector(params[:id])
49
+ t0 = Time.now
50
+ results = index.query_ids(params[:id], params[:radius] || 0)
51
+ content_type :json
52
+ { "time" => Time.now - t0, "results" => results }.to_json
53
+ else
54
+ raise "Missing query"
55
+ end
56
+ end
57
+
35
58
  post '/index' do
36
59
  raise "Missing data" unless params[:data]
37
60
  mime_type = (params[:mime_type] || 'application/json')
38
61
  if mime_type == 'application/json'
39
62
  t0 = Time.now
40
63
  vector = JSON.parse(params[:data])
41
- index.add(vector)
64
+ index.add(vector, params[:id])
42
65
  content_type :json
43
66
  { "time" => Time.now - t0, "status" => "indexed" }.to_json
44
67
  else
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.0
5
+ version: 0.3.0
6
6
  platform: java
7
7
  authors:
8
8
  - Yves Raimond
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-03 00:00:00.000000000 Z
12
+ date: 2013-01-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: jblas-ruby