lsh 0.2.0-java → 0.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lsh/index.rb +22 -1
- data/lib/lsh/storage/memory.rb +15 -0
- data/lib/lsh/storage/redis_backend.rb +28 -3
- data/lib/lsh/web.rb +25 -2
- metadata +2 -2
data/lib/lsh/index.rb
CHANGED
@@ -38,7 +38,8 @@ module LSH
|
|
38
38
|
Index.new(storage.parameters, storage) if storage.has_index?
|
39
39
|
end
|
40
40
|
|
41
|
-
def add(vector)
|
41
|
+
def add(vector, id = nil)
|
42
|
+
storage.add_vector_id(vector, id) if id
|
42
43
|
hashes(vector).each_with_index do |hash, i|
|
43
44
|
hash_i = array_to_hash(hash)
|
44
45
|
bucket = storage.find_bucket(i)
|
@@ -46,6 +47,14 @@ module LSH
|
|
46
47
|
end
|
47
48
|
end
|
48
49
|
|
50
|
+
def vector_to_id(vector)
|
51
|
+
storage.vector_to_id(vector)
|
52
|
+
end
|
53
|
+
|
54
|
+
def id_to_vector(id)
|
55
|
+
storage.id_to_vector(id)
|
56
|
+
end
|
57
|
+
|
49
58
|
def query(vector, multiprobe_radius = 0)
|
50
59
|
hash_arrays = hashes(vector)
|
51
60
|
hashes = hash_arrays.map { |a| array_to_hash(a) }
|
@@ -64,6 +73,18 @@ module LSH
|
|
64
73
|
order_vectors_by_similarity(vector, results)
|
65
74
|
end
|
66
75
|
|
76
|
+
def query_ids(id, multiprobe_radius = 0)
|
77
|
+
vector = id_to_vector(id)
|
78
|
+
query_ids_by_vector(vector, multiprobe_radius)
|
79
|
+
end
|
80
|
+
|
81
|
+
def query_ids_by_vector(vector, multiprobe_radius = 0)
|
82
|
+
vectors = query(vector, multiprobe_radius)
|
83
|
+
results = []
|
84
|
+
vectors.each { |v| results << vector_to_id(v) }
|
85
|
+
results
|
86
|
+
end
|
87
|
+
|
67
88
|
def multiprobe_hashes_arrays(hash_arrays, multiprobe_radius)
|
68
89
|
mp_arrays = []
|
69
90
|
(1..multiprobe_radius).to_a.each do |radius|
|
data/lib/lsh/storage/memory.rb
CHANGED
@@ -45,6 +45,21 @@ module LSH
|
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
+
def add_vector_id(vector, id)
|
49
|
+
@vector_to_id ||= {}
|
50
|
+
@vector_to_id[vector.hash] = id
|
51
|
+
@id_to_vector ||= {}
|
52
|
+
@id_to_vector[id] = vector
|
53
|
+
end
|
54
|
+
|
55
|
+
def vector_to_id(vector)
|
56
|
+
@vector_to_id[vector.hash] if @vector_to_id
|
57
|
+
end
|
58
|
+
|
59
|
+
def id_to_vector(id)
|
60
|
+
@id_to_vector[id] if @id_to_vector
|
61
|
+
end
|
62
|
+
|
48
63
|
def find_bucket(i)
|
49
64
|
@buckets[i]
|
50
65
|
end
|
data/lib/lsh/storage/redis_backend.rb
CHANGED
@@ -112,11 +112,37 @@ module LSH
|
|
112
112
|
@redis.incr "lsh:buckets"
|
113
113
|
end
|
114
114
|
|
115
|
+
def save_vector(vector)
|
116
|
+
path = File.join(@data_dir, vector.hash.to_s+'.dat')
|
117
|
+
vector.save(path) unless File.exists?(path)
|
118
|
+
end
|
119
|
+
|
120
|
+
def load_vector(hash)
|
121
|
+
vector = MathUtil.zeros(parameters[:dim])
|
122
|
+
vector.load(File.join(@data_dir, hash+'.dat'))
|
123
|
+
vector
|
124
|
+
end
|
125
|
+
|
115
126
|
def add_vector_to_bucket(bucket, hash, vector)
|
116
|
-
|
127
|
+
save_vector(vector) # Writing vector to disk if not already there
|
117
128
|
@redis.sadd "#{bucket}:#{hash}", vector.hash.to_s # Only storing vector's hash in Redis
|
118
129
|
end
|
119
130
|
|
131
|
+
def add_vector_id(vector, id)
|
132
|
+
save_vector(vector) # Writing vector to disk if not already there
|
133
|
+
@redis.set "lsh:vector_to_id:#{vector.hash}", id
|
134
|
+
@redis.set "lsh:id_to_vector:#{id}", vector.hash.to_s
|
135
|
+
end
|
136
|
+
|
137
|
+
def vector_to_id(vector)
|
138
|
+
@redis.get "lsh:vector_to_id:#{vector.hash}"
|
139
|
+
end
|
140
|
+
|
141
|
+
def id_to_vector(id)
|
142
|
+
vector_hash = @redis.get "lsh:id_to_vector:#{id}"
|
143
|
+
load_vector(vector_hash)
|
144
|
+
end
|
145
|
+
|
120
146
|
def find_bucket(i)
|
121
147
|
"lsh:bucket:#{i}"
|
122
148
|
end
|
@@ -133,8 +159,7 @@ module LSH
|
|
133
159
|
vector_hashes.uniq!
|
134
160
|
results = []
|
135
161
|
vector_hashes.each do |vector_hash|
|
136
|
-
vector = MathUtil.zeros(parameters[:dim])
|
137
|
-
vector.load(File.join(@data_dir, vector_hash+'.dat'))
|
162
|
+
vector = load_vector(vector_hash)
|
138
163
|
results << vector
|
139
164
|
end
|
140
165
|
results
|
data/lib/lsh/web.rb
CHANGED
@@ -24,7 +24,7 @@ module LSH
|
|
24
24
|
if mime_type == 'application/json'
|
25
25
|
t0 = Time.now
|
26
26
|
vector = JSON.parse(params[:data])
|
27
|
-
results = index.query(vector)
|
27
|
+
results = index.query(vector, params[:radius] || 0)
|
28
28
|
content_type :json
|
29
29
|
{ "time" => Time.now - t0, "results" => results }.to_json
|
30
30
|
else
|
@@ -32,13 +32,36 @@ module LSH
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
post '/query-ids' do
|
36
|
+
if params[:data] # We're querying with a vector
|
37
|
+
mime_type = (params[:mime_type] || 'application/json')
|
38
|
+
if mime_type == 'application/json'
|
39
|
+
t0 = Time.now
|
40
|
+
vector = JSON.parse(params[:data])
|
41
|
+
results = index.query_ids_by_vector(vector, params[:radius] || 0)
|
42
|
+
content_type :json
|
43
|
+
{ "time" => Time.now - t0, "results" => results }.to_json
|
44
|
+
else
|
45
|
+
raise "Unrecognised mime-type"
|
46
|
+
end
|
47
|
+
elsif params[:id] # We're querying with an id
|
48
|
+
raise "Unknown id" unless index.id_to_vector(params[:id])
|
49
|
+
t0 = Time.now
|
50
|
+
results = index.query_ids(params[:id], params[:radius] || 0)
|
51
|
+
content_type :json
|
52
|
+
{ "time" => Time.now - t0, "results" => results }.to_json
|
53
|
+
else
|
54
|
+
raise "Missing query"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
35
58
|
post '/index' do
|
36
59
|
raise "Missing data" unless params[:data]
|
37
60
|
mime_type = (params[:mime_type] || 'application/json')
|
38
61
|
if mime_type == 'application/json'
|
39
62
|
t0 = Time.now
|
40
63
|
vector = JSON.parse(params[:data])
|
41
|
-
index.add(vector)
|
64
|
+
index.add(vector, params[:id])
|
42
65
|
content_type :json
|
43
66
|
{ "time" => Time.now - t0, "status" => "indexed" }.to_json
|
44
67
|
else
|
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: lsh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.2.0
|
5
|
+
version: 0.3.0
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Yves Raimond
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: jblas-ruby
|