lsh 0.0.5-java → 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lsh.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # ruby-lsh
2
2
  #
3
- # Copyright (c) 2011 British Broadcasting Corporation
3
+ # Copyright (c) 2012 British Broadcasting Corporation
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -15,6 +15,8 @@
15
15
  # limitations under the License.
16
16
 
17
17
  require_relative 'lsh/index.rb'
18
+ require_relative 'lsh/storage/memory.rb'
19
+ require_relative 'lsh/storage/redis_backend.rb'
18
20
  if RUBY_PLATFORM == 'java'
19
21
  require_relative 'lsh/math_util_jblas.rb'
20
22
  else
data/lib/lsh/index.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # ruby-lsh
2
2
  #
3
- # Copyright (c) 2011 British Broadcasting Corporation
3
+ # Copyright (c) 2012 British Broadcasting Corporation
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -18,26 +18,31 @@ module LSH
18
18
 
19
19
  class Index
20
20
 
21
- attr_reader :projections, :buckets
21
+ attr_reader :projections, :buckets, :storage
22
+
23
# Builds an index on top of +storage+.
#
# parameters - Hash read with the keys :dim, :number_of_random_vectors and
#              :number_of_independent_projections.
# storage    - backend object; defaults to a new LSH::Storage::Memory.
#
# When the storage already reports an index (has_index?), nothing is
# generated and the stored state is used as is.
def initialize(parameters = {}, storage = LSH::Storage::Memory.new)
  @storage = storage
  return if storage.has_index?

  storage.parameters = parameters
  independent_projections = parameters[:number_of_independent_projections]
  storage.projections = generate_projections(
    parameters[:dim],
    parameters[:number_of_random_vectors],
    independent_projections
  )
  # One bucket is created per independent projection.
  independent_projections.times { storage.create_new_bucket }
end
22
36
 
23
- def initialize(dim, k, w = Float::INFINITY, l = 150)
24
- @window = w
25
- @dim = dim
26
- @number_of_random_vectors = k
27
- @number_of_independent_projections = l
28
- @projections = generate_projections(dim, k, l)
29
- @buckets = []
30
- l.times { |i| @buckets << {} }
37
# Re-creates an Index from the parameters saved in +storage+.
# Returns nil when the storage does not report an index.
def self.load(storage)
  return nil unless storage.has_index?

  Index.new(storage.parameters, storage)
end
32
40
 
33
41
  def add(vector)
34
42
  hashes(vector).each_with_index do |hash, i|
35
43
  hash_i = array_to_hash(hash)
36
- if @buckets[i].has_key? hash_i
37
- @buckets[i][hash_i] << vector
38
- else
39
- @buckets[i][hash_i] = [vector]
40
- end
44
+ bucket = storage.find_bucket(i)
45
+ storage.add_vector_to_bucket(bucket, hash_i, vector)
41
46
  end
42
47
  end
43
48
 
@@ -45,22 +50,25 @@ module LSH
45
50
  results = []
46
51
  hashes(vector).each_with_index do |hash, i|
47
52
  hash_i = array_to_hash(hash)
48
- bucket = @buckets[i]
53
+ bucket = storage.find_bucket(i)
54
+ # Multiprobe LSH
49
55
  # Take query hash, move it around at radius r, hash it and use the result as a query
50
56
  # TODO: only works for binary LSH atm
51
- results += bucket[hash_i] if bucket[hash_i]
57
+ bucket_results = storage.query_bucket(bucket, hash_i)
58
+ results += bucket_results if bucket_results
52
59
  if multiprobe_radius > 0
53
60
  (1..multiprobe_radius).to_a.each do |radius|
54
- (0..(@number_of_random_vectors - 1)).to_a.combination(radius).each do |flips|
61
+ (0..(storage.parameters[:number_of_random_vectors] - 1)).to_a.combination(radius).each do |flips|
55
62
  probe = hash.clone
56
63
  flips.each { |d| probe[d] = (probe[d] == 1) ? 0 : 1 }
57
64
  probe_hash = array_to_hash(probe)
58
- results += bucket[probe_hash] if bucket.has_key?(probe_hash)
65
+ probe_bucket_results = storage.query_bucket(bucket, probe_hash)
66
+ results += probe_bucket_results if probe_bucket_results
59
67
  end
60
68
  end
61
69
  end
62
70
  end
63
- results.uniq!
71
+ results = MathUtil.uniq(results)
64
72
  order_vectors_by_similarity(vector, results)
65
73
  end
66
74
 
@@ -70,7 +78,7 @@ module LSH
70
78
 
71
79
  def hashes(vector)
72
80
  hashes = []
73
- @projections.each do |projection|
81
+ storage.projections.each do |projection|
74
82
  hashes << hash(vector, projection)
75
83
  end
76
84
  hashes
@@ -80,7 +88,8 @@ module LSH
80
88
  hash = []
81
89
  projection.each do |random_vector|
82
90
  dot_product = similarity(vector, random_vector)
83
- if @window == Float::INFINITY # Binary LSH
91
+ window = storage.parameters[:window]
92
+ if window == Float::INFINITY # Binary LSH
84
93
  if dot_product >= 0
85
94
  hash << 1
86
95
  else
@@ -88,7 +97,7 @@ module LSH
88
97
  end
89
98
  else
90
99
  b = bias ? MathUtil.random_uniform : 0.0
91
- hash << (b + dot_product / @window).floor
100
+ hash << (b + dot_product / window).floor
92
101
  end
93
102
  end
94
103
  hash
@@ -137,7 +146,7 @@ module LSH
137
146
  end
138
147
 
139
148
  def inspect
140
- "LSH index; dimension: #{@dim}; window size: #{@window}; #{@number_of_random_vectors} random vectors; #{@number_of_independent_projections} independent projections"
149
+ "#<LSH index; dimension: #{storage.parameters.dim}; window size: #{storage.parameters.window}; #{storage.parameters.number_of_random_vectors} random vectors; #{storage.parameters.number_of_independent_projections} independent projections>"
141
150
  end
142
151
 
143
152
  end
@@ -1,6 +1,6 @@
1
1
  # ruby-lsh
2
2
  #
3
- # Copyright (c) 2011 British Broadcasting Corporation
3
+ # Copyright (c) 2012 British Broadcasting Corporation
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
15
15
  # limitations under the License.
16
16
 
17
17
  require 'gsl'
18
+ require 'json'
18
19
 
19
20
  module LSH
20
21
 
@@ -42,6 +43,33 @@ module LSH
42
43
  v.norm
43
44
  end
44
45
 
46
# Returns the elements of +vs+ without duplicates, keeping the first
# occurrence of each and comparing with Array#include? (i.e. ==).
#
# Array#uniq can't be used here as
# [ v, JSON.parse(v.to_json) ].uniq.size == 2 with GSL
def self.uniq(vs)
  distinct = []
  vs.each do |candidate|
    next if distinct.include?(candidate)

    distinct.push(candidate)
  end
  distinct
end
53
+
54
+ end
55
+
56
+ end
57
+
58
# JSON serialisation hooks added to GSL::Vector.
module GSL

  class Vector

    # Serialises the vector as a JSON object holding the class name under
    # 'json_class' and the elements (as returned by to_a) under 'data'.
    # Extra arguments are forwarded to Hash#to_json.
    def to_json(*args)
      payload = { 'json_class' => self.class.name, 'data' => to_a }
      payload.to_json(*args)
    end

    # Builds a vector from a hash shaped like the one emitted by to_json,
    # passing the entries of o['data'] to alloc as separate arguments.
    def self.json_create(o)
      elements = o['data']
      alloc(*elements)
    end

  end

end
@@ -1,6 +1,6 @@
1
1
  # ruby-lsh
2
2
  #
3
- # Copyright (c) 2011 British Broadcasting Corporation
3
+ # Copyright (c) 2012 British Broadcasting Corporation
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -40,6 +40,31 @@ module LSH
40
40
  v.norm2
41
41
  end
42
42
 
43
# Returns +vs+ without duplicates by delegating to the collection's own
# #uniq.
def self.uniq(vs)
  vs.uniq
end
46
+
47
+ end
48
+
49
+ end
50
+
51
+
52
# JSON serialisation hooks added to JBLAS::DoubleMatrix.
module JBLAS

  class DoubleMatrix

    # Serialises the matrix as a JSON object holding the fixed class name
    # under 'json_class' and the result of to_a under 'data'. Extra
    # arguments are forwarded to Hash#to_json.
    def to_json(*args)
      payload = { 'json_class' => 'JBLAS::DoubleMatrix', 'data' => to_a }
      payload.to_json(*args)
    end

    # Builds a matrix from a hash shaped like the one emitted by to_json:
    # o['data'] goes through from_array and the result of calling .t on it
    # is returned (presumably the transpose -- confirm against jblas-ruby).
    def self.json_create(o)
      matrix = from_array(o['data'])
      matrix.t
    end

  end

end
70
+
@@ -0,0 +1,60 @@
1
+ # ruby-lsh
2
+ #
3
+ # Copyright (c) 2012 British Broadcasting Corporation
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module LSH
18
+
19
+ module Storage
20
+
21
# In-process storage backend for an LSH index.
#
# Projections and parameters are plain attributes; buckets are kept as an
# Array of Hashes, each Hash mapping a hash key to the Array of vectors
# stored under that key.
class Memory

  attr_accessor :projections
  attr_accessor :parameters
  attr_reader :buckets

  # True when projections, parameters and buckets have all been set.
  # Always returns a strict boolean rather than the buckets array.
  def has_index?
    !!(projections && parameters && @buckets)
  end

  # Drops the buckets. Projections and parameters are left untouched.
  def reset!
    @buckets = nil
  end

  # Appends a new empty bucket, creating the bucket list on first use.
  # Returns the list of buckets.
  def create_new_bucket
    @buckets ||= []
    @buckets << {}
  end

  # Appends +vector+ to the list stored under +hash+ in +bucket+, creating
  # that list when the key is new. Returns the list.
  def add_vector_to_bucket(bucket, hash, vector)
    (bucket[hash] ||= []) << vector
  end

  # Returns the i-th bucket, or nil when it does not exist. This includes
  # the case where no bucket has been created yet (or after reset!), which
  # mirrors the Redis backend returning nil for an unknown bucket instead
  # of raising.
  def find_bucket(i)
    @buckets && @buckets[i]
  end

  # Returns the vectors stored under +hash+ in +bucket+, or nil if none.
  def query_bucket(bucket, hash)
    bucket[hash]
  end

end
57
+
58
+ end
59
+
60
+ end
@@ -0,0 +1,86 @@
1
+ # ruby-lsh
2
+ #
3
+ # Copyright (c) 2012 British Broadcasting Corporation
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'redis'
18
+ require 'json'
19
+
20
+ module LSH
21
+
22
+ module Storage
23
+
24
# Redis-backed storage for an LSH index.
#
# Layout used in Redis:
#   "projections"       - JSON document of the projections
#   "parameters"        - JSON document of the index parameters
#   "buckets"           - counter holding the number of buckets
#   "bucket:<i>:<hash>" - set of JSON-encoded vectors
class RedisBackend

  attr_reader :redis

  # params - Hash whose :redis entry is handed to Redis.new.
  def initialize(params = { :redis => { :host => '127.0.0.1', :port => 6379 } })
    @redis = Redis.new(params[:redis])
  end

  # Flushes the whole Redis instance (FLUSHALL) and forgets the memoised
  # projections and parameters so they are not served stale afterwards.
  def reset!
    @projections = nil
    @parms = nil
    @redis.flushall
  end

  # True when projections, parameters and at least one bucket are stored.
  # The bucket counter comes back from Redis as a String (or nil), so it is
  # converted before the numeric comparison.
  def has_index?
    !!(projections && parameters && number_of_buckets > 0)
  end

  # Stores the projections as JSON and drops the memoised copy.
  def projections=(projections)
    @projections = nil
    @redis.set "projections", projections.to_json
  end

  # Returns the stored projections (memoised), or nil when none are stored.
  # NOTE(review): objects serialised with a 'json_class' key are only
  # revived if the json library in use enables create_additions for
  # JSON.parse -- confirm for the json version this gem runs against.
  def projections
    @projections ||= stored_json("projections")
  end

  # Stores the parameters as JSON. An infinite :window is written as the
  # string 'Infinity'. The conversion is done on a copy so the caller's
  # hash is left untouched.
  def parameters=(parms)
    stored = parms.dup
    stored[:window] = 'Infinity' if stored[:window] == Float::INFINITY
    @parms = nil
    @redis.set "parameters", stored.to_json
  end

  # Returns the stored parameters (memoised) with symbol keys and :window
  # converted back from 'Infinity', or nil when none are stored.
  def parameters
    @parms ||= begin
      raw = stored_json("parameters")
      if raw
        parms = raw.each_with_object({}) { |(key, value), acc| acc[key.to_sym] = value }
        parms[:window] = Float::INFINITY if parms[:window] == 'Infinity'
        parms
      end
    end
  end

  # Increments the bucket counter and returns what Redis returns for INCR.
  def create_new_bucket
    @redis.incr "buckets"
  end

  # Adds the JSON form of +vector+ to the set "<bucket>:<hash>".
  def add_vector_to_bucket(bucket, hash, vector)
    @redis.sadd "#{bucket}:#{hash}", vector.to_json
  end

  # Returns the key prefix "bucket:<i>" when the bucket counter is greater
  # than +i+, nil otherwise.
  def find_bucket(i)
    "bucket:#{i}" if number_of_buckets > i
  end

  # Returns the parsed members of the set "<bucket>:<hash>".
  def query_bucket(bucket, hash)
    @redis.smembers("#{bucket}:#{hash}").map { |vector_json| JSON.parse(vector_json) }
  end

  private

  # Bucket counter as an Integer; a missing key counts as 0.
  def number_of_buckets
    @redis.get("buckets").to_i
  end

  # Parsed JSON stored under +key+, or nil when the key is absent.
  def stored_json(key)
    json = @redis.get(key)
    json && JSON.parse(json)
  end

end
83
+
84
+ end
85
+
86
+ end
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.5
5
+ version: 0.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - Yves Raimond
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2012-12-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: jblas-ruby
@@ -29,6 +29,42 @@ dependencies:
29
29
  none: false
30
30
  prerelease: false
31
31
  type: :runtime
32
+ - !ruby/object:Gem::Dependency
33
+ name: json-jruby
34
+ version_requirements: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: !binary |-
39
+ MA==
40
+ none: false
41
+ requirement: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: !binary |-
46
+ MA==
47
+ none: false
48
+ prerelease: false
49
+ type: :runtime
50
+ - !ruby/object:Gem::Dependency
51
+ name: redis
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: !binary |-
57
+ MA==
58
+ none: false
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: !binary |-
64
+ MA==
65
+ none: false
66
+ prerelease: false
67
+ type: :runtime
32
68
  description: An implementation of LSH in Ruby, using JBLAS for JRuby and GSL for MRI
33
69
  email: yves.raimond@bbc.co.uk
34
70
  executables: []
@@ -39,6 +75,8 @@ files:
39
75
  - lib/lsh/index.rb
40
76
  - lib/lsh/math_util_gsl.rb
41
77
  - lib/lsh/math_util_jblas.rb
78
+ - lib/lsh/storage/memory.rb
79
+ - lib/lsh/storage/redis_backend.rb
42
80
  homepage: https://github.com/bbcrd/ruby-lsh
43
81
  licenses: []
44
82
  post_install_message: