lsh 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -111,9 +111,9 @@ module LSH
111
111
 
112
112
  def hash(vector, projection, bias = true)
113
113
  hash = []
114
- projection.each do |random_vector|
115
- dot_product = similarity(vector, random_vector)
116
- window = storage.parameters[:window]
114
+ dot_products = (vector * projection).row(0).to_a
115
+ window = storage.parameters[:window]
116
+ dot_products.each do |dot_product|
117
117
  if window == Float::INFINITY # Binary LSH
118
118
  if dot_product >= 0
119
119
  hash << 1
@@ -128,6 +128,15 @@ module LSH
128
128
  hash
129
129
  end
130
130
 
131
+ def random_vector(dim)
132
+ MathUtil.random_gaussian_matrix(1, dim)
133
+ end
134
+
135
+ def random_vector_unit(dim)
136
+ r = random_vector(dim)
137
+ r /= MathUtil.norm(r)
138
+ end
139
+
131
140
  def array_to_hash(array)
132
141
  return array.hash
133
142
  # Derives a 28 bit hash value from an array of integers
@@ -150,20 +159,7 @@ module LSH
150
159
  end
151
160
 
152
161
  def generate_projection(dim, k)
153
- vectors = []
154
- k.times do |i|
155
- vectors << random_vector(dim)
156
- end
157
- vectors
158
- end
159
-
160
- def random_vector_unit(dim)
161
- r = random_vector(dim)
162
- r /= MathUtil.norm(r)
163
- end
164
-
165
- def random_vector(dim)
166
- MathUtil.random_gaussian_vector(dim)
162
+ MathUtil.random_gaussian_matrix(dim, k)
167
163
  end
168
164
 
169
165
  def similarity(v1, v2)
@@ -27,20 +27,24 @@ module LSH
27
27
  @@gsl_random.uniform
28
28
  end
29
29
 
30
- def self.random_gaussian_vector(dim)
31
- @@gsl_random.gaussian(1, dim)
30
+ def self.zeros(k, l)
31
+ GSL::Matrix.alloc(k, l)
32
32
  end
33
33
 
34
- def self.zeros(dim)
35
- GSL::Vector.alloc(dim)
34
+ def self.random_gaussian_vector(dim)
35
+ @@gsl_random.gaussian(1, dim)
36
36
  end
37
37
 
38
38
  def self.random_gaussian_matrix(k, l)
39
- GSL::Matrix.randn(k, l)
39
+ matrix = zeros(k, l)
40
+ (0..(k - 1)).each do |i|
41
+ matrix.set_row(i, random_gaussian_vector(l))
42
+ end
43
+ matrix
40
44
  end
41
45
 
42
46
  def self.dot(v1, v2)
43
- v1 * v2.col
47
+ (v1 * v2.transpose)[0,0]
44
48
  end
45
49
 
46
50
  def self.norm(v)
@@ -49,7 +53,7 @@ module LSH
49
53
 
50
54
  def self.uniq(vs)
51
55
  # Can't use uniq as
52
- # [ v, JSON.parse(v.to_json) ].uniq.size == 2 with GSL
56
+ # [ v, JSON.parse(v.to_json, :create_additions => true) ].uniq.size == 2 with GSL
53
57
  results = []
54
58
  vs.each { |v| results << v unless results.member? v }
55
59
  results
@@ -61,7 +65,7 @@ end
61
65
 
62
66
  module GSL
63
67
 
64
- class Vector
68
+ class Matrix
65
69
 
66
70
  def to_json(*a)
67
71
  {
@@ -24,16 +24,16 @@ module LSH
24
24
  JBLAS.rand[0,0]
25
25
  end
26
26
 
27
- def self.random_gaussian_vector(dim)
28
- JBLAS.randn(1, dim)
29
- end
30
-
31
27
  def self.random_gaussian_matrix(k, l)
32
28
  JBLAS.randn(k, l)
33
29
  end
34
30
 
35
- def self.zeros(dim)
36
- JBLAS::DoubleMatrix.new(1, dim)
31
+ def self.random_gaussian_vector(l)
32
+ random_gaussian_matrix(1, l)
33
+ end
34
+
35
+ def self.zeros(k, l)
36
+ JBLAS::DoubleMatrix.new(k, l)
37
37
  end
38
38
 
39
39
  def self.dot(v1, v2)
@@ -68,6 +68,10 @@ module JBLAS
68
68
  from_array(o['data']).t
69
69
  end
70
70
 
71
+ def size
72
+ [ rows, columns ]
73
+ end
74
+
71
75
  end
72
76
 
73
77
  end
@@ -68,9 +68,7 @@ module LSH
68
68
  # (too slow to serialize and store in Redis for
69
69
  # large number of dimensions/projections)
70
70
  projections.each_with_index do |projection, i|
71
- projection.each_with_index do |vector, j|
72
- vector.save(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
73
- end
71
+ projection.save(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
74
72
  end
75
73
  end
76
74
 
@@ -79,13 +77,9 @@ module LSH
79
77
  @projections ||= (
80
78
  projections = []
81
79
  parameters[:number_of_independent_projections].times do |i|
82
- vectors = []
83
- parameters[:number_of_random_vectors].times do |j|
84
- v = MathUtil.zeros(parameters[:dim])
85
- v.load(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
86
- vectors << v
87
- end
88
- projections << vectors
80
+ m = MathUtil.zeros(parameters[:dim], parameters[:number_of_random_vectors])
81
+ m.load(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
82
+ projections << m
89
83
  end
90
84
  projections
91
85
  )
@@ -119,7 +113,7 @@ module LSH
119
113
  end
120
114
 
121
115
  def load_vector(hash)
122
- vector = MathUtil.zeros(parameters[:dim])
116
+ vector = MathUtil.zeros(1, parameters[:dim])
123
117
  vector.load(File.join(@data_dir, hash+'.dat'))
124
118
  vector
125
119
  end
@@ -23,7 +23,7 @@ module LSH
23
23
  mime_type = (params[:mime_type] || 'application/json')
24
24
  if mime_type == 'application/json'
25
25
  t0 = Time.now
26
- vector = JSON.parse(params[:data])
26
+ vector = JSON.parse(params[:data], :create_additions => true)
27
27
  result_vectors = index.query(vector, params[:radius] || 0)
28
28
  results = []
29
29
  if params[:include] == 'id'
@@ -43,7 +43,7 @@ module LSH
43
43
  mime_type = (params[:mime_type] || 'application/json')
44
44
  if mime_type == 'application/json'
45
45
  t0 = Time.now
46
- vector = JSON.parse(params[:data])
46
+ vector = JSON.parse(params[:data], :create_additions => true)
47
47
  results = index.query_ids_by_vector(vector, params[:radius] || 0)
48
48
  content_type :json
49
49
  { "time" => Time.now - t0, "results" => results }.to_json
@@ -66,7 +66,7 @@ module LSH
66
66
  mime_type = (params[:mime_type] || 'application/json')
67
67
  if mime_type == 'application/json'
68
68
  t0 = Time.now
69
- vector = JSON.parse(params[:data])
69
+ vector = JSON.parse(params[:data], :create_additions => true)
70
70
  index.add(vector, params[:id])
71
71
  content_type :json
72
72
  { "time" => Time.now - t0, "status" => "indexed" }.to_json
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-04 00:00:00.000000000 Z
12
+ date: 2013-03-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: gsl