lsh 0.3.2 → 0.4.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -111,9 +111,9 @@ module LSH
111
111
 
112
112
  def hash(vector, projection, bias = true)
113
113
  hash = []
114
- projection.each do |random_vector|
115
- dot_product = similarity(vector, random_vector)
116
- window = storage.parameters[:window]
114
+ dot_products = (vector * projection).row(0).to_a
115
+ window = storage.parameters[:window]
116
+ dot_products.each do |dot_product|
117
117
  if window == Float::INFINITY # Binary LSH
118
118
  if dot_product >= 0
119
119
  hash << 1
@@ -128,6 +128,15 @@ module LSH
128
128
  hash
129
129
  end
130
130
 
131
+ def random_vector(dim)
132
+ MathUtil.random_gaussian_matrix(1, dim)
133
+ end
134
+
135
+ def random_vector_unit(dim)
136
+ r = random_vector(dim)
137
+ r /= MathUtil.norm(r)
138
+ end
139
+
131
140
  def array_to_hash(array)
132
141
  return array.hash
133
142
  # Derives a 28 bit hash value from an array of integers
@@ -150,20 +159,7 @@ module LSH
150
159
  end
151
160
 
152
161
  def generate_projection(dim, k)
153
- vectors = []
154
- k.times do |i|
155
- vectors << random_vector(dim)
156
- end
157
- vectors
158
- end
159
-
160
- def random_vector_unit(dim)
161
- r = random_vector(dim)
162
- r /= MathUtil.norm(r)
163
- end
164
-
165
- def random_vector(dim)
166
- MathUtil.random_gaussian_vector(dim)
162
+ MathUtil.random_gaussian_matrix(dim, k)
167
163
  end
168
164
 
169
165
  def similarity(v1, v2)
@@ -27,20 +27,24 @@ module LSH
27
27
  @@gsl_random.uniform
28
28
  end
29
29
 
30
- def self.random_gaussian_vector(dim)
31
- @@gsl_random.gaussian(1, dim)
30
+ def self.zeros(k, l)
31
+ GSL::Matrix.alloc(k, l)
32
32
  end
33
33
 
34
- def self.zeros(dim)
35
- GSL::Vector.alloc(dim)
34
+ def self.random_gaussian_vector(dim)
35
+ @@gsl_random.gaussian(1, dim)
36
36
  end
37
37
 
38
38
  def self.random_gaussian_matrix(k, l)
39
- GSL::Matrix.randn(k, l)
39
+ matrix = zeros(k, l)
40
+ (0..(k - 1)).each do |i|
41
+ matrix.set_row(i, random_gaussian_vector(l))
42
+ end
43
+ matrix
40
44
  end
41
45
 
42
46
  def self.dot(v1, v2)
43
- v1 * v2.col
47
+ (v1 * v2.transpose)[0,0]
44
48
  end
45
49
 
46
50
  def self.norm(v)
@@ -49,7 +53,7 @@ module LSH
49
53
 
50
54
  def self.uniq(vs)
51
55
  # Can't use uniq as
52
- # [ v, JSON.parse(v.to_json) ].uniq.size == 2 with GSL
56
+ # [ v, JSON.parse(v.to_json, :create_additions => true) ].uniq.size == 2 with GSL
53
57
  results = []
54
58
  vs.each { |v| results << v unless results.member? v }
55
59
  results
@@ -61,7 +65,7 @@ end
61
65
 
62
66
  module GSL
63
67
 
64
- class Vector
68
+ class Matrix
65
69
 
66
70
  def to_json(*a)
67
71
  {
@@ -24,16 +24,16 @@ module LSH
24
24
  JBLAS.rand[0,0]
25
25
  end
26
26
 
27
- def self.random_gaussian_vector(dim)
28
- JBLAS.randn(1, dim)
29
- end
30
-
31
27
  def self.random_gaussian_matrix(k, l)
32
28
  JBLAS.randn(k, l)
33
29
  end
34
30
 
35
- def self.zeros(dim)
36
- JBLAS::DoubleMatrix.new(1, dim)
31
+ def self.random_gaussian_vector(l)
32
+ random_gaussian_matrix(1, l)
33
+ end
34
+
35
+ def self.zeros(k, l)
36
+ JBLAS::DoubleMatrix.new(k, l)
37
37
  end
38
38
 
39
39
  def self.dot(v1, v2)
@@ -68,6 +68,10 @@ module JBLAS
68
68
  from_array(o['data']).t
69
69
  end
70
70
 
71
+ def size
72
+ [ rows, columns ]
73
+ end
74
+
71
75
  end
72
76
 
73
77
  end
@@ -68,9 +68,7 @@ module LSH
68
68
  # (too slow to serialize and store in Redis for
69
69
  # large number of dimensions/projections)
70
70
  projections.each_with_index do |projection, i|
71
- projection.each_with_index do |vector, j|
72
- vector.save(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
73
- end
71
+ projection.save(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
74
72
  end
75
73
  end
76
74
 
@@ -79,13 +77,9 @@ module LSH
79
77
  @projections ||= (
80
78
  projections = []
81
79
  parameters[:number_of_independent_projections].times do |i|
82
- vectors = []
83
- parameters[:number_of_random_vectors].times do |j|
84
- v = MathUtil.zeros(parameters[:dim])
85
- v.load(File.join(@data_dir, 'projections', "vector_#{i}_#{j}.dat"))
86
- vectors << v
87
- end
88
- projections << vectors
80
+ m = MathUtil.zeros(parameters[:dim], parameters[:number_of_random_vectors])
81
+ m.load(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
82
+ projections << m
89
83
  end
90
84
  projections
91
85
  )
@@ -119,7 +113,7 @@ module LSH
119
113
  end
120
114
 
121
115
  def load_vector(hash)
122
- vector = MathUtil.zeros(parameters[:dim])
116
+ vector = MathUtil.zeros(1, parameters[:dim])
123
117
  vector.load(File.join(@data_dir, hash+'.dat'))
124
118
  vector
125
119
  end
@@ -23,7 +23,7 @@ module LSH
23
23
  mime_type = (params[:mime_type] || 'application/json')
24
24
  if mime_type == 'application/json'
25
25
  t0 = Time.now
26
- vector = JSON.parse(params[:data])
26
+ vector = JSON.parse(params[:data], :create_additions => true)
27
27
  result_vectors = index.query(vector, params[:radius] || 0)
28
28
  results = []
29
29
  if params[:include] == 'id'
@@ -43,7 +43,7 @@ module LSH
43
43
  mime_type = (params[:mime_type] || 'application/json')
44
44
  if mime_type == 'application/json'
45
45
  t0 = Time.now
46
- vector = JSON.parse(params[:data])
46
+ vector = JSON.parse(params[:data], :create_additions => true)
47
47
  results = index.query_ids_by_vector(vector, params[:radius] || 0)
48
48
  content_type :json
49
49
  { "time" => Time.now - t0, "results" => results }.to_json
@@ -66,7 +66,7 @@ module LSH
66
66
  mime_type = (params[:mime_type] || 'application/json')
67
67
  if mime_type == 'application/json'
68
68
  t0 = Time.now
69
- vector = JSON.parse(params[:data])
69
+ vector = JSON.parse(params[:data], :create_additions => true)
70
70
  index.add(vector, params[:id])
71
71
  content_type :json
72
72
  { "time" => Time.now - t0, "status" => "indexed" }.to_json
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lsh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-04 00:00:00.000000000 Z
12
+ date: 2013-03-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: gsl