anngler 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 02dc26823e95142aba1c8f1468ee440e5850d17c940df0488227ceec9e5d9568
4
+ data.tar.gz: 0214635227152047fc51f98a16d753e38133e3bd00a1ef50bb6a86a9fc3e4fc4
5
+ SHA512:
6
+ metadata.gz: fdabb190537d475693d21619d2d5ee700b1cb13c27f3c526df0e880023f021d95cf4c5263248fd8d0b68346ca0c04f707faf2f836c3ad7336e3a91f747c7efee
7
+ data.tar.gz: 91cd7f7c122227c3764d3293385125eb81521181e513095eeed4f3cafa8fb97bea20dc60edc7d8c5d0441375b1efcf22bfacb69ac9495a25c50162f42747130a
@@ -0,0 +1,4 @@
1
+ require_relative 'anngler/index.rb'
2
+ require_relative 'anngler/helpers.rb'
3
+ require_relative 'anngler/storage/memory_backend.rb'
4
+ require_relative 'anngler/storage/redis_backend.rb'
@@ -0,0 +1,16 @@
1
module Anngler
  # Stateless vector-math helpers used to rank query results.
  module Helpers
    class << self
      # Euclidean (L2) norm of +vec+.
      #
      # @param vec [#square] a vector whose +square+ result converts, via
      #   +to_a+, into a flat list of numbers (presumably a 1-D Numo array;
      #   verify against callers)
      # @return [Float] the square root of the sum of squared components;
      #   0.0 for an empty vector
      def magnitude(vec)
        # Seeding the fold with 0.0 keeps an empty vector from handing nil
        # to Math.sqrt.
        Math.sqrt(vec.square.to_a.reduce(0.0, :+))
      end

      # Cosine distance between +a+ and +b+: 1 - (a . b) / (|a| * |b|).
      #
      # @param a [#dot, #square] first vector
      # @param b [#square] second vector
      # @return [Float] 1.0 when either vector has zero magnitude (the cosine
      #   is undefined there, so the pair is treated as orthogonal instead of
      #   producing NaN, which cannot be ordered by sort_by)
      def cosine_distance(a, b)
        norm_product = magnitude(a) * magnitude(b)
        return 1.0 if norm_product.zero?

        1 - a.dot(b) / norm_product
      end
    end
  end
end
@@ -0,0 +1,128 @@
1
+ require 'json'
2
+ require 'base64'
3
+ require 'zlib'
4
+
5
module Anngler
  # Approximate-nearest-neighbour index based on random-projection hashing.
  #
  # Every vector is hashed once per tree (the sign pattern of its projection
  # onto that tree's random matrix) and stored in the bucket that the hash
  # names. A query hashes the probe vector the same way, gathers the contents
  # of the matching buckets and ranks them by cosine distance.
  #
  # NOTE(review): this class uses Numo::DFloat, but the requires visible next
  # to it are only json/base64/zlib - presumably the caller is expected to
  # load numo/narray first; confirm.
  class Index
    # the number of features of the vectors we are storing
    attr_reader :n_features

    # the number of projections to generate (more = fewer vectors per bucket)
    attr_reader :n_projections

    # seed for our random number generator; when no seed is provided we stay
    # deterministic by falling back to the bucket name parsed as a base-36
    # integer
    attr_reader :seed

    # the name of the bucket to allow multiple different hash tables in redis
    attr_reader :bucket_name

    # the random number generator for the projection matrices
    attr_reader :rng

    # an n_trees x n_features x n_projections matrix to store our projections
    attr_reader :trees

    # how many different projections to overlap (more allows for better
    # accuracy but will slow performance)
    attr_reader :n_trees

    # which storage service to use (either redis or local memory)
    attr_reader :storage

    # @param bucket_name [String] prefix for every storage key; also the
    #   source of the default seed
    # @param n_projections [Integer] projections (hash bits) per tree
    # @param n_features [Integer] length of the vectors being indexed
    # @param seed [Integer, nil] RNG seed; nil derives one from bucket_name
    # @param n_trees [Integer] number of independent projection matrices
    # @param storage [#add_vector, #remove_vector, #query_bucket] backend
    def initialize(
      bucket_name,
      n_projections,
      n_features,
      seed: nil,
      n_trees: 1,
      storage: Anngler::Storage::MemoryBackend.new
    )
      @n_projections = n_projections
      @n_features = n_features
      @seed = seed || bucket_name.to_i(36)
      @bucket_name = bucket_name
      @rng = Random.new(@seed)
      @n_trees = n_trees
      @storage = storage

      gen_trees
    end

    # Store +vec+ (with an optional label) in the matching bucket of every
    # tree.
    #
    # @param vec [#dot, #to_a] the vector to index (presumably a 1-D
    #   Numo::DFloat of length n_features; verify against callers)
    # @param label [String] free-form label returned alongside the vector
    def add(vec, label: "")
      # Serialize the vector and the label once, then reuse for each tree
      serialized_data = pack_data(vec, label)

      calc_hashes(vec).each_with_index do |hash, i|
        @storage.add_vector(bucket_key(i, hash), serialized_data)
      end
    end

    # Remove +vec+ from the matching bucket of every tree. Matching is done on
    # the encoded vector only, so the label does not need to be supplied.
    #
    # @param vec [#dot, #to_a] the vector to remove
    def remove(vec)
      encoded = encode_vec(vec)

      calc_hashes(vec).each_with_index do |hash, i|
        @storage.remove_vector(bucket_key(i, hash), encoded)
      end
    end

    # Look up the candidates that share a bucket with +vec+ in any tree.
    #
    # @param vec [#dot, #to_a] the probe vector
    # @return [Array<Hash>] hashes with the keys "label" and "vec", de-duplicated
    #   and ordered by ascending cosine distance to +vec+
    def query(vec)
      # search each tree and collect the raw (still encoded) entries
      raw_results = calc_hashes(vec).each_with_index.flat_map do |hash, i|
        @storage.query_bucket(bucket_key(i, hash))
      end

      # remove duplicates, decode the data and sort by cosine distance
      raw_results
        .uniq
        .map { |encoded_data| unpack_data(encoded_data) }
        .sort_by { |data| Helpers.cosine_distance(vec, data["vec"]) }
    end

    private

    # Fill @trees with uniform random values in [-1, 1), drawn from @rng so
    # that the same seed always yields the same projections.
    def gen_trees
      vals = Array.new(@n_trees * @n_features * @n_projections) { (@rng.rand * 2) - 1 }
      @trees = Numo::DFloat.asarray(vals).reshape(@n_trees, @n_features, @n_projections)
    end

    # Storage key of the bucket that +hash+ selects inside tree +tree_index+.
    def bucket_key(tree_index, hash)
      "#{@bucket_name}:#{tree_index}:#{hash2string(hash)}"
    end

    # return a hash key for each tree
    #
    # Iterates over the tree indices 0...n_trees. (Mapping over the literal
    # `[0..n]` would visit a single Range element and yield one hash in total,
    # leaving every tree but the first without a bucket of its own.)
    def calc_hashes(vec)
      (0...@n_trees).map do |i|
        vec.dot(@trees[i, true, true]).ge(0.0)
      end
    end

    # turn the bit vector into a hexadecimal string
    def hash2string(hash)
      hash.to_a.join.to_i(2).to_s(16)
    end

    # Comma-join the components, deflate and Base64-encode them.
    def encode_vec(vec)
      Base64.encode64(Zlib::Deflate.deflate(vec.to_a.join(",")))
    end

    # Inverse of encode_vec; returns a Numo::DFloat.
    def decode_vec(vec)
      Numo::DFloat.asarray(Zlib::Inflate.inflate(Base64.decode64(vec)).split(",").map(&:to_f))
    end

    # "<encoded vector>:<label>" - Base64 output never contains ':', so the
    # first colon always separates the two parts.
    def pack_data(vec, label)
      "#{encode_vec(vec)}:#{label}"
    end

    # Inverse of pack_data. Splits on the first colon only, so labels that
    # themselves contain ':' are returned intact.
    def unpack_data(encoded_data)
      encoded_vector, label = encoded_data.split(":", 2)
      # An empty label has always been reported as nil; keep that contract.
      label = nil if label.to_s.empty?
      { "label" => label, "vec" => decode_vec(encoded_vector) }
    end
  end
end
@@ -0,0 +1,23 @@
1
module Anngler
  module Storage
    # In-process storage backend: keeps every bucket as an Array of packed
    # "<encoded vector>:<label>" strings inside a Hash, newest entry first.
    class MemoryBackend
      def initialize
        # A plain Hash plus fetch(bucket, []) is used instead of Hash.new([]):
        # the latter hands the same default Array to every missing key, so a
        # caller mutating one empty result would change all of them.
        @storage = {}
      end

      # Prepend +data+ to +bucket+.
      #
      # @param bucket [String] bucket key
      # @param data [String] packed "<encoded vector>:<label>" entry
      def add_vector(bucket, data)
        @storage[bucket] = [data] + @storage.fetch(bucket, [])
      end

      # Drop every entry of +bucket+ whose encoded-vector part (the text
      # before the first ':') equals +encoded_vec+, whatever its label.
      #
      # @param bucket [String] bucket key
      # @param encoded_vec [String] encoded vector to match
      def remove_vector(bucket, encoded_vec)
        @storage[bucket] = @storage.fetch(bucket, []).reject do |encoded_str|
          encoded_str.split(":", 2).first == encoded_vec
        end
      end

      # @param bucket [String] bucket key
      # @return [Array<String>] a copy of the bucket's entries (empty when the
      #   bucket is unknown); mutating it does not affect the stored data
      def query_bucket(bucket)
        @storage.fetch(bucket, []).dup
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'redis'
2
+
3
module Anngler
  module Storage
    # Storage backend that keeps every bucket as a Redis list of packed
    # "<encoded vector>:<label>" strings.
    class RedisBackend
      # @param instance [#lpush, #lrange, #lrem] a connected Redis client (or
      #   any object answering the same three calls)
      def initialize(instance)
        @instance = instance
      end

      # Push +data+ onto the head of the list stored at +bucket+.
      #
      # @param bucket [String] list key
      # @param data [String] packed "<encoded vector>:<label>" entry
      def add_vector(bucket, data)
        @instance.lpush(bucket, data)
      end

      # Remove every entry of +bucket+ whose encoded-vector part (the text
      # before the first ':') equals +encoded_vec+, whatever its label.
      #
      # Stopping after the first matching value would leave entries that hold
      # the same vector under a different label, unlike MemoryBackend, so
      # each distinct matching value is removed in turn.
      #
      # @param bucket [String] list key
      # @param encoded_vec [String] encoded vector to match
      # @return [nil]
      def remove_vector(bucket, encoded_vec)
        matching = @instance.lrange(bucket, 0, -1).select do |val|
          val.split(":", 2).first == encoded_vec
        end

        # A count of 0 asks lrem to delete all occurrences of the value, so
        # one call per distinct value is enough.
        matching.uniq.each { |val| @instance.lrem(bucket, 0, val) }
        nil
      end

      # @param bucket [String] list key
      # @return [Array<String>] every entry currently stored in the list
      def query_bucket(bucket)
        @instance.lrange(bucket, 0, -1)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anngler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Aiden Leeming
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: mock_redis
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: numo-narray
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: redis
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: zlib
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description:
126
+ email:
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - lib/anngler.rb
132
+ - lib/anngler/helpers.rb
133
+ - lib/anngler/index.rb
134
+ - lib/anngler/storage/memory_backend.rb
135
+ - lib/anngler/storage/redis_backend.rb
136
+ homepage:
137
+ licenses: []
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubygems_version: 3.1.4
155
+ signing_key:
156
+ specification_version: 4
157
+ summary: A ruby locality sensitive hashing implementation using Redis for storage
158
+ test_files: []