anngler 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 02dc26823e95142aba1c8f1468ee440e5850d17c940df0488227ceec9e5d9568
4
+ data.tar.gz: 0214635227152047fc51f98a16d753e38133e3bd00a1ef50bb6a86a9fc3e4fc4
5
+ SHA512:
6
+ metadata.gz: fdabb190537d475693d21619d2d5ee700b1cb13c27f3c526df0e880023f021d95cf4c5263248fd8d0b68346ca0c04f707faf2f836c3ad7336e3a91f747c7efee
7
+ data.tar.gz: 91cd7f7c122227c3764d3293385125eb81521181e513095eeed4f3cafa8fb97bea20dc60edc7d8c5d0441375b1efcf22bfacb69ac9495a25c50162f42747130a
@@ -0,0 +1,4 @@
1
+ require_relative 'anngler/index.rb'
2
+ require_relative 'anngler/helpers.rb'
3
+ require_relative 'anngler/storage/memory_backend.rb'
4
+ require_relative 'anngler/storage/redis_backend.rb'
@@ -0,0 +1,16 @@
1
module Anngler
  # Small vector-math helpers used when ranking query results.
  #
  # The arguments only need to respond to +square+, +sum+ and +dot+
  # (the rest of the library passes Numo arrays).
  module Helpers
    class << self
      # Euclidean length of +vec+: the square root of the sum of its squared
      # elements.
      #
      # The sum is taken on the squared vector directly rather than on a Ruby
      # Array copy, so an empty vector yields 0.0 instead of raising.
      #
      # @param vec [#square] vector whose squared form responds to +sum+
      # @return [Float]
      def magnitude(vec)
        Math.sqrt(vec.square.sum)
      end

      # Cosine distance between +a+ and +b+:
      # 1 - (a . b) / (|a| * |b|).
      #
      # When either vector has zero magnitude the cosine is undefined; the
      # division would produce NaN, which cannot be ordered by +sort_by+.
      # In that case the vectors are treated as having no similarity and
      # 1.0 is returned.
      #
      # @param a [#dot, #square] first vector
      # @param b [#square] second vector
      # @return [Numeric] 0 for identical directions, 1 for orthogonal ones
      def cosine_distance(a, b)
        denominator = magnitude(a) * magnitude(b)
        return 1.0 if denominator.zero?

        1 - a.dot(b) / denominator
      end
    end
  end
end
@@ -0,0 +1,128 @@
1
+ require 'json'
2
+ require 'base64'
3
+ require 'zlib'
4
+
5
module Anngler
  # Locality-sensitive-hashing index based on random projections.
  #
  # Every vector is projected onto +n_trees+ independent random projection
  # matrices; the sign pattern of each projection becomes a bucket key, and the
  # serialized vector (plus an optional label) is stored under that key in the
  # configured storage backend. Queries look up the buckets the query vector
  # falls into and rank the candidates by cosine distance.
  #
  # NOTE(review): Numo::DFloat is referenced below but no
  # `require 'numo/narray'` is visible in this file - confirm the caller
  # loads it.
  class Index
    # the number of features of the vectors we are storing
    attr_reader :n_features

    # the number of projections to generate (more = fewer vectors per bucket)
    attr_reader :n_projections

    # seed for our random number generator; to keep it deterministic we fall
    # back to the bucket name parsed as a base-36 integer if no seed is provided
    attr_reader :seed

    # the name of the bucket, to allow multiple different hash tables in redis
    attr_reader :bucket_name

    # the random number generator for the projection matrices
    attr_reader :rng

    # an n_trees x n_features x n_projections matrix to store our projections
    attr_reader :trees

    # how many different projections to overlap (more allows for better
    # accuracy but will slow performance)
    attr_reader :n_trees

    # which storage service to use (either redis or local memory)
    attr_reader :storage

    # Builds the index and generates its projection matrices.
    #
    # @param bucket_name [String] prefix for every storage key; also the seed
    #   source (via +to_i(36)+) when +seed+ is not given
    # @param n_projections [Integer] number of projections (hash bits) per tree
    # @param n_features [Integer] length of the vectors to be indexed
    # @param seed [Integer, nil] explicit seed for the projection RNG
    # @param n_trees [Integer] number of independent projection matrices
    # @param storage [#add_vector, #remove_vector, #query_bucket] backend
    def initialize(
      bucket_name,
      n_projections,
      n_features,
      seed: nil,
      n_trees: 1,
      storage: Anngler::Storage::MemoryBackend.new
    )
      @n_projections = n_projections
      @n_features = n_features
      @seed = seed || bucket_name.to_i(36)
      @bucket_name = bucket_name
      @rng = Random.new(@seed)
      @n_trees = n_trees
      @storage = storage

      gen_trees
    end

    # Stores +vec+ (with an optional +label+) in the matching bucket of
    # every tree.
    #
    # @param vec [#dot, #to_a] vector to index
    # @param label [String] free-form label stored next to the vector
    def add(vec, label: "")
      hashes = calc_hashes(vec)
      # serialize the vector and the label once; the same payload goes into every tree
      serialized_data = pack_data(vec, label)

      hashes.each_with_index do |hash, i|
        @storage.add_vector(bucket_key(i, hash), serialized_data)
      end
    end

    # Asks the storage backend to drop +vec+ from the bucket it hashes to in
    # every tree. Entries are matched on the encoded vector only (labels are
    # ignored); how many matching entries get removed is up to the backend.
    #
    # @param vec [#dot, #to_a] vector to remove
    def remove(vec)
      encoded = encode_vec(vec)

      calc_hashes(vec).each_with_index do |hash, i|
        @storage.remove_vector(bucket_key(i, hash), encoded)
      end
    end

    # Returns the stored entries that share a bucket with +vec+ in at least
    # one tree, nearest first.
    #
    # @param vec [#dot] query vector
    # @return [Array<Hash>] hashes with the keys "label" and "vec", sorted by
    #   ascending cosine distance to +vec+
    def query(vec)
      # collect the candidates of every tree
      raw_results = calc_hashes(vec).each_with_index.flat_map do |hash, i|
        @storage.query_bucket(bucket_key(i, hash))
      end

      # the same entry can show up in several trees: de-duplicate before decoding
      raw_results.uniq
                 .map { |encoded_data| unpack_data(encoded_data) }
                 .sort_by { |data| Helpers.cosine_distance(vec, data["vec"]) }
    end

    private

    # Fills @trees with values drawn uniformly from [-1, 1) using @rng, so the
    # same seed always produces the same projections.
    def gen_trees
      vals = Array.new(@n_trees * @n_features * @n_projections) { (@rng.rand * 2) - 1 }
      @trees = Numo::DFloat.asarray(vals).reshape(@n_trees, @n_features, @n_projections)
    end

    # Returns one hash (sign pattern of the projection) per tree.
    #
    # Each tree index is visited individually; indexing @trees with a whole
    # range at once would collapse all trees into a single combined hash.
    def calc_hashes(vec)
      Array.new(@n_trees) do |i|
        vec.dot(@trees[i, true, true]).ge(0.0)
      end
    end

    # storage key of the bucket +hash+ selects inside tree number +tree_index+
    def bucket_key(tree_index, hash)
      "#{@bucket_name}:#{tree_index}:#{hash2string(hash)}"
    end

    # turn the bit pattern into a hexadecimal string
    def hash2string(hash)
      hash.to_a.join.to_i(2).to_s(16)
    end

    # comma-joined elements, deflated and Base64 encoded
    def encode_vec(vec)
      Base64.encode64(Zlib::Deflate.deflate(vec.to_a.join(",")))
    end

    # inverse of encode_vec; yields a Numo::DFloat
    def decode_vec(vec)
      Numo::DFloat.asarray(Zlib::Inflate.inflate(Base64.decode64(vec)).split(",").map(&:to_f))
    end

    # "<encoded vector>:<label>"
    def pack_data(vec, label)
      "#{encode_vec(vec)}:#{label}"
    end

    # Splits a packed entry on its FIRST ":" only, so labels that themselves
    # contain ":" are returned intact. An empty or absent label is reported as
    # nil.
    def unpack_data(encoded_data)
      encoded_vector, _separator, label = encoded_data.partition(":")
      label = nil if label.empty?
      { "label" => label, "vec" => decode_vec(encoded_vector) }
    end
  end
end
@@ -0,0 +1,23 @@
1
module Anngler
  module Storage
    # In-process storage backend: keeps every bucket as an Array of packed
    # "<encoded vector>:<label>" strings inside a Hash.
    class MemoryBackend
      def initialize
        # Plain Hash on purpose: a Hash.new([]) default would hand the SAME
        # array object to every caller asking for an unknown bucket.
        @storage = {}
      end

      # Prepends +data+ to +bucket+ (newest entries first).
      #
      # @param bucket [String] bucket key
      # @param data [String] packed entry
      def add_vector(bucket, data)
        @storage[bucket] = [data] + query_bucket(bucket)
      end

      # Drops every entry of +bucket+ whose part before the first ":" equals
      # +encoded_vec+.
      #
      # @param bucket [String] bucket key
      # @param encoded_vec [String] encoded vector to match
      def remove_vector(bucket, encoded_vec)
        @storage[bucket] = query_bucket(bucket).reject do |encoded_str|
          encoded_str.split(":")[0] == encoded_vec
        end
      end

      # @param bucket [String] bucket key
      # @return [Array<String>] entries of +bucket+; a fresh empty array when
      #   the bucket does not exist
      def query_bucket(bucket)
        @storage.fetch(bucket) { [] }
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'redis'
2
+
3
module Anngler
  module Storage
    # Storage backend that keeps every bucket as a Redis list of packed
    # "<encoded vector>:<label>" strings.
    class RedisBackend
      # @param instance [#lpush, #lrange, #lrem] Redis client (or a compatible
      #   object) used for all list operations
      def initialize(instance)
        @instance = instance
      end

      # Pushes +data+ onto the head of the +bucket+ list.
      #
      # @param bucket [String] list key
      # @param data [String] packed entry
      def add_vector(bucket, data)
        @instance.lpush(bucket, data)
      end

      # Removes every entry of +bucket+ whose part before the first ":" equals
      # +encoded_vec+, whatever its label - the same contract as the in-memory
      # backend. The return value is not meaningful.
      #
      # @param bucket [String] list key
      # @param encoded_vec [String] encoded vector to match
      def remove_vector(bucket, encoded_vec)
        matching = @instance.lrange(bucket, 0, -1).select do |val|
          val.split(":")[0] == encoded_vec
        end

        # a count of 0 makes LREM delete all occurrences, so each distinct
        # value only needs one call
        matching.uniq.each { |val| @instance.lrem(bucket, 0, val) }
      end

      # @param bucket [String] list key
      # @return [Array<String>] every entry of the +bucket+ list
      def query_bucket(bucket)
        @instance.lrange(bucket, 0, -1)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anngler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Aiden Leeming
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: mock_redis
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: numo-narray
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: redis
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: zlib
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description:
126
+ email:
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - lib/anngler.rb
132
+ - lib/anngler/helpers.rb
133
+ - lib/anngler/index.rb
134
+ - lib/anngler/storage/memory_backend.rb
135
+ - lib/anngler/storage/redis_backend.rb
136
+ homepage:
137
+ licenses: []
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubygems_version: 3.1.4
155
+ signing_key:
156
+ specification_version: 4
157
+ summary: A ruby locality sensitive hashing implementation using Redis for storage
158
+ test_files: []