annoy-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+ #ifndef KISSRANDOM_H
2
+ #define KISSRANDOM_H
3
+
4
+ #if defined(_MSC_VER) && _MSC_VER == 1500
5
+ typedef unsigned __int32 uint32_t;
6
+ typedef unsigned __int64 uint64_t;
7
+ #else
8
+ #include <stdint.h>
9
+ #endif
10
+
11
+ // KISS = "keep it simple, stupid", but high quality random number generator
12
+ // http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code"
13
+ // http://mathforum.org/kb/message.jspa?messageID=6627731
14
+ // https://de.wikipedia.org/wiki/KISS_(Zufallszahlengenerator)
15
+
16
+ // 32 bit KISS
17
struct Kiss32Random {
  // Generator state. Only x is taken from the seed; y, z and c always start
  // from the fixed constants assigned in the constructor.
  uint32_t x;  // linear congruential generator state
  uint32_t y;  // xorshift state
  uint32_t z;  // multiply-with-carry value
  uint32_t c;  // multiply-with-carry carry

  // seed must be != 0
  Kiss32Random(uint32_t seed = 123456789)
      : x(seed), y(362436000), z(521288629), c(7654321) {}

  // Advances the three sub-generators by one step each and returns the sum of
  // their states, wrapped to 32 bits.
  uint32_t kiss() {
    // Linear congruential step
    x = x * 69069 + 12345;

    // Xorshift step
    y ^= y << 13;
    y ^= y >> 17;
    y ^= y << 5;

    // Multiply-with-carry step: the 64-bit product is split into the new
    // carry (high word) and the new value (low word).
    const uint64_t product = 698769069ULL * z + c;
    c = static_cast<uint32_t>(product >> 32);
    z = static_cast<uint32_t>(product);

    return x + y + z;
  }

  // Draws a random 0 or 1 (lowest bit of the next output).
  inline int flip() {
    return static_cast<int>(kiss() & 1u);
  }

  // Draws a random integer between 0 and n-1, where n is at most the number
  // of data points you have.
  inline size_t index(size_t n) {
    const size_t draw = kiss();
    return draw % n;
  }

  // Replaces the seeded state word x; y, z and c are left as they are.
  inline void set_seed(uint32_t seed) {
    x = seed;
  }
};
59
+
60
+ // 64 bit KISS. Use this if you have more than about 2^24 data points ("big data" ;) )
61
struct Kiss64Random {
  // Generator state. Only x is taken from the seed; y, z and c always start
  // from the fixed constants assigned in the constructor.
  uint64_t x;  // multiply-with-carry value
  uint64_t y;  // xorshift state
  uint64_t z;  // linear congruential generator state
  uint64_t c;  // multiply-with-carry carry

  // seed must be != 0
  Kiss64Random(uint64_t seed = 1234567890987654321ULL) {
    x = seed;
    y = 362436362436362436ULL;
    z = 1066149217761810ULL;
    c = 123456123456123456ULL;
  }

  // Advances the three sub-generators by one step each and returns the sum of
  // their states, wrapped to 64 bits.
  uint64_t kiss() {
    // Linear congruence generator (unsigned literal: the arithmetic is meant
    // to wrap modulo 2^64)
    z = 6906969069ULL * z + 1234567;

    // Xor shift
    y ^= (y << 13);
    y ^= (y >> 17);
    y ^= (y << 43);

    // Multiply-with-carry (uint128_t t = (2^58 + 1) * x + c; c = t >> 64; x = (uint64_t) t)
    // carried out in 64-bit pieces: t holds the low word of 2^58 * x + c,
    // x >> 6 is the high word of 2^58 * x, and (x < t) detects the carry out
    // of the final addition.
    uint64_t t = (x << 58) + c;
    c = (x >> 6);
    x += t;
    c += (x < t);

    return x + y + z;
  }

  // Draws a random 0 or 1 (lowest bit of the next output).
  inline int flip() {
    return kiss() & 1;
  }

  // Draws a random integer between 0 and n-1, where n is at most the number
  // of data points you have.
  inline size_t index(size_t n) {
    return kiss() % n;
  }

  // Replaces the seeded state word x; y, z and c are left as they are.
  // Takes the full 64-bit seed, matching the constructor, so that seeds above
  // 2^32 - 1 are not truncated. Narrower integer arguments still convert
  // implicitly.
  inline void set_seed(uint64_t seed) {
    x = seed;
  }
};
104
+
105
+ #endif
106
+ // vim: tabstop=2 shiftwidth=2
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'annoy/version'
4
+ require 'annoy/annoy'
5
+
6
module Annoy
  # AnnoyIndex is a class that provides functions for k-nearest neighbors search.
  # The methods in this class are implemented similarly to Annoy's Python API (https://github.com/spotify/annoy#full-python-api).
  # Every operation is delegated to a metric-specific native index object chosen in the constructor.
  #
  # @example
  #   require 'annoy'
  #
  #   index = AnnoyIndex.new(n_features: 100, metric: 'euclidean')
  #
  #   5000.times do |item_id|
  #     item_vec = Array.new(100) { rand - 0.5 }
  #     index.add_item(item_id, item_vec)
  #   end
  #
  #   index.build(10)
  #
  #   index.get_nns_by_item(0, 100)
  #
  class AnnoyIndex
    # Returns the number of features of indexed item.
    # @return [Integer]
    attr_reader :n_features

    # Returns the metric of index.
    # @return [String]
    attr_reader :metric

    # Create a new search index.
    #
    # @param n_features [Integer] The number of features (dimensions) of stored vector.
    #   Any Numeric is accepted and converted with to_i.
    # @param metric [String, Symbol] The distance metric between vectors
    #   ('angular', 'dot', 'hamming', 'euclidean', or 'manhattan').
    # @raise [ArgumentError] If n_features is not Numeric or the metric is unknown.
    def initialize(n_features:, metric: 'angular')
      raise ArgumentError, 'Expect n_features to be Integer.' unless n_features.is_a?(Numeric)

      @n_features = n_features.to_i
      # Normalise to a String so that a Symbol such as :euclidean selects the
      # same native index as 'euclidean' and #metric always returns a String.
      @metric = metric.to_s

      @index = case @metric
               when 'angular'
                 AnnoyIndexAngular.new(@n_features)
               when 'dot'
                 AnnoyIndexDotProduct.new(@n_features)
               when 'hamming'
                 AnnoyIndexHamming.new(@n_features)
               when 'euclidean'
                 AnnoyIndexEuclidean.new(@n_features)
               when 'manhattan'
                 AnnoyIndexManhattan.new(@n_features)
               else
                 raise ArgumentError, "No such metric: #{@metric}."
               end
    end

    # Add item to be indexed.
    #
    # @param i [Integer] The ID of item.
    # @param v [Array] The vector of item.
    # @return [Boolean]
    def add_item(i, v)
      @index.add_item(i, v)
    end

    # Build a forest of index trees. After building, no more items can be added.
    #
    # @param n_trees [Integer] The number of trees. More trees gives higher search precision.
    # @return [Boolean]
    def build(n_trees)
      @index.build(n_trees)
    end

    # Save the search index to disk. After saving, no more items can be added.
    #
    # @param filename [String] The filename of search index.
    # @param prefault [Boolean] The flag forwarded unchanged to the native index's save method.
    # @return [Boolean]
    def save(filename, prefault: false)
      @index.save(filename, prefault)
    end

    # Load a search index from disk.
    #
    # @param filename [String] The filename of search index.
    # @param prefault [Boolean] The flag indicating whether to pre-read the entire file into memory.
    # @return [Boolean]
    def load(filename, prefault: false)
      @index.load(filename, prefault)
    end

    # Unload the search index.
    #
    # @return [Boolean]
    def unload
      @index.unload
    end

    # Search the n closest items.
    #
    # @param i [Integer] The ID of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param search_k [Integer] The maximum number of nodes inspected during the search. If -1 is given, it sets to n * n_trees.
    # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_item(i, n, search_k: -1, include_distances: false)
      @index.get_nns_by_item(i, n, search_k, include_distances)
    end

    # Search the n closest items.
    #
    # @param v [Array] The vector of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param search_k [Integer] The maximum number of nodes inspected during the search. If -1 is given, it sets to n * n_trees.
    # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_vector(v, n, search_k: -1, include_distances: false)
      @index.get_nns_by_vector(v, n, search_k, include_distances)
    end

    # Return the item vector.
    #
    # @param i [Integer] The ID of item.
    # @return [Array]
    def get_item(i)
      @index.get_item(i)
    end

    # Calculate the distances between items.
    #
    # @param i [Integer] The ID of item.
    # @param j [Integer] The ID of item.
    # @return [Float or Integer]
    def get_distance(i, j)
      @index.get_distance(i, j)
    end

    # Return the number of items in the search index.
    # @return [Integer]
    def n_items
      @index.get_n_items
    end

    # Return the number of trees in the search index.
    # @return [Integer]
    def n_trees
      @index.get_n_trees
    end

    # Prepare annoy to build the index in the specified file instead of RAM.
    # (call this method before adding items, no need to save after building).
    #
    # @param filename [String] The filename of search index.
    # @return [Boolean]
    def on_disk_build(filename)
      @index.on_disk_build(filename)
    end

    # Set to verbose mode.
    #
    # @param flag [Boolean]
    def verbose(flag)
      @index.verbose(flag)
    end

    # Set seed for the random number generator.
    #
    # @param s [Integer]
    def seed(s)
      @index.set_seed(s)
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Annoy.rb is a Ruby wrapper for Annoy (Approximate Nearest Neighbors Oh Yeah).
4
module Annoy
  # Version string of the Annoy.rb gem. Frozen explicitly so the constant
  # cannot be mutated in place.
  VERSION = '0.1.0'.freeze
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: annoy-rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-08-08 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Annoy.rb is a Ruby binding for the Annoy (Approximate Nearest Neighbors
14
+ Oh Yeah).
15
+ email:
16
+ - yoshoku@outlook.com
17
+ executables: []
18
+ extensions:
19
+ - ext/annoy/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - ".gitignore"
23
+ - ".rspec"
24
+ - ".travis.yml"
25
+ - CHANGELOG.md
26
+ - CODE_OF_CONDUCT.md
27
+ - Gemfile
28
+ - LICENSE.txt
29
+ - README.md
30
+ - Rakefile
31
+ - annoy-rb.gemspec
32
+ - ext/annoy/annoy.cpp
33
+ - ext/annoy/annoy.hpp
34
+ - ext/annoy/extconf.rb
35
+ - ext/annoy/src/annoylib.h
36
+ - ext/annoy/src/kissrandom.h
37
+ - lib/annoy.rb
38
+ - lib/annoy/version.rb
39
+ homepage: https://github.com/yoshoku/annoy.rb
40
+ licenses:
41
+ - Apache-2.0
42
+ metadata:
43
+ homepage_uri: https://github.com/yoshoku/annoy.rb
44
+ source_code_uri: https://github.com/yoshoku/annoy.rb
45
+ changelog_uri: https://github.com/yoshoku/annoy.rb/blob/master/CHANGELOG.md
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubygems_version: 3.1.2
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: Ruby binding for the Annoy (Approximate Nearest Neighbors Oh Yeah).
65
+ test_files: []