annoy-rb 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +22 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +6 -4
- data/README.md +9 -4
- data/Rakefile +2 -1
- data/Steepfile +20 -0
- data/annoy-rb.gemspec +2 -1
- data/ext/annoy/{annoy.cpp → annoyext.cpp} +7 -7
- data/ext/annoy/{annoy.hpp → annoyext.hpp} +66 -34
- data/ext/annoy/extconf.rb +2 -2
- data/ext/annoy/src/annoylib.h +201 -56
- data/ext/annoy/src/mman.h +242 -0
- data/lib/annoy.rb +4 -3
- data/lib/annoy/version.rb +1 -1
- data/sig/annoy.rbs +114 -0
- metadata +14 -10
- data/.travis.yml +0 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a8884d4c472560181032959b32eb3396b698dedc7dac7211af67bdb03aaa90f6
|
|
4
|
+
data.tar.gz: 1eb4acf66b0685e06aed6455c4efd0ffcb17f8805e9b619f003fb62046db41db
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0103da7a9f443c4fed168ac61004aa135bbd534edaff6f20d641ef818348288a851370693d75657794c8ebfe9dd45dd54177315c7ac68a3400e43c87dfb1c4bb
|
|
7
|
+
data.tar.gz: 8e7314fbee4ed318c47848941aba40f6363f350181c616045354af696dc99cbd678c7b999b5bfa35339ad82e6cb3f6782510354d44db5d4d1fb81591128f4822
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: build
|
|
2
|
+
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
fail-fast: false
|
|
10
|
+
matrix:
|
|
11
|
+
ruby: [ '2.6', '2.7', '3.0' ]
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v2
|
|
14
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
|
15
|
+
uses: actions/setup-ruby@v1
|
|
16
|
+
with:
|
|
17
|
+
ruby-version: ${{ matrix.ruby }}
|
|
18
|
+
- name: Build and test with Rake
|
|
19
|
+
run: |
|
|
20
|
+
gem install --no-document bundler
|
|
21
|
+
bundle install --jobs 4 --retry 3
|
|
22
|
+
bundle exec rake
|
data/CHANGELOG.md
CHANGED
|
@@ -1,2 +1,21 @@
|
|
|
1
|
+
## 0.3.0
|
|
2
|
+
- Add type declaration file: sig/annoy.rbs
|
|
3
|
+
- Fix get_distance method to return an integer-typed value for Hamming metric indexes.
|
|
4
|
+
- Rename native extension files.
|
|
5
|
+
|
|
6
|
+
## 0.2.3
|
|
7
|
+
- Add GC guard to index saving and loading methods.
|
|
8
|
+
|
|
9
|
+
## 0.2.2
|
|
10
|
+
- Replace Data_ functions with TypedData_ functions.
|
|
11
|
+
|
|
12
|
+
## 0.2.1
|
|
13
|
+
- Fix to free char array of error message before calling rb_raise.
|
|
14
|
+
- Fix to use array allocated with ruby_xmalloc instead of vector class in C++.
|
|
15
|
+
|
|
16
|
+
## 0.2.0
|
|
17
|
+
- Update bundled Annoy version to 1.17.0.
|
|
18
|
+
- Support multithreaded index building.
|
|
19
|
+
|
|
1
20
|
## 0.1.0
|
|
2
21
|
- First release.
|
data/Gemfile
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
source
|
|
1
|
+
source 'https://rubygems.org'
|
|
2
2
|
|
|
3
3
|
# Specify your gem's dependencies in annoy.gemspec
|
|
4
4
|
gemspec
|
|
5
5
|
|
|
6
|
-
gem
|
|
7
|
-
gem
|
|
8
|
-
gem
|
|
6
|
+
gem 'rake', '~> 13.0'
|
|
7
|
+
gem 'rake-compiler', '~> 1.1'
|
|
8
|
+
gem 'rspec', '~> 3.0'
|
|
9
|
+
gem 'rbs', '~> 1.2'
|
|
10
|
+
gem 'steep', '~> 0.44'
|
data/README.md
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
# Annoy.rb
|
|
2
2
|
|
|
3
|
-
[](https://github.com/yoshoku/annoy.rb/actions?query=workflow%3Abuild)
|
|
4
4
|
[](https://badge.fury.io/rb/annoy-rb)
|
|
5
|
-
[](https://github.com/yoshoku/annoy.rb/blob/
|
|
5
|
+
[](https://github.com/yoshoku/annoy.rb/blob/main/LICENSE.txt)
|
|
6
|
+
[](https://yoshoku.github.io/annoy.rb/doc/)
|
|
6
7
|
|
|
7
8
|
Annoy.rb is a Ruby binding for the [Annoy (Approximate Nearest Neighbors Oh Yeah)](https://github.com/spotify/annoy).
|
|
8
9
|
|
|
@@ -24,6 +25,10 @@ Or install it yourself as:
|
|
|
24
25
|
|
|
25
26
|
Note: Annoy.rb does not require the installation of another external library.
|
|
26
27
|
|
|
28
|
+
## Documentation
|
|
29
|
+
|
|
30
|
+
* [Annoy.rb API Documentation](https://yoshoku.github.io/annoy.rb/doc/)
|
|
31
|
+
|
|
27
32
|
## Usage
|
|
28
33
|
|
|
29
34
|
```ruby
|
|
@@ -51,8 +56,8 @@ The gem is available as open source under the terms of the [Apache-2.0 License](
|
|
|
51
56
|
|
|
52
57
|
## Contributing
|
|
53
58
|
|
|
54
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/annoy.rb. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/yoshoku/annoy.rb/blob/
|
|
59
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/annoy.rb. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/yoshoku/annoy.rb/blob/main/CODE_OF_CONDUCT.md).
|
|
55
60
|
|
|
56
61
|
## Code of Conduct
|
|
57
62
|
|
|
58
|
-
Everyone interacting in the Annoy.rb project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/annoy.rb/blob/
|
|
63
|
+
Everyone interacting in the Annoy.rb project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/annoy.rb/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/Steepfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
target :lib do
|
|
2
|
+
signature "sig"
|
|
3
|
+
|
|
4
|
+
check "lib" # Directory name
|
|
5
|
+
# check "Gemfile" # File name
|
|
6
|
+
# check "app/models/**/*.rb" # Glob
|
|
7
|
+
# # ignore "lib/templates/*.rb"
|
|
8
|
+
#
|
|
9
|
+
# # library "pathname", "set" # Standard libraries
|
|
10
|
+
# # library "strong_json" # Gems
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# target :spec do
|
|
14
|
+
# signature "sig", "sig-private"
|
|
15
|
+
#
|
|
16
|
+
# check "spec"
|
|
17
|
+
#
|
|
18
|
+
# # library "pathname", "set" # Standard libraries
|
|
19
|
+
# # library "rspec"
|
|
20
|
+
# end
|
data/annoy-rb.gemspec
CHANGED
|
@@ -13,7 +13,8 @@ Gem::Specification.new do |spec|
|
|
|
13
13
|
|
|
14
14
|
spec.metadata['homepage_uri'] = spec.homepage
|
|
15
15
|
spec.metadata['source_code_uri'] = spec.homepage
|
|
16
|
-
spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/annoy.rb/blob/
|
|
16
|
+
spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/annoy.rb/blob/main/CHANGELOG.md'
|
|
17
|
+
spec.metadata['documentation_uri'] = 'https://yoshoku.github.io/annoy.rb/doc/'
|
|
17
18
|
|
|
18
19
|
# Specify which files should be added to the gem when it is released.
|
|
19
20
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
@@ -16,15 +16,15 @@
|
|
|
16
16
|
* limitations under the License.
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
#include "
|
|
19
|
+
#include "annoyext.hpp"
|
|
20
20
|
|
|
21
21
|
extern "C"
|
|
22
|
-
void
|
|
22
|
+
void Init_annoyext(void)
|
|
23
23
|
{
|
|
24
24
|
VALUE rb_mAnnoy = rb_define_module("Annoy");
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
RbAnnoyIndex<AnnoyIndexAngular, double>::define_class(rb_mAnnoy, "AnnoyIndexAngular");
|
|
26
|
+
RbAnnoyIndex<AnnoyIndexDotProduct, double>::define_class(rb_mAnnoy, "AnnoyIndexDotProduct");
|
|
27
|
+
RbAnnoyIndex<AnnoyIndexHamming, uint64_t>::define_class(rb_mAnnoy, "AnnoyIndexHamming");
|
|
28
|
+
RbAnnoyIndex<AnnoyIndexEuclidean, double>::define_class(rb_mAnnoy, "AnnoyIndexEuclidean");
|
|
29
|
+
RbAnnoyIndex<AnnoyIndexManhattan, double>::define_class(rb_mAnnoy, "AnnoyIndexManhattan");
|
|
30
30
|
}
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
* limitations under the License.
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
#ifndef
|
|
20
|
-
#define
|
|
19
|
+
#ifndef ANNOYEXT_HPP
|
|
20
|
+
#define ANNOYEXT_HPP 1
|
|
21
21
|
|
|
22
22
|
#include <typeinfo>
|
|
23
23
|
|
|
@@ -25,28 +25,38 @@
|
|
|
25
25
|
#include <annoylib.h>
|
|
26
26
|
#include <kissrandom.h>
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
typedef
|
|
30
|
-
|
|
31
|
-
typedef
|
|
32
|
-
|
|
28
|
+
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
|
29
|
+
typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
|
|
30
|
+
#else
|
|
31
|
+
typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
typedef AnnoyIndex<int, double, Angular, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexAngular;
|
|
35
|
+
typedef AnnoyIndex<int, double, DotProduct, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexDotProduct;
|
|
36
|
+
typedef AnnoyIndex<int, uint64_t, Hamming, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexHamming;
|
|
37
|
+
typedef AnnoyIndex<int, double, Euclidean, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexEuclidean;
|
|
38
|
+
typedef AnnoyIndex<int, double, Manhattan, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexManhattan;
|
|
33
39
|
|
|
34
40
|
template<class T, typename F> class RbAnnoyIndex
|
|
35
41
|
{
|
|
36
42
|
public:
|
|
37
43
|
static VALUE annoy_index_alloc(VALUE self) {
|
|
38
44
|
T* ptr = (T*)ruby_xmalloc(sizeof(T));
|
|
39
|
-
return
|
|
45
|
+
return TypedData_Wrap_Struct(self, &annoy_index_type, ptr);
|
|
40
46
|
};
|
|
41
47
|
|
|
42
|
-
static void annoy_index_free(
|
|
43
|
-
ptr->~AnnoyIndex();
|
|
48
|
+
static void annoy_index_free(void* ptr) {
|
|
49
|
+
((T*)ptr)->~AnnoyIndex();
|
|
44
50
|
ruby_xfree(ptr);
|
|
45
51
|
};
|
|
46
52
|
|
|
53
|
+
static size_t annoy_index_size(const void* ptr) {
|
|
54
|
+
return sizeof(*((T*)ptr));
|
|
55
|
+
};
|
|
56
|
+
|
|
47
57
|
static T* get_annoy_index(VALUE self) {
|
|
48
58
|
T* ptr;
|
|
49
|
-
|
|
59
|
+
TypedData_Get_Struct(self, T, &annoy_index_type, ptr);
|
|
50
60
|
return ptr;
|
|
51
61
|
};
|
|
52
62
|
|
|
@@ -55,7 +65,7 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
55
65
|
rb_define_alloc_func(rb_cAnnoyIndex, annoy_index_alloc);
|
|
56
66
|
rb_define_method(rb_cAnnoyIndex, "initialize", RUBY_METHOD_FUNC(_annoy_index_init), 1);
|
|
57
67
|
rb_define_method(rb_cAnnoyIndex, "add_item", RUBY_METHOD_FUNC(_annoy_index_add_item), 2);
|
|
58
|
-
rb_define_method(rb_cAnnoyIndex, "build", RUBY_METHOD_FUNC(_annoy_index_build),
|
|
68
|
+
rb_define_method(rb_cAnnoyIndex, "build", RUBY_METHOD_FUNC(_annoy_index_build), 2);
|
|
59
69
|
rb_define_method(rb_cAnnoyIndex, "save", RUBY_METHOD_FUNC(_annoy_index_save), 2);
|
|
60
70
|
rb_define_method(rb_cAnnoyIndex, "load", RUBY_METHOD_FUNC(_annoy_index_load), 2);
|
|
61
71
|
rb_define_method(rb_cAnnoyIndex, "unload", RUBY_METHOD_FUNC(_annoy_index_unload), 0);
|
|
@@ -73,6 +83,7 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
73
83
|
};
|
|
74
84
|
|
|
75
85
|
private:
|
|
86
|
+
static const rb_data_type_t annoy_index_type;
|
|
76
87
|
|
|
77
88
|
static VALUE _annoy_index_init(VALUE self, VALUE _n_dims) {
|
|
78
89
|
const int n_dims = NUM2INT(_n_dims);
|
|
@@ -95,31 +106,34 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
95
106
|
return Qfalse;
|
|
96
107
|
}
|
|
97
108
|
|
|
98
|
-
|
|
109
|
+
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
|
99
110
|
for (int i = 0; i < n_dims; i++) {
|
|
100
111
|
vec[i] = typeid(F) == typeid(double) ? NUM2DBL(rb_ary_entry(arr, i)) : NUM2UINT(rb_ary_entry(arr, i));
|
|
101
112
|
}
|
|
102
113
|
|
|
103
114
|
char* error;
|
|
104
|
-
if (!get_annoy_index(self)->add_item(idx,
|
|
105
|
-
|
|
115
|
+
if (!get_annoy_index(self)->add_item(idx, vec, &error)) {
|
|
116
|
+
VALUE error_str = rb_str_new_cstr(error);
|
|
106
117
|
free(error);
|
|
118
|
+
ruby_xfree(vec);
|
|
119
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
|
107
120
|
return Qfalse;
|
|
108
121
|
}
|
|
109
122
|
|
|
123
|
+
ruby_xfree(vec);
|
|
110
124
|
return Qtrue;
|
|
111
125
|
};
|
|
112
126
|
|
|
113
|
-
static VALUE _annoy_index_build(VALUE self, VALUE _n_trees) {
|
|
127
|
+
static VALUE _annoy_index_build(VALUE self, VALUE _n_trees, VALUE _n_jobs) {
|
|
114
128
|
const int n_trees = NUM2INT(_n_trees);
|
|
129
|
+
const int n_jobs = NUM2INT(_n_jobs);
|
|
115
130
|
char* error;
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
rb_raise(rb_eRuntimeError, "%s", error);
|
|
131
|
+
if (!get_annoy_index(self)->build(n_trees, n_jobs, &error)) {
|
|
132
|
+
VALUE error_str = rb_str_new_cstr(error);
|
|
119
133
|
free(error);
|
|
134
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
|
120
135
|
return Qfalse;
|
|
121
136
|
}
|
|
122
|
-
|
|
123
137
|
return Qtrue;
|
|
124
138
|
};
|
|
125
139
|
|
|
@@ -127,13 +141,13 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
127
141
|
const char* filename = StringValuePtr(_filename);
|
|
128
142
|
const bool prefault = _prefault == Qtrue ? true : false;
|
|
129
143
|
char* error;
|
|
130
|
-
|
|
131
144
|
if (!get_annoy_index(self)->save(filename, prefault, &error)) {
|
|
132
|
-
|
|
145
|
+
VALUE error_str = rb_str_new_cstr(error);
|
|
133
146
|
free(error);
|
|
147
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
|
134
148
|
return Qfalse;
|
|
135
149
|
}
|
|
136
|
-
|
|
150
|
+
RB_GC_GUARD(_filename);
|
|
137
151
|
return Qtrue;
|
|
138
152
|
};
|
|
139
153
|
|
|
@@ -141,13 +155,13 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
141
155
|
const char* filename = StringValuePtr(_filename);
|
|
142
156
|
const bool prefault = _prefault == Qtrue ? true : false;
|
|
143
157
|
char* error;
|
|
144
|
-
|
|
145
158
|
if (!get_annoy_index(self)->load(filename, prefault, &error)) {
|
|
146
|
-
|
|
159
|
+
VALUE error_str = rb_str_new_cstr(error);
|
|
147
160
|
free(error);
|
|
161
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
|
148
162
|
return Qfalse;
|
|
149
163
|
}
|
|
150
|
-
|
|
164
|
+
RB_GC_GUARD(_filename);
|
|
151
165
|
return Qtrue;
|
|
152
166
|
};
|
|
153
167
|
|
|
@@ -201,7 +215,7 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
201
215
|
return Qfalse;
|
|
202
216
|
}
|
|
203
217
|
|
|
204
|
-
|
|
218
|
+
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
|
205
219
|
for (int i = 0; i < n_dims; i++) {
|
|
206
220
|
vec[i] = typeid(F) == typeid(double) ? NUM2DBL(rb_ary_entry(_vec, i)) : NUM2UINT(rb_ary_entry(_vec, i));
|
|
207
221
|
}
|
|
@@ -212,7 +226,9 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
212
226
|
std::vector<int> neighbors;
|
|
213
227
|
std::vector<F> distances;
|
|
214
228
|
|
|
215
|
-
get_annoy_index(self)->get_nns_by_vector(
|
|
229
|
+
get_annoy_index(self)->get_nns_by_vector(vec, n_neighbors, search_k, &neighbors, include_distances ? &distances : NULL);
|
|
230
|
+
|
|
231
|
+
ruby_xfree(vec);
|
|
216
232
|
|
|
217
233
|
const int sz_neighbors = neighbors.size();
|
|
218
234
|
VALUE neighbors_arr = rb_ary_new2(sz_neighbors);
|
|
@@ -239,23 +255,24 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
239
255
|
static VALUE _annoy_index_get_item(VALUE self, VALUE _idx) {
|
|
240
256
|
const int idx = NUM2INT(_idx);
|
|
241
257
|
const int n_dims = get_annoy_index(self)->get_f();
|
|
242
|
-
|
|
258
|
+
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
|
243
259
|
VALUE arr = rb_ary_new2(n_dims);
|
|
244
260
|
|
|
245
|
-
get_annoy_index(self)->get_item(idx,
|
|
261
|
+
get_annoy_index(self)->get_item(idx, vec);
|
|
246
262
|
|
|
247
263
|
for (int i = 0; i < n_dims; i++) {
|
|
248
264
|
rb_ary_store(arr, i, typeid(F) == typeid(double) ? DBL2NUM(vec[i]) : UINT2NUM(vec[i]));
|
|
249
265
|
}
|
|
250
266
|
|
|
267
|
+
ruby_xfree(vec);
|
|
251
268
|
return arr;
|
|
252
269
|
};
|
|
253
270
|
|
|
254
271
|
static VALUE _annoy_index_get_distance(VALUE self, VALUE _i, VALUE _j) {
|
|
255
272
|
const int i = NUM2INT(_i);
|
|
256
273
|
const int j = NUM2INT(_j);
|
|
257
|
-
const
|
|
258
|
-
return DBL2NUM(dist);
|
|
274
|
+
const F dist = get_annoy_index(self)->get_distance(i, j);
|
|
275
|
+
return typeid(F) == typeid(double) ? DBL2NUM(dist) : UINT2NUM(dist);
|
|
259
276
|
};
|
|
260
277
|
|
|
261
278
|
static VALUE _annoy_index_get_n_items(VALUE self) {
|
|
@@ -272,10 +289,12 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
272
289
|
const char* filename = StringValuePtr(_filename);
|
|
273
290
|
char* error;
|
|
274
291
|
if (!get_annoy_index(self)->on_disk_build(filename, &error)) {
|
|
275
|
-
|
|
292
|
+
VALUE error_str = rb_str_new_cstr(error);
|
|
276
293
|
free(error);
|
|
294
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
|
277
295
|
return Qfalse;
|
|
278
296
|
}
|
|
297
|
+
RB_GC_GUARD(_filename);
|
|
279
298
|
return Qtrue;
|
|
280
299
|
};
|
|
281
300
|
|
|
@@ -297,4 +316,17 @@ template<class T, typename F> class RbAnnoyIndex
|
|
|
297
316
|
};
|
|
298
317
|
};
|
|
299
318
|
|
|
300
|
-
|
|
319
|
+
template<class T, typename F>
|
|
320
|
+
const rb_data_type_t RbAnnoyIndex<T, F>::annoy_index_type = {
|
|
321
|
+
"RbAnnoyIndex",
|
|
322
|
+
{
|
|
323
|
+
NULL,
|
|
324
|
+
RbAnnoyIndex::annoy_index_free,
|
|
325
|
+
RbAnnoyIndex::annoy_index_size
|
|
326
|
+
},
|
|
327
|
+
NULL,
|
|
328
|
+
NULL,
|
|
329
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
|
330
|
+
};
|
|
331
|
+
|
|
332
|
+
#endif /* ANNOYEXT_HPP */
|
data/ext/annoy/extconf.rb
CHANGED
|
@@ -2,8 +2,8 @@ require 'mkmf'
|
|
|
2
2
|
|
|
3
3
|
abort 'libstdc++ is not found.' unless have_library('stdc++')
|
|
4
4
|
|
|
5
|
-
$CXXFLAGS << " -march=native"
|
|
5
|
+
$CXXFLAGS << " -std=c++14 -march=native -DANNOYLIB_MULTITHREADED_BUILD"
|
|
6
6
|
$INCFLAGS << " -I$(srcdir)/src"
|
|
7
7
|
$VPATH << "$(srcdir)/src"
|
|
8
8
|
|
|
9
|
-
create_makefile('annoy/
|
|
9
|
+
create_makefile('annoy/annoyext')
|
data/ext/annoy/src/annoylib.h
CHANGED
|
@@ -58,6 +58,12 @@ typedef signed __int64 int64_t;
|
|
|
58
58
|
#include <queue>
|
|
59
59
|
#include <limits>
|
|
60
60
|
|
|
61
|
+
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
|
62
|
+
#include <thread>
|
|
63
|
+
#include <mutex>
|
|
64
|
+
#include <shared_mutex>
|
|
65
|
+
#endif
|
|
66
|
+
|
|
61
67
|
#ifdef _MSC_VER
|
|
62
68
|
// Needed for Visual Studio to disable runtime checks for memcpy
|
|
63
69
|
#pragma runtime_checks("s", off)
|
|
@@ -104,7 +110,6 @@ inline void set_error_from_string(char **error, const char* msg) {
|
|
|
104
110
|
#ifndef _MSC_VER
|
|
105
111
|
#define popcount __builtin_popcountll
|
|
106
112
|
#else // See #293, #358
|
|
107
|
-
#define isnan(x) _isnan(x)
|
|
108
113
|
#define popcount cole_popcount
|
|
109
114
|
#endif
|
|
110
115
|
|
|
@@ -346,7 +351,7 @@ inline float euclidean_distance<float>(const float* x, const float* y, int f) {
|
|
|
346
351
|
|
|
347
352
|
#endif
|
|
348
353
|
|
|
349
|
-
|
|
354
|
+
|
|
350
355
|
template<typename T>
|
|
351
356
|
inline T get_norm(T* v, int f) {
|
|
352
357
|
return sqrt(dot(v, v, f));
|
|
@@ -358,7 +363,7 @@ inline void two_means(const vector<Node*>& nodes, int f, Random& random, bool co
|
|
|
358
363
|
This algorithm is a huge heuristic. Empirically it works really well, but I
|
|
359
364
|
can't motivate it well. The basic idea is to keep two centroids and assign
|
|
360
365
|
points to either one of them. We weight each centroid by the number of points
|
|
361
|
-
assigned to it, so to balance it.
|
|
366
|
+
assigned to it, so to balance it.
|
|
362
367
|
*/
|
|
363
368
|
static int iteration_steps = 200;
|
|
364
369
|
size_t count = nodes.size();
|
|
@@ -548,7 +553,7 @@ struct DotProduct : Angular {
|
|
|
548
553
|
static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
|
|
549
554
|
Node<S, T>* p = (Node<S, T>*)alloca(s);
|
|
550
555
|
Node<S, T>* q = (Node<S, T>*)alloca(s);
|
|
551
|
-
DotProduct::zero_value(p);
|
|
556
|
+
DotProduct::zero_value(p);
|
|
552
557
|
DotProduct::zero_value(q);
|
|
553
558
|
two_means<T, Random, DotProduct, Node<S, T> >(nodes, f, random, true, p, q);
|
|
554
559
|
for (int z = 0; z < f; z++)
|
|
@@ -594,8 +599,8 @@ struct DotProduct : Angular {
|
|
|
594
599
|
// Step one: compute the norm of each vector and store that in its extra dimension (f-1)
|
|
595
600
|
for (S i = 0; i < node_count; i++) {
|
|
596
601
|
Node* node = get_node_ptr<S, Node>(nodes, _s, i);
|
|
597
|
-
T
|
|
598
|
-
|
|
602
|
+
T d = dot(node->v, node->v, f);
|
|
603
|
+
T norm = d < 0 ? 0 : sqrt(d);
|
|
599
604
|
node->dot_factor = norm;
|
|
600
605
|
}
|
|
601
606
|
|
|
@@ -612,9 +617,8 @@ struct DotProduct : Angular {
|
|
|
612
617
|
for (S i = 0; i < node_count; i++) {
|
|
613
618
|
Node* node = get_node_ptr<S, Node>(nodes, _s, i);
|
|
614
619
|
T node_norm = node->dot_factor;
|
|
615
|
-
|
|
616
|
-
T dot_factor =
|
|
617
|
-
if (isnan(dot_factor)) dot_factor = 0;
|
|
620
|
+
T squared_norm_diff = pow(max_norm, static_cast<T>(2.0)) - pow(node_norm, static_cast<T>(2.0));
|
|
621
|
+
T dot_factor = squared_norm_diff < 0 ? 0 : sqrt(squared_norm_diff);
|
|
618
622
|
|
|
619
623
|
node->dot_factor = dot_factor;
|
|
620
624
|
}
|
|
@@ -753,7 +757,7 @@ struct Minkowski : Base {
|
|
|
753
757
|
struct Euclidean : Minkowski {
|
|
754
758
|
template<typename S, typename T>
|
|
755
759
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
|
756
|
-
return euclidean_distance(x->v, y->v, f);
|
|
760
|
+
return euclidean_distance(x->v, y->v, f);
|
|
757
761
|
}
|
|
758
762
|
template<typename S, typename T, typename Random>
|
|
759
763
|
static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
|
|
@@ -817,7 +821,7 @@ class AnnoyIndexInterface {
|
|
|
817
821
|
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
|
|
818
822
|
virtual ~AnnoyIndexInterface() {};
|
|
819
823
|
virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
|
|
820
|
-
virtual bool build(int q, char** error=NULL) = 0;
|
|
824
|
+
virtual bool build(int q, int n_threads=-1, char** error=NULL) = 0;
|
|
821
825
|
virtual bool unbuild(char** error=NULL) = 0;
|
|
822
826
|
virtual bool save(const char* filename, bool prefault=false, char** error=NULL) = 0;
|
|
823
827
|
virtual void unload() = 0;
|
|
@@ -833,7 +837,7 @@ class AnnoyIndexInterface {
|
|
|
833
837
|
virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
|
|
834
838
|
};
|
|
835
839
|
|
|
836
|
-
template<typename S, typename T, typename Distance, typename Random>
|
|
840
|
+
template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy>
|
|
837
841
|
class AnnoyIndex : public AnnoyIndexInterface<S, T> {
|
|
838
842
|
/*
|
|
839
843
|
* We use random projection to build a forest of binary trees of all items.
|
|
@@ -850,12 +854,13 @@ protected:
|
|
|
850
854
|
const int _f;
|
|
851
855
|
size_t _s;
|
|
852
856
|
S _n_items;
|
|
853
|
-
Random _random;
|
|
854
857
|
void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate
|
|
855
858
|
S _n_nodes;
|
|
856
859
|
S _nodes_size;
|
|
857
860
|
vector<S> _roots;
|
|
858
861
|
S _K;
|
|
862
|
+
bool _is_seeded;
|
|
863
|
+
int _seed;
|
|
859
864
|
bool _loaded;
|
|
860
865
|
bool _verbose;
|
|
861
866
|
int _fd;
|
|
@@ -863,7 +868,7 @@ protected:
|
|
|
863
868
|
bool _built;
|
|
864
869
|
public:
|
|
865
870
|
|
|
866
|
-
AnnoyIndex(int f) : _f(f)
|
|
871
|
+
AnnoyIndex(int f) : _f(f) {
|
|
867
872
|
_s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
|
|
868
873
|
_verbose = false;
|
|
869
874
|
_built = false;
|
|
@@ -907,7 +912,7 @@ public:
|
|
|
907
912
|
|
|
908
913
|
return true;
|
|
909
914
|
}
|
|
910
|
-
|
|
915
|
+
|
|
911
916
|
bool on_disk_build(const char* file, char** error=NULL) {
|
|
912
917
|
_on_disk = true;
|
|
913
918
|
_fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600);
|
|
@@ -928,8 +933,8 @@ public:
|
|
|
928
933
|
#endif
|
|
929
934
|
return true;
|
|
930
935
|
}
|
|
931
|
-
|
|
932
|
-
bool build(int q, char** error=NULL) {
|
|
936
|
+
|
|
937
|
+
bool build(int q, int n_threads=-1, char** error=NULL) {
|
|
933
938
|
if (_loaded) {
|
|
934
939
|
set_error_from_string(error, "You can't build a loaded index");
|
|
935
940
|
return false;
|
|
@@ -943,21 +948,8 @@ public:
|
|
|
943
948
|
D::template preprocess<T, S, Node>(_nodes, _s, _n_items, _f);
|
|
944
949
|
|
|
945
950
|
_n_nodes = _n_items;
|
|
946
|
-
while (1) {
|
|
947
|
-
if (q == -1 && _n_nodes >= _n_items * 2)
|
|
948
|
-
break;
|
|
949
|
-
if (q != -1 && _roots.size() >= (size_t)q)
|
|
950
|
-
break;
|
|
951
|
-
if (_verbose) showUpdate("pass %zd...\n", _roots.size());
|
|
952
|
-
|
|
953
|
-
vector<S> indices;
|
|
954
|
-
for (S i = 0; i < _n_items; i++) {
|
|
955
|
-
if (_get(i)->n_descendants >= 1) // Issue #223
|
|
956
|
-
indices.push_back(i);
|
|
957
|
-
}
|
|
958
951
|
|
|
959
|
-
|
|
960
|
-
}
|
|
952
|
+
ThreadedBuildPolicy::template build<S, T>(this, q, n_threads);
|
|
961
953
|
|
|
962
954
|
// Also, copy the roots into the last segment of the array
|
|
963
955
|
// This way we can load them faster without reading the whole file
|
|
@@ -967,7 +959,7 @@ public:
|
|
|
967
959
|
_n_nodes += _roots.size();
|
|
968
960
|
|
|
969
961
|
if (_verbose) showUpdate("has %d nodes\n", _n_nodes);
|
|
970
|
-
|
|
962
|
+
|
|
971
963
|
if (_on_disk) {
|
|
972
964
|
if (!remap_memory_and_truncate(&_nodes, _fd,
|
|
973
965
|
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
|
|
@@ -981,7 +973,7 @@ public:
|
|
|
981
973
|
_built = true;
|
|
982
974
|
return true;
|
|
983
975
|
}
|
|
984
|
-
|
|
976
|
+
|
|
985
977
|
bool unbuild(char** error=NULL) {
|
|
986
978
|
if (_loaded) {
|
|
987
979
|
set_error_from_string(error, "You can't unbuild a loaded index");
|
|
@@ -1035,6 +1027,7 @@ public:
|
|
|
1035
1027
|
_n_nodes = 0;
|
|
1036
1028
|
_nodes_size = 0;
|
|
1037
1029
|
_on_disk = false;
|
|
1030
|
+
_is_seeded = false;
|
|
1038
1031
|
_roots.clear();
|
|
1039
1032
|
}
|
|
1040
1033
|
|
|
@@ -1142,29 +1135,82 @@ public:
|
|
|
1142
1135
|
}
|
|
1143
1136
|
|
|
1144
1137
|
void set_seed(int seed) {
|
|
1138
|
+
_is_seeded = true;
|
|
1139
|
+
_seed = seed;
|
|
1140
|
+
}
|
|
1141
|
+
|
|
1142
|
+
void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
|
|
1143
|
+
Random _random;
|
|
1144
|
+
// Each thread needs its own seed, otherwise each thread would be building the same tree(s)
|
|
1145
|
+
int seed = _is_seeded ? _seed + thread_idx : thread_idx;
|
|
1145
1146
|
_random.set_seed(seed);
|
|
1147
|
+
|
|
1148
|
+
vector<S> thread_roots;
|
|
1149
|
+
while (1) {
|
|
1150
|
+
if (q == -1) {
|
|
1151
|
+
threaded_build_policy.lock_n_nodes();
|
|
1152
|
+
if (_n_nodes >= 2 * _n_items) {
|
|
1153
|
+
threaded_build_policy.unlock_n_nodes();
|
|
1154
|
+
break;
|
|
1155
|
+
}
|
|
1156
|
+
threaded_build_policy.unlock_n_nodes();
|
|
1157
|
+
} else {
|
|
1158
|
+
if (thread_roots.size() >= (size_t)q) {
|
|
1159
|
+
break;
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
if (_verbose) showUpdate("pass %zd...\n", thread_roots.size());
|
|
1164
|
+
|
|
1165
|
+
vector<S> indices;
|
|
1166
|
+
threaded_build_policy.lock_shared_nodes();
|
|
1167
|
+
for (S i = 0; i < _n_items; i++) {
|
|
1168
|
+
if (_get(i)->n_descendants >= 1) { // Issue #223
|
|
1169
|
+
indices.push_back(i);
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
threaded_build_policy.unlock_shared_nodes();
|
|
1173
|
+
|
|
1174
|
+
thread_roots.push_back(_make_tree(indices, true, _random, threaded_build_policy));
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
threaded_build_policy.lock_roots();
|
|
1178
|
+
_roots.insert(_roots.end(), thread_roots.begin(), thread_roots.end());
|
|
1179
|
+
threaded_build_policy.unlock_roots();
|
|
1146
1180
|
}
|
|
1147
1181
|
|
|
1148
1182
|
protected:
|
|
1149
|
-
void
|
|
1183
|
+
void _reallocate_nodes(S n) {
|
|
1184
|
+
const double reallocation_factor = 1.3;
|
|
1185
|
+
S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * reallocation_factor));
|
|
1186
|
+
void *old = _nodes;
|
|
1187
|
+
|
|
1188
|
+
if (_on_disk) {
|
|
1189
|
+
if (!remap_memory_and_truncate(&_nodes, _fd,
|
|
1190
|
+
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
|
|
1191
|
+
static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
|
|
1192
|
+
_verbose)
|
|
1193
|
+
showUpdate("File truncation error\n");
|
|
1194
|
+
} else {
|
|
1195
|
+
_nodes = realloc(_nodes, _s * new_nodes_size);
|
|
1196
|
+
memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
_nodes_size = new_nodes_size;
|
|
1200
|
+
if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
|
|
1150
1204
|
if (n > _nodes_size) {
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
if (!remap_memory_and_truncate(&_nodes, _fd,
|
|
1157
|
-
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
|
|
1158
|
-
static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
|
|
1159
|
-
_verbose)
|
|
1160
|
-
showUpdate("File truncation error\n");
|
|
1161
|
-
} else {
|
|
1162
|
-
_nodes = realloc(_nodes, _s * new_nodes_size);
|
|
1163
|
-
memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
|
|
1164
|
-
}
|
|
1205
|
+
threaded_build_policy.lock_nodes();
|
|
1206
|
+
_reallocate_nodes(n);
|
|
1207
|
+
threaded_build_policy.unlock_nodes();
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1165
1210
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1211
|
+
void _allocate_size(S n) {
|
|
1212
|
+
if (n > _nodes_size) {
|
|
1213
|
+
_reallocate_nodes(n);
|
|
1168
1214
|
}
|
|
1169
1215
|
}
|
|
1170
1216
|
|
|
@@ -1179,7 +1225,7 @@ protected:
|
|
|
1179
1225
|
return std::max(f, 1-f);
|
|
1180
1226
|
}
|
|
1181
1227
|
|
|
1182
|
-
S _make_tree(const vector<S>& indices, bool is_root) {
|
|
1228
|
+
S _make_tree(const vector<S>& indices, bool is_root, Random& _random, ThreadedBuildPolicy& threaded_build_policy) {
|
|
1183
1229
|
// The basic rule is that if we have <= _K items, then it's a leaf node, otherwise it's a split node.
|
|
1184
1230
|
// There are some regrettable complications caused by the problem that root nodes have to be "special":
|
|
1185
1231
|
// 1. We identify root nodes by the arguable logic that _n_items == n->n_descendants, regardless of how many descendants they actually have
|
|
@@ -1189,8 +1235,12 @@ protected:
|
|
|
1189
1235
|
return indices[0];
|
|
1190
1236
|
|
|
1191
1237
|
if (indices.size() <= (size_t)_K && (!is_root || (size_t)_n_items <= (size_t)_K || indices.size() == 1)) {
|
|
1192
|
-
|
|
1238
|
+
threaded_build_policy.lock_n_nodes();
|
|
1239
|
+
_allocate_size(_n_nodes + 1, threaded_build_policy);
|
|
1193
1240
|
S item = _n_nodes++;
|
|
1241
|
+
threaded_build_policy.unlock_n_nodes();
|
|
1242
|
+
|
|
1243
|
+
threaded_build_policy.lock_shared_nodes();
|
|
1194
1244
|
Node* m = _get(item);
|
|
1195
1245
|
m->n_descendants = is_root ? _n_items : (S)indices.size();
|
|
1196
1246
|
|
|
@@ -1200,9 +1250,12 @@ protected:
|
|
|
1200
1250
|
// Only copy when necessary to avoid crash in MSVC 9. #293
|
|
1201
1251
|
if (!indices.empty())
|
|
1202
1252
|
memcpy(m->children, &indices[0], indices.size() * sizeof(S));
|
|
1253
|
+
|
|
1254
|
+
threaded_build_policy.unlock_shared_nodes();
|
|
1203
1255
|
return item;
|
|
1204
1256
|
}
|
|
1205
1257
|
|
|
1258
|
+
threaded_build_policy.lock_shared_nodes();
|
|
1206
1259
|
vector<Node*> children;
|
|
1207
1260
|
for (size_t i = 0; i < indices.size(); i++) {
|
|
1208
1261
|
S j = indices[i];
|
|
@@ -1233,6 +1286,7 @@ protected:
|
|
|
1233
1286
|
if (_split_imbalance(children_indices[0], children_indices[1]) < 0.95)
|
|
1234
1287
|
break;
|
|
1235
1288
|
}
|
|
1289
|
+
threaded_build_policy.unlock_shared_nodes();
|
|
1236
1290
|
|
|
1237
1291
|
// If we didn't find a hyperplane, just randomize sides as a last option
|
|
1238
1292
|
while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
|
|
@@ -1259,13 +1313,17 @@ protected:
|
|
|
1259
1313
|
m->n_descendants = is_root ? _n_items : (S)indices.size();
|
|
1260
1314
|
for (int side = 0; side < 2; side++) {
|
|
1261
1315
|
// run _make_tree for the smallest child first (for cache locality)
|
|
1262
|
-
m->children[side^flip] = _make_tree(children_indices[side^flip], false);
|
|
1316
|
+
m->children[side^flip] = _make_tree(children_indices[side^flip], false, _random, threaded_build_policy);
|
|
1263
1317
|
}
|
|
1264
1318
|
|
|
1265
|
-
|
|
1266
|
-
_allocate_size(_n_nodes + 1);
|
|
1319
|
+
threaded_build_policy.lock_n_nodes();
|
|
1320
|
+
_allocate_size(_n_nodes + 1, threaded_build_policy);
|
|
1267
1321
|
S item = _n_nodes++;
|
|
1322
|
+
threaded_build_policy.unlock_n_nodes();
|
|
1323
|
+
|
|
1324
|
+
threaded_build_policy.lock_shared_nodes();
|
|
1268
1325
|
memcpy(_get(item), m, _s);
|
|
1326
|
+
threaded_build_policy.unlock_shared_nodes();
|
|
1269
1327
|
|
|
1270
1328
|
return item;
|
|
1271
1329
|
}
|
|
@@ -1311,7 +1369,7 @@ protected:
|
|
|
1311
1369
|
vector<pair<T, S> > nns_dist;
|
|
1312
1370
|
S last = -1;
|
|
1313
1371
|
for (size_t i = 0; i < nns.size(); i++) {
|
|
1314
|
-
S j = nns[i];
|
|
1372
|
+
S j = nns[i];
|
|
1315
1373
|
if (j == last)
|
|
1316
1374
|
continue;
|
|
1317
1375
|
last = j;
|
|
@@ -1330,5 +1388,92 @@ protected:
|
|
|
1330
1388
|
}
|
|
1331
1389
|
};
|
|
1332
1390
|
|
|
1391
|
+
class AnnoyIndexSingleThreadedBuildPolicy {
|
|
1392
|
+
public:
|
|
1393
|
+
template<typename S, typename T, typename D, typename Random>
|
|
1394
|
+
static void build(AnnoyIndex<S, T, D, Random, AnnoyIndexSingleThreadedBuildPolicy>* annoy, int q, int n_threads) {
|
|
1395
|
+
AnnoyIndexSingleThreadedBuildPolicy threaded_build_policy;
|
|
1396
|
+
annoy->thread_build(q, 0, threaded_build_policy);
|
|
1397
|
+
}
|
|
1398
|
+
|
|
1399
|
+
void lock_n_nodes() {}
|
|
1400
|
+
void unlock_n_nodes() {}
|
|
1401
|
+
|
|
1402
|
+
void lock_nodes() {}
|
|
1403
|
+
void unlock_nodes() {}
|
|
1404
|
+
|
|
1405
|
+
void lock_shared_nodes() {}
|
|
1406
|
+
void unlock_shared_nodes() {}
|
|
1407
|
+
|
|
1408
|
+
void lock_roots() {}
|
|
1409
|
+
void unlock_roots() {}
|
|
1410
|
+
};
|
|
1411
|
+
|
|
1412
|
+
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
|
1413
|
+
class AnnoyIndexMultiThreadedBuildPolicy {
|
|
1414
|
+
private:
|
|
1415
|
+
std::shared_timed_mutex nodes_mutex;
|
|
1416
|
+
std::mutex n_nodes_mutex;
|
|
1417
|
+
std::mutex roots_mutex;
|
|
1418
|
+
|
|
1419
|
+
public:
|
|
1420
|
+
template<typename S, typename T, typename D, typename Random>
|
|
1421
|
+
static void build(AnnoyIndex<S, T, D, Random, AnnoyIndexMultiThreadedBuildPolicy>* annoy, int q, int n_threads) {
|
|
1422
|
+
AnnoyIndexMultiThreadedBuildPolicy threaded_build_policy;
|
|
1423
|
+
if (n_threads == -1) {
|
|
1424
|
+
// If the hardware_concurrency() value is not well defined or not computable, it returns 0.
|
|
1425
|
+
// We guard against this by using at least 1 thread.
|
|
1426
|
+
n_threads = std::max(1, (int)std::thread::hardware_concurrency());
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
vector<std::thread> threads(n_threads);
|
|
1430
|
+
|
|
1431
|
+
for (int thread_idx = 0; thread_idx < n_threads; thread_idx++) {
|
|
1432
|
+
int trees_per_thread = q == -1 ? -1 : (int)floor((q + thread_idx) / n_threads);
|
|
1433
|
+
|
|
1434
|
+
threads[thread_idx] = std::thread(
|
|
1435
|
+
&AnnoyIndex<S, T, D, Random, AnnoyIndexMultiThreadedBuildPolicy>::thread_build,
|
|
1436
|
+
annoy,
|
|
1437
|
+
trees_per_thread,
|
|
1438
|
+
thread_idx,
|
|
1439
|
+
std::ref(threaded_build_policy)
|
|
1440
|
+
);
|
|
1441
|
+
}
|
|
1442
|
+
|
|
1443
|
+
for (auto& thread : threads) {
|
|
1444
|
+
thread.join();
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
|
|
1448
|
+
void lock_n_nodes() {
|
|
1449
|
+
n_nodes_mutex.lock();
|
|
1450
|
+
}
|
|
1451
|
+
void unlock_n_nodes() {
|
|
1452
|
+
n_nodes_mutex.unlock();
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1455
|
+
void lock_nodes() {
|
|
1456
|
+
nodes_mutex.lock();
|
|
1457
|
+
}
|
|
1458
|
+
void unlock_nodes() {
|
|
1459
|
+
nodes_mutex.unlock();
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
void lock_shared_nodes() {
|
|
1463
|
+
nodes_mutex.lock_shared();
|
|
1464
|
+
}
|
|
1465
|
+
void unlock_shared_nodes() {
|
|
1466
|
+
nodes_mutex.unlock_shared();
|
|
1467
|
+
}
|
|
1468
|
+
|
|
1469
|
+
void lock_roots() {
|
|
1470
|
+
roots_mutex.lock();
|
|
1471
|
+
}
|
|
1472
|
+
void unlock_roots() {
|
|
1473
|
+
roots_mutex.unlock();
|
|
1474
|
+
}
|
|
1475
|
+
};
|
|
1476
|
+
#endif
|
|
1477
|
+
|
|
1333
1478
|
#endif
|
|
1334
1479
|
// vim: tabstop=2 shiftwidth=2
|