annoy-rb 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 06af1da176d1d25ba1d0058a34d21483413deff690004d9e3dd5471c9d9f4294
4
- data.tar.gz: 98eab997b5d6b2439c7a2b3ab26a25818428bb6fcc0811447a903c3a83decfab
3
+ metadata.gz: f81b49dd55acb002949f5da8cd3e1dc39ab14b2020cf61e9046dfe71356d603f
4
+ data.tar.gz: 3cd782e87cc70d0517504bdfc233e720c5aeaa15d4f03c7887e0d859f84cc2cc
5
5
  SHA512:
6
- metadata.gz: 054df371bf5596ebe6a59d003c5c483d1b4ba885ad55fcd1e0232d6a3d52ad0650e4d6f24805737123ca68350789b7cded0a9e58aafabb94cfa31ed5f813619b
7
- data.tar.gz: 9b22bf40a94e081a39249f1068d32cfa0b4112f951f0172a9004a887ab2b7ebe460b6baa4bbd904906928c1571257cb0e277e59adf6d21e5df61c842759d46df
6
+ metadata.gz: dda8c77484034cbaaf3f9b5a6ea6558040350ef3e75121b699aab5f011722a0d79039c037ee63641cd1da7588d5e7dedb56fae91a37489300e1780934eaec80d
7
+ data.tar.gz: 8ad8aef4eac7ec930f4f1ffea12db8d944a7ee36e2140c7533ab4054586024af42befc3a113312d6289e5587a45b4f500c109dccad17330f0a20b5e349018a9a
@@ -1,2 +1,6 @@
1
+ ## 0.2.0
2
+ - Update bundled Annoy version to 1.17.0.
3
+ - Support multithreaded index building.
4
+
1
5
  ## 0.1.0
2
6
  - First release.
data/README.md CHANGED
@@ -3,6 +3,7 @@
3
3
  [![Build Status](https://travis-ci.org/yoshoku/annoy.rb.svg?branch=master)](https://travis-ci.org/yoshoku/annoy.rb)
4
4
  [![Gem Version](https://badge.fury.io/rb/annoy-rb.svg)](https://badge.fury.io/rb/annoy-rb)
5
5
  [![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/annoy.rb/blob/master/LICENSE.txt)
6
+ [![Documentation](http://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/annoy.rb/doc/)
6
7
 
7
8
  Annoy.rb is a Ruby binding for the [Annoy (Approximate Nearest Neighbors Oh Yeah)](https://github.com/spotify/annoy).
8
9
 
@@ -14,6 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.metadata['homepage_uri'] = spec.homepage
15
15
  spec.metadata['source_code_uri'] = spec.homepage
16
16
  spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/annoy.rb/blob/master/CHANGELOG.md'
17
+ spec.metadata['documentation_uri'] = 'https://yoshoku.github.io/annoy.rb/doc/'
17
18
 
18
19
  # Specify which files should be added to the gem when it is released.
19
20
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
@@ -22,9 +22,9 @@ extern "C"
22
22
  void Init_annoy(void)
23
23
  {
24
24
  VALUE rb_mAnnoy = rb_define_module("Annoy");
25
- VALUE rb_cAnnoyIndexAngular = RbAnnoyIndex<AnnoyIndexAngular, double>::define_class(rb_mAnnoy, "AnnoyIndexAngular");
26
- VALUE rb_cAnnoyIndexDotProduct = RbAnnoyIndex<AnnoyIndexDotProduct, double>::define_class(rb_mAnnoy, "AnnoyIndexDotProduct");
27
- VALUE rb_cAnnoyIndexHamming = RbAnnoyIndex<AnnoyIndexHamming, uint64_t>::define_class(rb_mAnnoy, "AnnoyIndexHamming");
28
- VALUE rb_cAnnoyIndexEuclidean = RbAnnoyIndex<AnnoyIndexEuclidean, double>::define_class(rb_mAnnoy, "AnnoyIndexEuclidean");
29
- VALUE rb_cAnnoyIndexManhattan = RbAnnoyIndex<AnnoyIndexManhattan, double>::define_class(rb_mAnnoy, "AnnoyIndexManhattan");
25
+ RbAnnoyIndex<AnnoyIndexAngular, double>::define_class(rb_mAnnoy, "AnnoyIndexAngular");
26
+ RbAnnoyIndex<AnnoyIndexDotProduct, double>::define_class(rb_mAnnoy, "AnnoyIndexDotProduct");
27
+ RbAnnoyIndex<AnnoyIndexHamming, uint64_t>::define_class(rb_mAnnoy, "AnnoyIndexHamming");
28
+ RbAnnoyIndex<AnnoyIndexEuclidean, double>::define_class(rb_mAnnoy, "AnnoyIndexEuclidean");
29
+ RbAnnoyIndex<AnnoyIndexManhattan, double>::define_class(rb_mAnnoy, "AnnoyIndexManhattan");
30
30
  }
@@ -25,11 +25,17 @@
25
25
  #include <annoylib.h>
26
26
  #include <kissrandom.h>
27
27
 
28
- typedef AnnoyIndex<int, double, Angular, Kiss64Random> AnnoyIndexAngular;
29
- typedef AnnoyIndex<int, double, DotProduct, Kiss64Random> AnnoyIndexDotProduct;
30
- typedef AnnoyIndex<int, uint64_t, Hamming, Kiss64Random> AnnoyIndexHamming;
31
- typedef AnnoyIndex<int, double, Euclidean, Kiss64Random> AnnoyIndexEuclidean;
32
- typedef AnnoyIndex<int, double, Manhattan, Kiss64Random> AnnoyIndexManhattan;
28
+ #ifdef ANNOYLIB_MULTITHREADED_BUILD
29
+ typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
30
+ #else
31
+ typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
32
+ #endif
33
+
34
+ typedef AnnoyIndex<int, double, Angular, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexAngular;
35
+ typedef AnnoyIndex<int, double, DotProduct, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexDotProduct;
36
+ typedef AnnoyIndex<int, uint64_t, Hamming, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexHamming;
37
+ typedef AnnoyIndex<int, double, Euclidean, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexEuclidean;
38
+ typedef AnnoyIndex<int, double, Manhattan, Kiss64Random, AnnoyIndexThreadedBuildPolicy> AnnoyIndexManhattan;
33
39
 
34
40
  template<class T, typename F> class RbAnnoyIndex
35
41
  {
@@ -55,7 +61,7 @@ template<class T, typename F> class RbAnnoyIndex
55
61
  rb_define_alloc_func(rb_cAnnoyIndex, annoy_index_alloc);
56
62
  rb_define_method(rb_cAnnoyIndex, "initialize", RUBY_METHOD_FUNC(_annoy_index_init), 1);
57
63
  rb_define_method(rb_cAnnoyIndex, "add_item", RUBY_METHOD_FUNC(_annoy_index_add_item), 2);
58
- rb_define_method(rb_cAnnoyIndex, "build", RUBY_METHOD_FUNC(_annoy_index_build), 1);
64
+ rb_define_method(rb_cAnnoyIndex, "build", RUBY_METHOD_FUNC(_annoy_index_build), 2);
59
65
  rb_define_method(rb_cAnnoyIndex, "save", RUBY_METHOD_FUNC(_annoy_index_save), 2);
60
66
  rb_define_method(rb_cAnnoyIndex, "load", RUBY_METHOD_FUNC(_annoy_index_load), 2);
61
67
  rb_define_method(rb_cAnnoyIndex, "unload", RUBY_METHOD_FUNC(_annoy_index_unload), 0);
@@ -110,11 +116,12 @@ template<class T, typename F> class RbAnnoyIndex
110
116
  return Qtrue;
111
117
  };
112
118
 
113
- static VALUE _annoy_index_build(VALUE self, VALUE _n_trees) {
119
+ static VALUE _annoy_index_build(VALUE self, VALUE _n_trees, VALUE _n_jobs) {
114
120
  const int n_trees = NUM2INT(_n_trees);
121
+ const int n_jobs = NUM2INT(_n_jobs);
115
122
  char* error;
116
123
 
117
- if (!get_annoy_index(self)->build(n_trees, &error)) {
124
+ if (!get_annoy_index(self)->build(n_trees, n_jobs, &error)) {
118
125
  rb_raise(rb_eRuntimeError, "%s", error);
119
126
  free(error);
120
127
  return Qfalse;
@@ -2,7 +2,7 @@ require 'mkmf'
2
2
 
3
3
  abort 'libstdc++ is not found.' unless have_library('stdc++')
4
4
 
5
- $CXXFLAGS << " -march=native"
5
+ $CXXFLAGS << " -std=c++14 -march=native -DANNOYLIB_MULTITHREADED_BUILD"
6
6
  $INCFLAGS << " -I$(srcdir)/src"
7
7
  $VPATH << "$(srcdir)/src"
8
8
 
@@ -58,6 +58,12 @@ typedef signed __int64 int64_t;
58
58
  #include <queue>
59
59
  #include <limits>
60
60
 
61
+ #ifdef ANNOYLIB_MULTITHREADED_BUILD
62
+ #include <thread>
63
+ #include <mutex>
64
+ #include <shared_mutex>
65
+ #endif
66
+
61
67
  #ifdef _MSC_VER
62
68
  // Needed for Visual Studio to disable runtime checks for memcpy
63
69
  #pragma runtime_checks("s", off)
@@ -104,7 +110,6 @@ inline void set_error_from_string(char **error, const char* msg) {
104
110
  #ifndef _MSC_VER
105
111
  #define popcount __builtin_popcountll
106
112
  #else // See #293, #358
107
- #define isnan(x) _isnan(x)
108
113
  #define popcount cole_popcount
109
114
  #endif
110
115
 
@@ -346,7 +351,7 @@ inline float euclidean_distance<float>(const float* x, const float* y, int f) {
346
351
 
347
352
  #endif
348
353
 
349
-
354
+
350
355
  template<typename T>
351
356
  inline T get_norm(T* v, int f) {
352
357
  return sqrt(dot(v, v, f));
@@ -358,7 +363,7 @@ inline void two_means(const vector<Node*>& nodes, int f, Random& random, bool co
358
363
  This algorithm is a huge heuristic. Empirically it works really well, but I
359
364
  can't motivate it well. The basic idea is to keep two centroids and assign
360
365
  points to either one of them. We weight each centroid by the number of points
361
- assigned to it, so to balance it.
366
+ assigned to it, so to balance it.
362
367
  */
363
368
  static int iteration_steps = 200;
364
369
  size_t count = nodes.size();
@@ -548,7 +553,7 @@ struct DotProduct : Angular {
548
553
  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
549
554
  Node<S, T>* p = (Node<S, T>*)alloca(s);
550
555
  Node<S, T>* q = (Node<S, T>*)alloca(s);
551
- DotProduct::zero_value(p);
556
+ DotProduct::zero_value(p);
552
557
  DotProduct::zero_value(q);
553
558
  two_means<T, Random, DotProduct, Node<S, T> >(nodes, f, random, true, p, q);
554
559
  for (int z = 0; z < f; z++)
@@ -594,8 +599,8 @@ struct DotProduct : Angular {
594
599
  // Step one: compute the norm of each vector and store that in its extra dimension (f-1)
595
600
  for (S i = 0; i < node_count; i++) {
596
601
  Node* node = get_node_ptr<S, Node>(nodes, _s, i);
597
- T norm = sqrt(dot(node->v, node->v, f));
598
- if (isnan(norm)) norm = 0;
602
+ T d = dot(node->v, node->v, f);
603
+ T norm = d < 0 ? 0 : sqrt(d);
599
604
  node->dot_factor = norm;
600
605
  }
601
606
 
@@ -612,9 +617,8 @@ struct DotProduct : Angular {
612
617
  for (S i = 0; i < node_count; i++) {
613
618
  Node* node = get_node_ptr<S, Node>(nodes, _s, i);
614
619
  T node_norm = node->dot_factor;
615
-
616
- T dot_factor = sqrt(pow(max_norm, static_cast<T>(2.0)) - pow(node_norm, static_cast<T>(2.0)));
617
- if (isnan(dot_factor)) dot_factor = 0;
620
+ T squared_norm_diff = pow(max_norm, static_cast<T>(2.0)) - pow(node_norm, static_cast<T>(2.0));
621
+ T dot_factor = squared_norm_diff < 0 ? 0 : sqrt(squared_norm_diff);
618
622
 
619
623
  node->dot_factor = dot_factor;
620
624
  }
@@ -753,7 +757,7 @@ struct Minkowski : Base {
753
757
  struct Euclidean : Minkowski {
754
758
  template<typename S, typename T>
755
759
  static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
756
- return euclidean_distance(x->v, y->v, f);
760
+ return euclidean_distance(x->v, y->v, f);
757
761
  }
758
762
  template<typename S, typename T, typename Random>
759
763
  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
@@ -817,7 +821,7 @@ class AnnoyIndexInterface {
817
821
  // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
818
822
  virtual ~AnnoyIndexInterface() {};
819
823
  virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
820
- virtual bool build(int q, char** error=NULL) = 0;
824
+ virtual bool build(int q, int n_threads=-1, char** error=NULL) = 0;
821
825
  virtual bool unbuild(char** error=NULL) = 0;
822
826
  virtual bool save(const char* filename, bool prefault=false, char** error=NULL) = 0;
823
827
  virtual void unload() = 0;
@@ -833,7 +837,7 @@ class AnnoyIndexInterface {
833
837
  virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
834
838
  };
835
839
 
836
- template<typename S, typename T, typename Distance, typename Random>
840
+ template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy>
837
841
  class AnnoyIndex : public AnnoyIndexInterface<S, T> {
838
842
  /*
839
843
  * We use random projection to build a forest of binary trees of all items.
@@ -850,12 +854,13 @@ protected:
850
854
  const int _f;
851
855
  size_t _s;
852
856
  S _n_items;
853
- Random _random;
854
857
  void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate
855
858
  S _n_nodes;
856
859
  S _nodes_size;
857
860
  vector<S> _roots;
858
861
  S _K;
862
+ bool _is_seeded;
863
+ int _seed;
859
864
  bool _loaded;
860
865
  bool _verbose;
861
866
  int _fd;
@@ -863,7 +868,7 @@ protected:
863
868
  bool _built;
864
869
  public:
865
870
 
866
- AnnoyIndex(int f) : _f(f), _random() {
871
+ AnnoyIndex(int f) : _f(f) {
867
872
  _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
868
873
  _verbose = false;
869
874
  _built = false;
@@ -907,7 +912,7 @@ public:
907
912
 
908
913
  return true;
909
914
  }
910
-
915
+
911
916
  bool on_disk_build(const char* file, char** error=NULL) {
912
917
  _on_disk = true;
913
918
  _fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600);
@@ -928,8 +933,8 @@ public:
928
933
  #endif
929
934
  return true;
930
935
  }
931
-
932
- bool build(int q, char** error=NULL) {
936
+
937
+ bool build(int q, int n_threads=-1, char** error=NULL) {
933
938
  if (_loaded) {
934
939
  set_error_from_string(error, "You can't build a loaded index");
935
940
  return false;
@@ -943,21 +948,8 @@ public:
943
948
  D::template preprocess<T, S, Node>(_nodes, _s, _n_items, _f);
944
949
 
945
950
  _n_nodes = _n_items;
946
- while (1) {
947
- if (q == -1 && _n_nodes >= _n_items * 2)
948
- break;
949
- if (q != -1 && _roots.size() >= (size_t)q)
950
- break;
951
- if (_verbose) showUpdate("pass %zd...\n", _roots.size());
952
-
953
- vector<S> indices;
954
- for (S i = 0; i < _n_items; i++) {
955
- if (_get(i)->n_descendants >= 1) // Issue #223
956
- indices.push_back(i);
957
- }
958
951
 
959
- _roots.push_back(_make_tree(indices, true));
960
- }
952
+ ThreadedBuildPolicy::template build<S, T>(this, q, n_threads);
961
953
 
962
954
  // Also, copy the roots into the last segment of the array
963
955
  // This way we can load them faster without reading the whole file
@@ -967,7 +959,7 @@ public:
967
959
  _n_nodes += _roots.size();
968
960
 
969
961
  if (_verbose) showUpdate("has %d nodes\n", _n_nodes);
970
-
962
+
971
963
  if (_on_disk) {
972
964
  if (!remap_memory_and_truncate(&_nodes, _fd,
973
965
  static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
@@ -981,7 +973,7 @@ public:
981
973
  _built = true;
982
974
  return true;
983
975
  }
984
-
976
+
985
977
  bool unbuild(char** error=NULL) {
986
978
  if (_loaded) {
987
979
  set_error_from_string(error, "You can't unbuild a loaded index");
@@ -1035,6 +1027,7 @@ public:
1035
1027
  _n_nodes = 0;
1036
1028
  _nodes_size = 0;
1037
1029
  _on_disk = false;
1030
+ _is_seeded = false;
1038
1031
  _roots.clear();
1039
1032
  }
1040
1033
 
@@ -1142,29 +1135,82 @@ public:
1142
1135
  }
1143
1136
 
1144
1137
  void set_seed(int seed) {
1138
+ _is_seeded = true;
1139
+ _seed = seed;
1140
+ }
1141
+
1142
+ void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
1143
+ Random _random;
1144
+ // Each thread needs its own seed, otherwise each thread would be building the same tree(s)
1145
+ int seed = _is_seeded ? _seed + thread_idx : thread_idx;
1145
1146
  _random.set_seed(seed);
1147
+
1148
+ vector<S> thread_roots;
1149
+ while (1) {
1150
+ if (q == -1) {
1151
+ threaded_build_policy.lock_n_nodes();
1152
+ if (_n_nodes >= 2 * _n_items) {
1153
+ threaded_build_policy.unlock_n_nodes();
1154
+ break;
1155
+ }
1156
+ threaded_build_policy.unlock_n_nodes();
1157
+ } else {
1158
+ if (thread_roots.size() >= (size_t)q) {
1159
+ break;
1160
+ }
1161
+ }
1162
+
1163
+ if (_verbose) showUpdate("pass %zd...\n", thread_roots.size());
1164
+
1165
+ vector<S> indices;
1166
+ threaded_build_policy.lock_shared_nodes();
1167
+ for (S i = 0; i < _n_items; i++) {
1168
+ if (_get(i)->n_descendants >= 1) { // Issue #223
1169
+ indices.push_back(i);
1170
+ }
1171
+ }
1172
+ threaded_build_policy.unlock_shared_nodes();
1173
+
1174
+ thread_roots.push_back(_make_tree(indices, true, _random, threaded_build_policy));
1175
+ }
1176
+
1177
+ threaded_build_policy.lock_roots();
1178
+ _roots.insert(_roots.end(), thread_roots.begin(), thread_roots.end());
1179
+ threaded_build_policy.unlock_roots();
1146
1180
  }
1147
1181
 
1148
1182
  protected:
1149
- void _allocate_size(S n) {
1183
+ void _reallocate_nodes(S n) {
1184
+ const double reallocation_factor = 1.3;
1185
+ S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * reallocation_factor));
1186
+ void *old = _nodes;
1187
+
1188
+ if (_on_disk) {
1189
+ if (!remap_memory_and_truncate(&_nodes, _fd,
1190
+ static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
1191
+ static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
1192
+ _verbose)
1193
+ showUpdate("File truncation error\n");
1194
+ } else {
1195
+ _nodes = realloc(_nodes, _s * new_nodes_size);
1196
+ memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
1197
+ }
1198
+
1199
+ _nodes_size = new_nodes_size;
1200
+ if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
1201
+ }
1202
+
1203
+ void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
1150
1204
  if (n > _nodes_size) {
1151
- const double reallocation_factor = 1.3;
1152
- S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * reallocation_factor));
1153
- void *old = _nodes;
1154
-
1155
- if (_on_disk) {
1156
- if (!remap_memory_and_truncate(&_nodes, _fd,
1157
- static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
1158
- static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
1159
- _verbose)
1160
- showUpdate("File truncation error\n");
1161
- } else {
1162
- _nodes = realloc(_nodes, _s * new_nodes_size);
1163
- memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
1164
- }
1205
+ threaded_build_policy.lock_nodes();
1206
+ _reallocate_nodes(n);
1207
+ threaded_build_policy.unlock_nodes();
1208
+ }
1209
+ }
1165
1210
 
1166
- _nodes_size = new_nodes_size;
1167
- if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
1211
+ void _allocate_size(S n) {
1212
+ if (n > _nodes_size) {
1213
+ _reallocate_nodes(n);
1168
1214
  }
1169
1215
  }
1170
1216
 
@@ -1179,7 +1225,7 @@ protected:
1179
1225
  return std::max(f, 1-f);
1180
1226
  }
1181
1227
 
1182
- S _make_tree(const vector<S>& indices, bool is_root) {
1228
+ S _make_tree(const vector<S>& indices, bool is_root, Random& _random, ThreadedBuildPolicy& threaded_build_policy) {
1183
1229
  // The basic rule is that if we have <= _K items, then it's a leaf node, otherwise it's a split node.
1184
1230
  // There's some regrettable complications caused by the problem that root nodes have to be "special":
1185
1231
  // 1. We identify root nodes by the arguable logic that _n_items == n->n_descendants, regardless of how many descendants they actually have
@@ -1189,8 +1235,12 @@ protected:
1189
1235
  return indices[0];
1190
1236
 
1191
1237
  if (indices.size() <= (size_t)_K && (!is_root || (size_t)_n_items <= (size_t)_K || indices.size() == 1)) {
1192
- _allocate_size(_n_nodes + 1);
1238
+ threaded_build_policy.lock_n_nodes();
1239
+ _allocate_size(_n_nodes + 1, threaded_build_policy);
1193
1240
  S item = _n_nodes++;
1241
+ threaded_build_policy.unlock_n_nodes();
1242
+
1243
+ threaded_build_policy.lock_shared_nodes();
1194
1244
  Node* m = _get(item);
1195
1245
  m->n_descendants = is_root ? _n_items : (S)indices.size();
1196
1246
 
@@ -1200,9 +1250,12 @@ protected:
1200
1250
  // Only copy when necessary to avoid crash in MSVC 9. #293
1201
1251
  if (!indices.empty())
1202
1252
  memcpy(m->children, &indices[0], indices.size() * sizeof(S));
1253
+
1254
+ threaded_build_policy.unlock_shared_nodes();
1203
1255
  return item;
1204
1256
  }
1205
1257
 
1258
+ threaded_build_policy.lock_shared_nodes();
1206
1259
  vector<Node*> children;
1207
1260
  for (size_t i = 0; i < indices.size(); i++) {
1208
1261
  S j = indices[i];
@@ -1233,6 +1286,7 @@ protected:
1233
1286
  if (_split_imbalance(children_indices[0], children_indices[1]) < 0.95)
1234
1287
  break;
1235
1288
  }
1289
+ threaded_build_policy.unlock_shared_nodes();
1236
1290
 
1237
1291
  // If we didn't find a hyperplane, just randomize sides as a last option
1238
1292
  while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
@@ -1259,13 +1313,17 @@ protected:
1259
1313
  m->n_descendants = is_root ? _n_items : (S)indices.size();
1260
1314
  for (int side = 0; side < 2; side++) {
1261
1315
  // run _make_tree for the smallest child first (for cache locality)
1262
- m->children[side^flip] = _make_tree(children_indices[side^flip], false);
1316
+ m->children[side^flip] = _make_tree(children_indices[side^flip], false, _random, threaded_build_policy);
1263
1317
  }
1264
1318
 
1265
-
1266
- _allocate_size(_n_nodes + 1);
1319
+ threaded_build_policy.lock_n_nodes();
1320
+ _allocate_size(_n_nodes + 1, threaded_build_policy);
1267
1321
  S item = _n_nodes++;
1322
+ threaded_build_policy.unlock_n_nodes();
1323
+
1324
+ threaded_build_policy.lock_shared_nodes();
1268
1325
  memcpy(_get(item), m, _s);
1326
+ threaded_build_policy.unlock_shared_nodes();
1269
1327
 
1270
1328
  return item;
1271
1329
  }
@@ -1311,7 +1369,7 @@ protected:
1311
1369
  vector<pair<T, S> > nns_dist;
1312
1370
  S last = -1;
1313
1371
  for (size_t i = 0; i < nns.size(); i++) {
1314
- S j = nns[i];
1372
+ S j = nns[i];
1315
1373
  if (j == last)
1316
1374
  continue;
1317
1375
  last = j;
@@ -1330,5 +1388,92 @@ protected:
1330
1388
  }
1331
1389
  };
1332
1390
 
1391
+ class AnnoyIndexSingleThreadedBuildPolicy {
1392
+ public:
1393
+ template<typename S, typename T, typename D, typename Random>
1394
+ static void build(AnnoyIndex<S, T, D, Random, AnnoyIndexSingleThreadedBuildPolicy>* annoy, int q, int n_threads) {
1395
+ AnnoyIndexSingleThreadedBuildPolicy threaded_build_policy;
1396
+ annoy->thread_build(q, 0, threaded_build_policy);
1397
+ }
1398
+
1399
+ void lock_n_nodes() {}
1400
+ void unlock_n_nodes() {}
1401
+
1402
+ void lock_nodes() {}
1403
+ void unlock_nodes() {}
1404
+
1405
+ void lock_shared_nodes() {}
1406
+ void unlock_shared_nodes() {}
1407
+
1408
+ void lock_roots() {}
1409
+ void unlock_roots() {}
1410
+ };
1411
+
1412
+ #ifdef ANNOYLIB_MULTITHREADED_BUILD
1413
+ class AnnoyIndexMultiThreadedBuildPolicy {
1414
+ private:
1415
+ std::shared_timed_mutex nodes_mutex;
1416
+ std::mutex n_nodes_mutex;
1417
+ std::mutex roots_mutex;
1418
+
1419
+ public:
1420
+ template<typename S, typename T, typename D, typename Random>
1421
+ static void build(AnnoyIndex<S, T, D, Random, AnnoyIndexMultiThreadedBuildPolicy>* annoy, int q, int n_threads) {
1422
+ AnnoyIndexMultiThreadedBuildPolicy threaded_build_policy;
1423
+ if (n_threads == -1) {
1424
+ // If the hardware_concurrency() value is not well defined or not computable, it returns 0.
1425
+ // We guard against this by using at least 1 thread.
1426
+ n_threads = std::max(1, (int)std::thread::hardware_concurrency());
1427
+ }
1428
+
1429
+ vector<std::thread> threads(n_threads);
1430
+
1431
+ for (int thread_idx = 0; thread_idx < n_threads; thread_idx++) {
1432
+ int trees_per_thread = q == -1 ? -1 : (int)floor((q + thread_idx) / n_threads);
1433
+
1434
+ threads[thread_idx] = std::thread(
1435
+ &AnnoyIndex<S, T, D, Random, AnnoyIndexMultiThreadedBuildPolicy>::thread_build,
1436
+ annoy,
1437
+ trees_per_thread,
1438
+ thread_idx,
1439
+ std::ref(threaded_build_policy)
1440
+ );
1441
+ }
1442
+
1443
+ for (auto& thread : threads) {
1444
+ thread.join();
1445
+ }
1446
+ }
1447
+
1448
+ void lock_n_nodes() {
1449
+ n_nodes_mutex.lock();
1450
+ }
1451
+ void unlock_n_nodes() {
1452
+ n_nodes_mutex.unlock();
1453
+ }
1454
+
1455
+ void lock_nodes() {
1456
+ nodes_mutex.lock();
1457
+ }
1458
+ void unlock_nodes() {
1459
+ nodes_mutex.unlock();
1460
+ }
1461
+
1462
+ void lock_shared_nodes() {
1463
+ nodes_mutex.lock_shared();
1464
+ }
1465
+ void unlock_shared_nodes() {
1466
+ nodes_mutex.unlock_shared();
1467
+ }
1468
+
1469
+ void lock_roots() {
1470
+ roots_mutex.lock();
1471
+ }
1472
+ void unlock_roots() {
1473
+ roots_mutex.unlock();
1474
+ }
1475
+ };
1476
+ #endif
1477
+
1333
1478
  #endif
1334
1479
  // vim: tabstop=2 shiftwidth=2
@@ -0,0 +1,242 @@
1
+
2
+ // This is from https://code.google.com/p/mman-win32/
3
+ //
4
+ // Licensed under MIT
5
+
6
+ #ifndef _MMAN_WIN32_H
7
+ #define _MMAN_WIN32_H
8
+
9
+ #ifndef _WIN32_WINNT // Allow use of features specific to Windows XP or later.
10
+ #define _WIN32_WINNT 0x0501 // Change this to the appropriate value to target other versions of Windows.
11
+ #endif
12
+
13
+ #include <sys/types.h>
14
+ #include <windows.h>
15
+ #include <errno.h>
16
+ #include <io.h>
17
+
18
+ #define PROT_NONE 0
19
+ #define PROT_READ 1
20
+ #define PROT_WRITE 2
21
+ #define PROT_EXEC 4
22
+
23
+ #define MAP_FILE 0
24
+ #define MAP_SHARED 1
25
+ #define MAP_PRIVATE 2
26
+ #define MAP_TYPE 0xf
27
+ #define MAP_FIXED 0x10
28
+ #define MAP_ANONYMOUS 0x20
29
+ #define MAP_ANON MAP_ANONYMOUS
30
+
31
+ #define MAP_FAILED ((void *)-1)
32
+
33
+ /* Flags for msync. */
34
+ #define MS_ASYNC 1
35
+ #define MS_SYNC 2
36
+ #define MS_INVALIDATE 4
37
+
38
+ #ifndef FILE_MAP_EXECUTE
39
+ #define FILE_MAP_EXECUTE 0x0020
40
+ #endif
41
+
42
+ static int __map_mman_error(const DWORD err, const int deferr)
43
+ {
44
+ if (err == 0)
45
+ return 0;
46
+ //TODO: implement
47
+ return err;
48
+ }
49
+
50
+ static DWORD __map_mmap_prot_page(const int prot)
51
+ {
52
+ DWORD protect = 0;
53
+
54
+ if (prot == PROT_NONE)
55
+ return protect;
56
+
57
+ if ((prot & PROT_EXEC) != 0)
58
+ {
59
+ protect = ((prot & PROT_WRITE) != 0) ?
60
+ PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
61
+ }
62
+ else
63
+ {
64
+ protect = ((prot & PROT_WRITE) != 0) ?
65
+ PAGE_READWRITE : PAGE_READONLY;
66
+ }
67
+
68
+ return protect;
69
+ }
70
+
71
+ static DWORD __map_mmap_prot_file(const int prot)
72
+ {
73
+ DWORD desiredAccess = 0;
74
+
75
+ if (prot == PROT_NONE)
76
+ return desiredAccess;
77
+
78
+ if ((prot & PROT_READ) != 0)
79
+ desiredAccess |= FILE_MAP_READ;
80
+ if ((prot & PROT_WRITE) != 0)
81
+ desiredAccess |= FILE_MAP_WRITE;
82
+ if ((prot & PROT_EXEC) != 0)
83
+ desiredAccess |= FILE_MAP_EXECUTE;
84
+
85
+ return desiredAccess;
86
+ }
87
+
88
+ inline void* mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
89
+ {
90
+ HANDLE fm, h;
91
+
92
+ void * map = MAP_FAILED;
93
+
94
+ #ifdef _MSC_VER
95
+ #pragma warning(push)
96
+ #pragma warning(disable: 4293)
97
+ #endif
98
+
99
+ const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) ?
100
+ (DWORD)off : (DWORD)(off & 0xFFFFFFFFL);
101
+ const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
102
+ (DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL);
103
+ const DWORD protect = __map_mmap_prot_page(prot);
104
+ const DWORD desiredAccess = __map_mmap_prot_file(prot);
105
+
106
+ const off_t maxSize = off + (off_t)len;
107
+
108
+ const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) ?
109
+ (DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL);
110
+ const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
111
+ (DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);
112
+
113
+ #ifdef _MSC_VER
114
+ #pragma warning(pop)
115
+ #endif
116
+
117
+ errno = 0;
118
+
119
+ if (len == 0
120
+ /* Unsupported flag combinations */
121
+ || (flags & MAP_FIXED) != 0
122
+ /* Unsupported protection combinations */
123
+ || prot == PROT_EXEC)
124
+ {
125
+ errno = EINVAL;
126
+ return MAP_FAILED;
127
+ }
128
+
129
+ h = ((flags & MAP_ANONYMOUS) == 0) ?
130
+ (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE;
131
+
132
+ if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE)
133
+ {
134
+ errno = EBADF;
135
+ return MAP_FAILED;
136
+ }
137
+
138
+ fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
139
+
140
+ if (fm == NULL)
141
+ {
142
+ errno = __map_mman_error(GetLastError(), EPERM);
143
+ return MAP_FAILED;
144
+ }
145
+
146
+ map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
147
+
148
+ CloseHandle(fm);
149
+
150
+ if (map == NULL)
151
+ {
152
+ errno = __map_mman_error(GetLastError(), EPERM);
153
+ return MAP_FAILED;
154
+ }
155
+
156
+ return map;
157
+ }
158
+
159
+ inline int munmap(void *addr, size_t len)
160
+ {
161
+ if (UnmapViewOfFile(addr))
162
+ return 0;
163
+
164
+ errno = __map_mman_error(GetLastError(), EPERM);
165
+
166
+ return -1;
167
+ }
168
+
169
+ inline int mprotect(void *addr, size_t len, int prot)
170
+ {
171
+ DWORD newProtect = __map_mmap_prot_page(prot);
172
+ DWORD oldProtect = 0;
173
+
174
+ if (VirtualProtect(addr, len, newProtect, &oldProtect))
175
+ return 0;
176
+
177
+ errno = __map_mman_error(GetLastError(), EPERM);
178
+
179
+ return -1;
180
+ }
181
+
182
+ inline int msync(void *addr, size_t len, int flags)
183
+ {
184
+ if (FlushViewOfFile(addr, len))
185
+ return 0;
186
+
187
+ errno = __map_mman_error(GetLastError(), EPERM);
188
+
189
+ return -1;
190
+ }
191
+
192
+ inline int mlock(const void *addr, size_t len)
193
+ {
194
+ if (VirtualLock((LPVOID)addr, len))
195
+ return 0;
196
+
197
+ errno = __map_mman_error(GetLastError(), EPERM);
198
+
199
+ return -1;
200
+ }
201
+
202
+ inline int munlock(const void *addr, size_t len)
203
+ {
204
+ if (VirtualUnlock((LPVOID)addr, len))
205
+ return 0;
206
+
207
+ errno = __map_mman_error(GetLastError(), EPERM);
208
+
209
+ return -1;
210
+ }
211
+
212
+ #if !defined(__MINGW32__)
213
+ inline int ftruncate(const int fd, const int64_t size) {
214
+ if (fd < 0) {
215
+ errno = EBADF;
216
+ return -1;
217
+ }
218
+
219
+ HANDLE h = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
220
+ LARGE_INTEGER li_start, li_size;
221
+ li_start.QuadPart = static_cast<int64_t>(0);
222
+ li_size.QuadPart = size;
223
+ if (SetFilePointerEx(h, li_start, NULL, FILE_CURRENT) == ~0 ||
224
+ SetFilePointerEx(h, li_size, NULL, FILE_BEGIN) == ~0 ||
225
+ !SetEndOfFile(h)) {
226
+ unsigned long error = GetLastError();
227
+ fprintf(stderr, "I/O error while truncating: %lu\n", error);
228
+ switch (error) {
229
+ case ERROR_INVALID_HANDLE:
230
+ errno = EBADF;
231
+ break;
232
+ default:
233
+ errno = EIO;
234
+ break;
235
+ }
236
+ return -1;
237
+ }
238
+ return 0;
239
+ }
240
+ #endif
241
+
242
+ #endif
@@ -68,9 +68,10 @@ module Annoy
68
68
  # Build a forest of index trees. After building, no more items can be added.
69
69
  #
70
70
  # @param n_trees [Integer] The number of trees. More trees give higher search precision.
71
+ # @param n_jobs [Integer] The number of threads used to build the trees. If -1 is given, uses all available CPU cores.
71
72
  # @return [Boolean]
72
- def build(n_trees)
73
- @index.build(n_trees)
73
+ def build(n_trees, n_jobs: -1)
74
+ @index.build(n_trees, n_jobs)
74
75
  end
75
76
 
76
77
  # Save the search index to disk. After saving, no more items can be added.
@@ -3,5 +3,5 @@
3
3
  # Annoy.rb is a Ruby wrapper for Annoy (Approximate Nearest Neighbors Oh Yeah).
4
4
  module Annoy
5
5
  # The version of Annoy.rb you are using.
6
- VERSION = '0.1.0'.freeze
6
+ VERSION = '0.2.0'.freeze
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: annoy-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-08 00:00:00.000000000 Z
11
+ date: 2020-09-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Annoy.rb is a Ruby binding for the Annoy (Approximate Nearest Neighbors
14
14
  Oh Yeah).
@@ -34,6 +34,7 @@ files:
34
34
  - ext/annoy/extconf.rb
35
35
  - ext/annoy/src/annoylib.h
36
36
  - ext/annoy/src/kissrandom.h
37
+ - ext/annoy/src/mman.h
37
38
  - lib/annoy.rb
38
39
  - lib/annoy/version.rb
39
40
  homepage: https://github.com/yoshoku/annoy.rb
@@ -43,6 +44,7 @@ metadata:
43
44
  homepage_uri: https://github.com/yoshoku/annoy.rb
44
45
  source_code_uri: https://github.com/yoshoku/annoy.rb
45
46
  changelog_uri: https://github.com/yoshoku/annoy.rb/blob/master/CHANGELOG.md
47
+ documentation_uri: https://yoshoku.github.io/annoy.rb/doc/
46
48
  post_install_message:
47
49
  rdoc_options: []
48
50
  require_paths: