faiss 0.1.7 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -14,18 +14,16 @@
14
14
 
15
15
  #include <faiss/impl/FaissAssert.h>
16
16
 
17
-
18
17
  namespace faiss {
19
18
 
20
-
21
19
  /***********************************************************************
22
20
  * RangeSearchResult
23
21
  ***********************************************************************/
24
22
 
25
- RangeSearchResult::RangeSearchResult (idx_t nq, bool alloc_lims): nq (nq) {
23
+ RangeSearchResult::RangeSearchResult(idx_t nq, bool alloc_lims) : nq(nq) {
26
24
  if (alloc_lims) {
27
- lims = new size_t [nq + 1];
28
- memset (lims, 0, sizeof(*lims) * (nq + 1));
25
+ lims = new size_t[nq + 1];
26
+ memset(lims, 0, sizeof(*lims) * (nq + 1));
29
27
  } else {
30
28
  lims = nullptr;
31
29
  }
@@ -36,145 +34,129 @@ RangeSearchResult::RangeSearchResult (idx_t nq, bool alloc_lims): nq (nq) {
36
34
 
37
35
  /// called when lims contains the number of result entries
38
36
  /// for each query
39
- void RangeSearchResult::do_allocation () {
37
+ void RangeSearchResult::do_allocation() {
38
+ // works only if all the partial results are aggregated
39
+ // simultaneously
40
+ FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
40
41
  size_t ofs = 0;
41
42
  for (int i = 0; i < nq; i++) {
42
43
  size_t n = lims[i];
43
- lims [i] = ofs;
44
+ lims[i] = ofs;
44
45
  ofs += n;
45
46
  }
46
- lims [nq] = ofs;
47
- labels = new idx_t [ofs];
48
- distances = new float [ofs];
47
+ lims[nq] = ofs;
48
+ labels = new idx_t[ofs];
49
+ distances = new float[ofs];
49
50
  }
50
51
 
51
- RangeSearchResult::~RangeSearchResult () {
52
- delete [] labels;
53
- delete [] distances;
54
- delete [] lims;
52
+ RangeSearchResult::~RangeSearchResult() {
53
+ delete[] labels;
54
+ delete[] distances;
55
+ delete[] lims;
55
56
  }
56
57
 
57
-
58
-
59
-
60
-
61
58
  /***********************************************************************
62
59
  * BufferList
63
60
  ***********************************************************************/
64
61
 
65
-
66
- BufferList::BufferList (size_t buffer_size):
67
- buffer_size (buffer_size)
68
- {
62
+ BufferList::BufferList(size_t buffer_size) : buffer_size(buffer_size) {
69
63
  wp = buffer_size;
70
64
  }
71
65
 
72
- BufferList::~BufferList ()
73
- {
66
+ BufferList::~BufferList() {
74
67
  for (int i = 0; i < buffers.size(); i++) {
75
- delete [] buffers[i].ids;
76
- delete [] buffers[i].dis;
68
+ delete[] buffers[i].ids;
69
+ delete[] buffers[i].dis;
77
70
  }
78
71
  }
79
72
 
80
- void BufferList::add (idx_t id, float dis) {
73
+ void BufferList::add(idx_t id, float dis) {
81
74
  if (wp == buffer_size) { // need new buffer
82
75
  append_buffer();
83
76
  }
84
- Buffer & buf = buffers.back();
85
- buf.ids [wp] = id;
86
- buf.dis [wp] = dis;
77
+ Buffer& buf = buffers.back();
78
+ buf.ids[wp] = id;
79
+ buf.dis[wp] = dis;
87
80
  wp++;
88
81
  }
89
82
 
90
-
91
- void BufferList::append_buffer ()
92
- {
93
- Buffer buf = {new idx_t [buffer_size], new float [buffer_size]};
94
- buffers.push_back (buf);
83
+ void BufferList::append_buffer() {
84
+ Buffer buf = {new idx_t[buffer_size], new float[buffer_size]};
85
+ buffers.push_back(buf);
95
86
  wp = 0;
96
87
  }
97
88
 
98
89
  /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
99
90
  /// tables dest_ids, dest_dis
100
- void BufferList::copy_range (size_t ofs, size_t n,
101
- idx_t * dest_ids, float *dest_dis)
102
- {
91
+ void BufferList::copy_range(
92
+ size_t ofs,
93
+ size_t n,
94
+ idx_t* dest_ids,
95
+ float* dest_dis) {
103
96
  size_t bno = ofs / buffer_size;
104
97
  ofs -= bno * buffer_size;
105
98
  while (n > 0) {
106
99
  size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs;
107
- Buffer buf = buffers [bno];
108
- memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
109
- memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
100
+ Buffer buf = buffers[bno];
101
+ memcpy(dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
102
+ memcpy(dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
110
103
  dest_ids += ncopy;
111
104
  dest_dis += ncopy;
112
105
  ofs = 0;
113
- bno ++;
106
+ bno++;
114
107
  n -= ncopy;
115
108
  }
116
109
  }
117
110
 
118
-
119
111
  /***********************************************************************
120
112
  * RangeSearchPartialResult
121
113
  ***********************************************************************/
122
114
 
123
- void RangeQueryResult::add (float dis, idx_t id) {
115
+ void RangeQueryResult::add(float dis, idx_t id) {
124
116
  nres++;
125
- pres->add (id, dis);
117
+ pres->add(id, dis);
126
118
  }
127
119
 
128
-
129
-
130
- RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
131
- BufferList(res_in->buffer_size),
132
- res(res_in)
133
- {}
134
-
120
+ RangeSearchPartialResult::RangeSearchPartialResult(RangeSearchResult* res_in)
121
+ : BufferList(res_in->buffer_size), res(res_in) {}
135
122
 
136
123
  /// begin a new result
137
- RangeQueryResult &
138
- RangeSearchPartialResult::new_result (idx_t qno)
139
- {
124
+ RangeQueryResult& RangeSearchPartialResult::new_result(idx_t qno) {
140
125
  RangeQueryResult qres = {qno, 0, this};
141
- queries.push_back (qres);
126
+ queries.push_back(qres);
142
127
  return queries.back();
143
128
  }
144
129
 
145
-
146
- void RangeSearchPartialResult::finalize ()
147
- {
148
- set_lims ();
130
+ void RangeSearchPartialResult::finalize() {
131
+ set_lims();
149
132
  #pragma omp barrier
150
133
 
151
134
  #pragma omp single
152
- res->do_allocation ();
135
+ res->do_allocation();
153
136
 
154
137
  #pragma omp barrier
155
- copy_result ();
138
+ copy_result();
156
139
  }
157
140
 
158
-
159
141
  /// called by range_search before do_allocation
160
- void RangeSearchPartialResult::set_lims ()
161
- {
142
+ void RangeSearchPartialResult::set_lims() {
162
143
  for (int i = 0; i < queries.size(); i++) {
163
- RangeQueryResult & qres = queries[i];
144
+ RangeQueryResult& qres = queries[i];
164
145
  res->lims[qres.qno] = qres.nres;
165
146
  }
166
147
  }
167
148
 
168
149
  /// called by range_search after do_allocation
169
- void RangeSearchPartialResult::copy_result (bool incremental)
170
- {
150
+ void RangeSearchPartialResult::copy_result(bool incremental) {
171
151
  size_t ofs = 0;
172
152
  for (int i = 0; i < queries.size(); i++) {
173
- RangeQueryResult & qres = queries[i];
153
+ RangeQueryResult& qres = queries[i];
174
154
 
175
- copy_range (ofs, qres.nres,
176
- res->labels + res->lims[qres.qno],
177
- res->distances + res->lims[qres.qno]);
155
+ copy_range(
156
+ ofs,
157
+ qres.nres,
158
+ res->labels + res->lims[qres.qno],
159
+ res->distances + res->lims[qres.qno]);
178
160
  if (incremental) {
179
161
  res->lims[qres.qno] += qres.nres;
180
162
  }
@@ -182,26 +164,28 @@ void RangeSearchPartialResult::copy_result (bool incremental)
182
164
  }
183
165
  }
184
166
 
185
- void RangeSearchPartialResult::merge (std::vector <RangeSearchPartialResult *> &
186
- partial_results, bool do_delete)
187
- {
188
-
167
+ void RangeSearchPartialResult::merge(
168
+ std::vector<RangeSearchPartialResult*>& partial_results,
169
+ bool do_delete) {
189
170
  int npres = partial_results.size();
190
- if (npres == 0) return;
191
- RangeSearchResult *result = partial_results[0]->res;
171
+ if (npres == 0)
172
+ return;
173
+ RangeSearchResult* result = partial_results[0]->res;
192
174
  size_t nx = result->nq;
193
175
 
194
176
  // count
195
- for (const RangeSearchPartialResult * pres : partial_results) {
196
- if (!pres) continue;
197
- for (const RangeQueryResult &qres : pres->queries) {
177
+ for (const RangeSearchPartialResult* pres : partial_results) {
178
+ if (!pres)
179
+ continue;
180
+ for (const RangeQueryResult& qres : pres->queries) {
198
181
  result->lims[qres.qno] += qres.nres;
199
182
  }
200
183
  }
201
- result->do_allocation ();
184
+ result->do_allocation();
202
185
  for (int j = 0; j < npres; j++) {
203
- if (!partial_results[j]) continue;
204
- partial_results[j]->copy_result (true);
186
+ if (!partial_results[j])
187
+ continue;
188
+ partial_results[j]->copy_result(true);
205
189
  if (do_delete) {
206
190
  delete partial_results[j];
207
191
  partial_results[j] = nullptr;
@@ -210,22 +194,19 @@ void RangeSearchPartialResult::merge (std::vector <RangeSearchPartialResult *> &
210
194
 
211
195
  // reset the limits
212
196
  for (size_t i = nx; i > 0; i--) {
213
- result->lims [i] = result->lims [i - 1];
197
+ result->lims[i] = result->lims[i - 1];
214
198
  }
215
- result->lims [0] = 0;
199
+ result->lims[0] = 0;
216
200
  }
217
201
 
218
202
  /***********************************************************************
219
203
  * IDSelectorRange
220
204
  ***********************************************************************/
221
205
 
222
- IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
223
- imin (imin), imax (imax)
224
- {
225
- }
206
+ IDSelectorRange::IDSelectorRange(idx_t imin, idx_t imax)
207
+ : imin(imin), imax(imax) {}
226
208
 
227
- bool IDSelectorRange::is_member (idx_t id) const
228
- {
209
+ bool IDSelectorRange::is_member(idx_t id) const {
229
210
  return id >= imin && id < imax;
230
211
  }
231
212
 
@@ -233,33 +214,29 @@ bool IDSelectorRange::is_member (idx_t id) const
233
214
  * IDSelectorArray
234
215
  ***********************************************************************/
235
216
 
236
- IDSelectorArray::IDSelectorArray (size_t n, const idx_t *ids):
237
- n (n), ids(ids)
238
- {
239
- }
217
+ IDSelectorArray::IDSelectorArray(size_t n, const idx_t* ids) : n(n), ids(ids) {}
240
218
 
241
- bool IDSelectorArray::is_member (idx_t id) const
242
- {
219
+ bool IDSelectorArray::is_member(idx_t id) const {
243
220
  for (idx_t i = 0; i < n; i++) {
244
- if (ids[i] == id) return true;
221
+ if (ids[i] == id)
222
+ return true;
245
223
  }
246
224
  return false;
247
225
  }
248
226
 
249
-
250
227
  /***********************************************************************
251
228
  * IDSelectorBatch
252
229
  ***********************************************************************/
253
230
 
254
- IDSelectorBatch::IDSelectorBatch (size_t n, const idx_t *indices)
255
- {
231
+ IDSelectorBatch::IDSelectorBatch(size_t n, const idx_t* indices) {
256
232
  nbits = 0;
257
- while (n > (1L << nbits)) nbits++;
233
+ while (n > (1L << nbits))
234
+ nbits++;
258
235
  nbits += 5;
259
236
  // for n = 1M, nbits = 25 is optimal, see P56659518
260
237
 
261
238
  mask = (1L << nbits) - 1;
262
- bloom.resize (1UL << (nbits - 3), 0);
239
+ bloom.resize(1UL << (nbits - 3), 0);
263
240
  for (long i = 0; i < n; i++) {
264
241
  Index::idx_t id = indices[i];
265
242
  set.insert(id);
@@ -268,39 +245,36 @@ IDSelectorBatch::IDSelectorBatch (size_t n, const idx_t *indices)
268
245
  }
269
246
  }
270
247
 
271
- bool IDSelectorBatch::is_member (idx_t i) const
272
- {
248
+ bool IDSelectorBatch::is_member(idx_t i) const {
273
249
  long im = i & mask;
274
- if(!(bloom[im>>3] & (1 << (im & 7)))) {
250
+ if (!(bloom[im >> 3] & (1 << (im & 7)))) {
275
251
  return 0;
276
252
  }
277
253
  return set.count(i);
278
254
  }
279
255
 
280
-
281
256
  /***********************************************************
282
257
  * Interrupt callback
283
258
  ***********************************************************/
284
259
 
285
-
286
260
  std::unique_ptr<InterruptCallback> InterruptCallback::instance;
287
261
 
288
262
  std::mutex InterruptCallback::lock;
289
263
 
290
- void InterruptCallback::clear_instance () {
291
- delete instance.release ();
264
+ void InterruptCallback::clear_instance() {
265
+ delete instance.release();
292
266
  }
293
267
 
294
- void InterruptCallback::check () {
268
+ void InterruptCallback::check() {
295
269
  if (!instance.get()) {
296
270
  return;
297
271
  }
298
- if (instance->want_interrupt ()) {
299
- FAISS_THROW_MSG ("computation interrupted");
272
+ if (instance->want_interrupt()) {
273
+ FAISS_THROW_MSG("computation interrupted");
300
274
  }
301
275
  }
302
276
 
303
- bool InterruptCallback::is_interrupted () {
277
+ bool InterruptCallback::is_interrupted() {
304
278
  if (!instance.get()) {
305
279
  return false;
306
280
  }
@@ -308,8 +282,7 @@ bool InterruptCallback::is_interrupted () {
308
282
  return instance->want_interrupt();
309
283
  }
310
284
 
311
-
312
- size_t InterruptCallback::get_period_hint (size_t flops) {
285
+ size_t InterruptCallback::get_period_hint(size_t flops) {
313
286
  if (!instance.get()) {
314
287
  return 1L << 30; // never check
315
288
  }
@@ -317,7 +290,4 @@ size_t InterruptCallback::get_period_hint (size_t flops) {
317
290
  return std::max((size_t)10 * 10 * 1000 * 1000 / (flops + 1), (size_t)1);
318
291
  }
319
292
 
320
-
321
-
322
-
323
293
  } // namespace faiss
@@ -15,15 +15,15 @@
15
15
 
16
16
  #include <stdint.h>
17
17
 
18
- #include <vector>
19
- #include <unordered_set>
18
+ #include <cstring>
20
19
  #include <memory>
21
20
  #include <mutex>
21
+ #include <unordered_set>
22
+ #include <vector>
22
23
 
23
24
  #include <faiss/Index.h>
24
25
  #include <faiss/impl/platform_macros.h>
25
26
 
26
-
27
27
  namespace faiss {
28
28
 
29
29
  /** The objective is to have a simple result structure while
@@ -31,42 +31,39 @@ namespace faiss {
31
31
  * do_allocation can be overloaded to allocate the result tables in
32
32
  * the matrix type of a scripting language like Lua or Python. */
33
33
  struct RangeSearchResult {
34
- size_t nq; ///< nb of queries
35
- size_t *lims; ///< size (nq + 1)
34
+ size_t nq; ///< nb of queries
35
+ size_t* lims; ///< size (nq + 1)
36
36
 
37
37
  typedef Index::idx_t idx_t;
38
38
 
39
- idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]]
40
- float *distances; ///< corresponding distances (not sorted)
39
+ idx_t* labels; ///< result for query i is labels[lims[i]:lims[i+1]]
40
+ float* distances; ///< corresponding distances (not sorted)
41
41
 
42
42
  size_t buffer_size; ///< size of the result buffers used
43
43
 
44
44
  /// lims must be allocated on input to range_search.
45
- explicit RangeSearchResult (idx_t nq, bool alloc_lims=true);
45
+ explicit RangeSearchResult(idx_t nq, bool alloc_lims = true);
46
46
 
47
47
  /// called when lims contains the number of result entries
48
48
  /// for each query
49
49
 
50
- virtual void do_allocation ();
50
+ virtual void do_allocation();
51
51
 
52
- virtual ~RangeSearchResult ();
52
+ virtual ~RangeSearchResult();
53
53
  };
54
54
 
55
-
56
55
  /** Encapsulates a set of ids to remove. */
57
56
  struct IDSelector {
58
57
  typedef Index::idx_t idx_t;
59
- virtual bool is_member (idx_t id) const = 0;
58
+ virtual bool is_member(idx_t id) const = 0;
60
59
  virtual ~IDSelector() {}
61
60
  };
62
61
 
63
-
64
-
65
62
  /** remove ids between [imin, imax) */
66
- struct IDSelectorRange: IDSelector {
63
+ struct IDSelectorRange : IDSelector {
67
64
  idx_t imin, imax;
68
65
 
69
- IDSelectorRange (idx_t imin, idx_t imax);
66
+ IDSelectorRange(idx_t imin, idx_t imax);
70
67
  bool is_member(idx_t id) const override;
71
68
  ~IDSelectorRange() override {}
72
69
  };
@@ -76,11 +73,11 @@ struct IDSelectorRange: IDSelector {
76
73
  * this is inefficient in most cases, except for IndexIVF with
77
74
  * maintain_direct_map
78
75
  */
79
- struct IDSelectorArray: IDSelector {
76
+ struct IDSelectorArray : IDSelector {
80
77
  size_t n;
81
- const idx_t *ids;
78
+ const idx_t* ids;
82
79
 
83
- IDSelectorArray (size_t n, const idx_t *ids);
80
+ IDSelectorArray(size_t n, const idx_t* ids);
84
81
  bool is_member(idx_t id) const override;
85
82
  ~IDSelectorArray() override {}
86
83
  };
@@ -91,8 +88,7 @@ struct IDSelectorArray: IDSelector {
91
88
  * unordered_set are just the least significant bits of the id. This
92
89
  * works fine for random ids or ids in sequences but will produce many
93
90
  * hash collisions if lsb's are always the same */
94
- struct IDSelectorBatch: IDSelector {
95
-
91
+ struct IDSelectorBatch : IDSelector {
96
92
  std::unordered_set<idx_t> set;
97
93
 
98
94
  typedef unsigned char uint8_t;
@@ -100,7 +96,7 @@ struct IDSelectorBatch: IDSelector {
100
96
  int nbits;
101
97
  idx_t mask;
102
98
 
103
- IDSelectorBatch (size_t n, const idx_t *indices);
99
+ IDSelectorBatch(size_t n, const idx_t* indices);
104
100
  bool is_member(idx_t id) const override;
105
101
  ~IDSelectorBatch() override {}
106
102
  };
@@ -124,28 +120,26 @@ struct BufferList {
124
120
  size_t buffer_size;
125
121
 
126
122
  struct Buffer {
127
- idx_t *ids;
128
- float *dis;
123
+ idx_t* ids;
124
+ float* dis;
129
125
  };
130
126
 
131
127
  std::vector<Buffer> buffers;
132
128
  size_t wp; ///< write pointer in the last buffer.
133
129
 
134
- explicit BufferList (size_t buffer_size);
130
+ explicit BufferList(size_t buffer_size);
135
131
 
136
- ~BufferList ();
132
+ ~BufferList();
137
133
 
138
134
  /// create a new buffer
139
- void append_buffer ();
135
+ void append_buffer();
140
136
 
141
137
  /// add one result, possibly appending a new buffer if needed
142
- void add (idx_t id, float dis);
138
+ void add(idx_t id, float dis);
143
139
 
144
140
  /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
145
141
  /// tables dest_ids, dest_dis
146
- void copy_range (size_t ofs, size_t n,
147
- idx_t * dest_ids, float *dest_dis);
148
-
142
+ void copy_range(size_t ofs, size_t n, idx_t* dest_ids, float* dest_dis);
149
143
  };
150
144
 
151
145
  struct RangeSearchPartialResult;
@@ -153,46 +147,45 @@ struct RangeSearchPartialResult;
153
147
  /// result structure for a single query
154
148
  struct RangeQueryResult {
155
149
  using idx_t = Index::idx_t;
156
- idx_t qno; //< id of the query
157
- size_t nres; //< nb of results for this query
158
- RangeSearchPartialResult * pres;
150
+ idx_t qno; //< id of the query
151
+ size_t nres; //< nb of results for this query
152
+ RangeSearchPartialResult* pres;
159
153
 
160
154
  /// called by search function to report a new result
161
- void add (float dis, idx_t id);
155
+ void add(float dis, idx_t id);
162
156
  };
163
157
 
164
158
  /// the entries in the buffers are split per query
165
- struct RangeSearchPartialResult: BufferList {
166
- RangeSearchResult * res;
159
+ struct RangeSearchPartialResult : BufferList {
160
+ RangeSearchResult* res;
167
161
 
168
162
  /// eventually the result will be stored in res_in
169
- explicit RangeSearchPartialResult (RangeSearchResult * res_in);
163
+ explicit RangeSearchPartialResult(RangeSearchResult* res_in);
170
164
 
171
165
  /// query ids + nb of results per query.
172
166
  std::vector<RangeQueryResult> queries;
173
167
 
174
168
  /// begin a new result
175
- RangeQueryResult & new_result (idx_t qno);
169
+ RangeQueryResult& new_result(idx_t qno);
176
170
 
177
171
  /*****************************************
178
172
  * functions used at the end of the search to merge the result
179
173
  * lists */
180
- void finalize ();
174
+ void finalize();
181
175
 
182
176
  /// called by range_search before do_allocation
183
- void set_lims ();
177
+ void set_lims();
184
178
 
185
179
  /// called by range_search after do_allocation
186
- void copy_result (bool incremental = false);
180
+ void copy_result(bool incremental = false);
187
181
 
188
182
  /// merge a set of PartialResult's into one RangeSearchResult
189
183
  /// on output the partial results are empty!
190
- static void merge (std::vector <RangeSearchPartialResult *> &
191
- partial_results, bool do_delete=true);
192
-
184
+ static void merge(
185
+ std::vector<RangeSearchPartialResult*>& partial_results,
186
+ bool do_delete = true);
193
187
  };
194
188
 
195
-
196
189
  /***********************************************************
197
190
  * The distance computer maintains a current query and computes
198
191
  * distances to elements in an index that supports random access.
@@ -202,19 +195,19 @@ struct RangeSearchPartialResult: BufferList {
202
195
  * instantiate one from each thread if needed.
203
196
  ***********************************************************/
204
197
  struct DistanceComputer {
205
- using idx_t = Index::idx_t;
198
+ using idx_t = Index::idx_t;
206
199
 
207
- /// called before computing distances. Pointer x should remain valid
208
- /// while operator () is called
209
- virtual void set_query(const float *x) = 0;
200
+ /// called before computing distances. Pointer x should remain valid
201
+ /// while operator () is called
202
+ virtual void set_query(const float* x) = 0;
210
203
 
211
- /// compute distance of vector i to current query
212
- virtual float operator () (idx_t i) = 0;
204
+ /// compute distance of vector i to current query
205
+ virtual float operator()(idx_t i) = 0;
213
206
 
214
- /// compute distance between two stored vectors
215
- virtual float symmetric_dis (idx_t i, idx_t j) = 0;
207
+ /// compute distance between two stored vectors
208
+ virtual float symmetric_dis(idx_t i, idx_t j) = 0;
216
209
 
217
- virtual ~DistanceComputer() {}
210
+ virtual ~DistanceComputer() {}
218
211
  };
219
212
 
220
213
  /***********************************************************
@@ -222,7 +215,7 @@ struct DistanceComputer {
222
215
  ***********************************************************/
223
216
 
224
217
  struct FAISS_API InterruptCallback {
225
- virtual bool want_interrupt () = 0;
218
+ virtual bool want_interrupt() = 0;
226
219
  virtual ~InterruptCallback() {}
227
220
 
228
221
  // lock that protects concurrent calls to is_interrupted
@@ -230,7 +223,7 @@ struct FAISS_API InterruptCallback {
230
223
 
231
224
  static std::unique_ptr<InterruptCallback> instance;
232
225
 
233
- static void clear_instance ();
226
+ static void clear_instance();
234
227
 
235
228
  /** check if:
236
229
  * - an interrupt callback is set
@@ -238,23 +231,46 @@ struct FAISS_API InterruptCallback {
238
231
  * if this is the case, then throw an exception. Should not be called
239
232
  * from multiple threads.
240
233
  */
241
- static void check ();
234
+ static void check();
242
235
 
243
236
  /// same as check() but return true if is interrupted instead of
244
237
  /// throwing. Can be called from multiple threads.
245
- static bool is_interrupted ();
238
+ static bool is_interrupted();
246
239
 
247
240
  /** assuming each iteration takes a certain number of flops, what
248
241
  * is a reasonable interval to check for interrupts?
249
242
  */
250
- static size_t get_period_hint (size_t flops);
251
-
243
+ static size_t get_period_hint(size_t flops);
252
244
  };
253
245
 
246
+ /// set implementation optimized for fast access.
247
+ struct VisitedTable {
248
+ std::vector<uint8_t> visited;
249
+ int visno;
250
+
251
+ explicit VisitedTable(int size) : visited(size), visno(1) {}
252
+
253
+ /// set flag #no to true
254
+ void set(int no) {
255
+ visited[no] = visno;
256
+ }
257
+
258
+ /// get flag #no
259
+ bool get(int no) const {
260
+ return visited[no] == visno;
261
+ }
262
+
263
+ /// reset all flags to false
264
+ void advance() {
265
+ visno++;
266
+ if (visno == 250) {
267
+ // 250 rather than 255 because sometimes we use visno and visno+1
268
+ memset(visited.data(), 0, sizeof(visited[0]) * visited.size());
269
+ visno = 1;
270
+ }
271
+ }
272
+ };
254
273
 
255
-
256
- }; // namespace faiss
257
-
258
-
274
+ } // namespace faiss
259
275
 
260
276
  #endif