faiss 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (189) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -18,7 +18,7 @@
18
18
  #include <mutex>
19
19
  #include <vector>
20
20
 
21
- #include <faiss/Index.h>
21
+ #include <faiss/MetricType.h>
22
22
  #include <faiss/impl/platform_macros.h>
23
23
 
24
24
  namespace faiss {
@@ -31,15 +31,13 @@ struct RangeSearchResult {
31
31
  size_t nq; ///< nb of queries
32
32
  size_t* lims; ///< size (nq + 1)
33
33
 
34
- typedef Index::idx_t idx_t;
35
-
36
34
  idx_t* labels; ///< result for query i is labels[lims[i]:lims[i+1]]
37
35
  float* distances; ///< corresponding distances (not sorted)
38
36
 
39
37
  size_t buffer_size; ///< size of the result buffers used
40
38
 
41
39
  /// lims must be allocated on input to range_search.
42
- explicit RangeSearchResult(idx_t nq, bool alloc_lims = true);
40
+ explicit RangeSearchResult(size_t nq, bool alloc_lims = true);
43
41
 
44
42
  /// called when lims contains the nb of elements result entries
45
43
  /// for each query
@@ -62,8 +60,6 @@ struct RangeSearchResult {
62
60
  /** List of temporary buffers used to store results before they are
63
61
  * copied to the RangeSearchResult object. */
64
62
  struct BufferList {
65
- typedef Index::idx_t idx_t;
66
-
67
63
  // buffer sizes in # entries
68
64
  size_t buffer_size;
69
65
 
@@ -94,7 +90,6 @@ struct RangeSearchPartialResult;
94
90
 
95
91
  /// result structure for a single query
96
92
  struct RangeQueryResult {
97
- using idx_t = Index::idx_t;
98
93
  idx_t qno; //< id of the query
99
94
  size_t nres; //< nb of results for this query
100
95
  RangeSearchPartialResult* pres;
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/CodePacker.h>
9
+
10
+ #include <cassert>
11
+ #include <cstring>
12
+
13
+ namespace faiss {
14
+
15
+ /*********************************************
16
+ * CodePacker
17
+ * default of pack_all / unpack_all loops over the _1 versions
18
+ */
19
+
20
+ void CodePacker::pack_all(const uint8_t* flat_codes, uint8_t* block) const {
21
+ for (size_t i = 0; i < nvec; i++) {
22
+ pack_1(flat_codes + code_size * i, i, block);
23
+ }
24
+ }
25
+
26
+ void CodePacker::unpack_all(const uint8_t* block, uint8_t* flat_codes) const {
27
+ for (size_t i = 0; i < nvec; i++) {
28
+ unpack_1(block, i, flat_codes + code_size * i);
29
+ }
30
+ }
31
+
32
+ /*********************************************
33
+ * CodePackerFlat
34
+ */
35
+
36
+ CodePackerFlat::CodePackerFlat(size_t code_size) {
37
+ this->code_size = code_size;
38
+ nvec = 1;
39
+ block_size = code_size;
40
+ }
41
+
42
+ void CodePackerFlat::pack_all(const uint8_t* flat_codes, uint8_t* block) const {
43
+ memcpy(block, flat_codes, code_size);
44
+ }
45
+
46
+ void CodePackerFlat::unpack_all(const uint8_t* block, uint8_t* flat_codes)
47
+ const {
48
+ memcpy(flat_codes, block, code_size);
49
+ }
50
+
51
+ void CodePackerFlat::pack_1(
52
+ const uint8_t* flat_code,
53
+ size_t offset,
54
+ uint8_t* block) const {
55
+ assert(offset == 0);
56
+ pack_all(flat_code, block);
57
+ }
58
+
59
+ void CodePackerFlat::unpack_1(
60
+ const uint8_t* block,
61
+ size_t offset,
62
+ uint8_t* flat_code) const {
63
+ assert(offset == 0);
64
+ unpack_all(block, flat_code);
65
+ }
66
+
67
+ } // namespace faiss
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/MetricType.h>
11
+
12
+ namespace faiss {
13
+
14
+ /**
15
+ * Packing consists in combining a fixed number of codes of constant size
16
+ * (code_size) into a block of data where they may (or may not) be interleaved
17
+ * for efficient consumption by distance computation kernels. This exists for
18
+ * the "fast_scan" indexes on CPU and for some GPU kernels.
19
+ */
20
+ struct CodePacker {
21
+ size_t code_size; // input code size in bytes
22
+ size_t nvec; // number of vectors per block
23
+ size_t block_size; // size of one block in bytes (>= code_size * nvec)
24
+
25
+ // pack a single code to a block
26
+ virtual void pack_1(
27
+ const uint8_t*
28
+ flat_code, // code to write to the block, size code_size
29
+ size_t offset, // offset in the block (0 <= offset < nvec)
30
+ uint8_t* block // block to write to (size block_size)
31
+ ) const = 0;
32
+
33
+ // unpack a single code from a block
34
+ virtual void unpack_1(
35
+ const uint8_t* block, // block to read from (size block_size)
36
+ size_t offset, // offset in the block (0 <= offset < nvec)
37
+ uint8_t* flat_code // where to write the resulting code, size
38
+ // code_size
39
+ ) const = 0;
40
+
41
+ // pack all codes in a block
42
+ virtual void pack_all(
43
+ const uint8_t* flat_codes, // codes to write to the block, size
44
+ // (nvec * code_size)
45
+ uint8_t* block // block to write to (size block_size)
46
+ ) const;
47
+
48
+ // unpack all codes in a block
49
+ virtual void unpack_all(
50
+ const uint8_t* block, // block to read from (size block_size)
51
+ uint8_t* flat_codes // where to write the resulting codes size (nvec
52
+ // * code_size)
53
+ ) const;
54
+
55
+ virtual ~CodePacker() {}
56
+ };
57
+
58
+ /** Trivial code packer where codes are stored one by one */
59
+ struct CodePackerFlat : CodePacker {
60
+ explicit CodePackerFlat(size_t code_size);
61
+
62
+ void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
63
+ const final;
64
+ void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
65
+ const final;
66
+
67
+ void pack_all(const uint8_t* flat_codes, uint8_t* block) const final;
68
+ void unpack_all(const uint8_t* block, uint8_t* flat_codes) const final;
69
+ };
70
+
71
+ } // namespace faiss
@@ -23,8 +23,6 @@ namespace faiss {
23
23
  * that has additional methods to handle the inverted list context.
24
24
  ***********************************************************/
25
25
  struct DistanceComputer {
26
- using idx_t = Index::idx_t;
27
-
28
26
  /// called before computing distances. Pointer x should remain valid
29
27
  /// while operator () is called
30
28
  virtual void set_query(const float* x) = 0;
@@ -47,11 +47,6 @@ void HNSW::neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end)
47
47
 
48
48
  HNSW::HNSW(int M) : rng(12345) {
49
49
  set_default_probas(M, 1.0 / log(M));
50
- max_level = -1;
51
- entry_point = -1;
52
- efSearch = 16;
53
- efConstruction = 40;
54
- upper_beam = 1;
55
50
  offsets.push_back(0);
56
51
  }
57
52
 
@@ -509,7 +504,6 @@ void HNSW::add_with_locks(
509
504
 
510
505
  namespace {
511
506
 
512
- using idx_t = HNSW::idx_t;
513
507
  using MinimaxHeap = HNSW::MinimaxHeap;
514
508
  using Node = HNSW::Node;
515
509
  /** Do a BFS on the candidates list */
@@ -837,8 +831,10 @@ void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
837
831
  if (k == n) {
838
832
  if (v >= dis[0])
839
833
  return;
834
+ if (ids[0] != -1) {
835
+ --nvalid;
836
+ }
840
837
  faiss::heap_pop<HC>(k--, dis.data(), ids.data());
841
- --nvalid;
842
838
  }
843
839
  faiss::heap_push<HC>(++k, dis.data(), ids.data(), v, i);
844
840
  ++nvalid;
@@ -52,10 +52,7 @@ struct SearchParametersHNSW : SearchParameters {
52
52
 
53
53
  struct HNSW {
54
54
  /// internal storage of vectors (32 bits: this is expensive)
55
- typedef int storage_idx_t;
56
-
57
- /// Faiss results are 64-bit
58
- typedef Index::idx_t idx_t;
55
+ using storage_idx_t = int32_t;
59
56
 
60
57
  typedef std::pair<float, storage_idx_t> Node;
61
58
 
@@ -124,25 +121,25 @@ struct HNSW {
124
121
 
125
122
  /// entry point in the search structure (one of the points with maximum
126
123
  /// level)
127
- storage_idx_t entry_point;
124
+ storage_idx_t entry_point = -1;
128
125
 
129
126
  faiss::RandomGenerator rng;
130
127
 
131
128
  /// maximum level
132
- int max_level;
129
+ int max_level = -1;
133
130
 
134
131
  /// expansion factor at construction time
135
- int efConstruction;
132
+ int efConstruction = 40;
136
133
 
137
134
  /// expansion factor at search time
138
- int efSearch;
135
+ int efSearch = 16;
139
136
 
140
137
  /// during search: do we check whether the next best distance is good
141
138
  /// enough?
142
139
  bool check_relative_distance = true;
143
140
 
144
141
  /// number of entry points in levels > 0.
145
- int upper_beam;
142
+ int upper_beam = 1;
146
143
 
147
144
  /// use bounded queue during exploration
148
145
  bool search_bounded_queue = true;
@@ -92,7 +92,7 @@ IDSelectorBatch::IDSelectorBatch(size_t n, const idx_t* indices) {
92
92
  mask = ((idx_t)1 << nbits) - 1;
93
93
  bloom.resize((idx_t)1 << (nbits - 3), 0);
94
94
  for (idx_t i = 0; i < n; i++) {
95
- Index::idx_t id = indices[i];
95
+ idx_t id = indices[i];
96
96
  set.insert(id);
97
97
  id &= mask;
98
98
  bloom[id >> 3] |= 1 << (id & 7);
@@ -19,7 +19,6 @@ namespace faiss {
19
19
 
20
20
  /** Encapsulates a set of ids to handle. */
21
21
  struct IDSelector {
22
- using idx_t = Index::idx_t;
23
22
  virtual bool is_member(idx_t id) const = 0;
24
23
  virtual ~IDSelector() {}
25
24
  };
@@ -132,4 +131,43 @@ struct IDSelectorAll : IDSelector {
132
131
  virtual ~IDSelectorAll() {}
133
132
  };
134
133
 
134
+ /// does an AND operation on the two given IDSelector's is_member
135
+ /// results.
136
+ struct IDSelectorAnd : IDSelector {
137
+ const IDSelector* lhs;
138
+ const IDSelector* rhs;
139
+ IDSelectorAnd(const IDSelector* lhs, const IDSelector* rhs)
140
+ : lhs(lhs), rhs(rhs) {}
141
+ bool is_member(idx_t id) const final {
142
+ return lhs->is_member(id) && rhs->is_member(id);
143
+ };
144
+ virtual ~IDSelectorAnd() {}
145
+ };
146
+
147
+ /// does an OR operation on the two given IDSelector's is_member
148
+ /// results.
149
+ struct IDSelectorOr : IDSelector {
150
+ const IDSelector* lhs;
151
+ const IDSelector* rhs;
152
+ IDSelectorOr(const IDSelector* lhs, const IDSelector* rhs)
153
+ : lhs(lhs), rhs(rhs) {}
154
+ bool is_member(idx_t id) const final {
155
+ return lhs->is_member(id) || rhs->is_member(id);
156
+ };
157
+ virtual ~IDSelectorOr() {}
158
+ };
159
+
160
+ /// does an XOR operation on the two given IDSelector's is_member
161
+ /// results.
162
+ struct IDSelectorXOr : IDSelector {
163
+ const IDSelector* lhs;
164
+ const IDSelector* rhs;
165
+ IDSelectorXOr(const IDSelector* lhs, const IDSelector* rhs)
166
+ : lhs(lhs), rhs(rhs) {}
167
+ bool is_member(idx_t id) const final {
168
+ return lhs->is_member(id) ^ rhs->is_member(id);
169
+ };
170
+ virtual ~IDSelectorXOr() {}
171
+ };
172
+
135
173
  } // namespace faiss
@@ -21,6 +21,15 @@
21
21
  #include <faiss/utils/hamming.h> // BitstringWriter
22
22
  #include <faiss/utils/utils.h>
23
23
 
24
+ #include <faiss/utils/approx_topk/approx_topk.h>
25
+
26
+ // this is needed for prefetching
27
+ #include <faiss/impl/platform_macros.h>
28
+
29
+ #ifdef __AVX2__
30
+ #include <xmmintrin.h>
31
+ #endif
32
+
24
33
  extern "C" {
25
34
  // LU decomposition of a general matrix
26
35
  void sgetrf_(
@@ -151,23 +160,7 @@ LocalSearchQuantizer::LocalSearchQuantizer(
151
160
  Search_type_t search_type)
152
161
  : AdditiveQuantizer(d, std::vector<size_t>(M, nbits), search_type) {
153
162
  K = (1 << nbits);
154
-
155
- train_iters = 25;
156
- train_ils_iters = 8;
157
- icm_iters = 4;
158
-
159
- encode_ils_iters = 16;
160
-
161
- p = 0.5f;
162
- lambd = 1e-2f;
163
-
164
- chunk_size = 10000;
165
- nperts = 4;
166
-
167
- random_seed = 0x12345;
168
163
  std::srand(random_seed);
169
-
170
- icm_encoder_factory = nullptr;
171
164
  }
172
165
 
173
166
  LocalSearchQuantizer::~LocalSearchQuantizer() {
@@ -192,7 +185,7 @@ void LocalSearchQuantizer::train(size_t n, const float* x) {
192
185
  // allocate memory for codebooks, size [M, K, d]
193
186
  codebooks.resize(M * K * d);
194
187
 
195
- // randomly intialize codes
188
+ // randomly initialize codes
196
189
  std::mt19937 gen(random_seed);
197
190
  std::vector<int32_t> codes(n * M); // [n, M]
198
191
  random_int32(codes, 0, K - 1, gen);
@@ -604,54 +597,72 @@ void LocalSearchQuantizer::icm_encode_step(
604
597
  FAISS_THROW_IF_NOT(M != 0 && K != 0);
605
598
  FAISS_THROW_IF_NOT(binaries != nullptr);
606
599
 
607
- for (size_t iter = 0; iter < n_iters; iter++) {
608
- // condition on the m-th subcode
609
- for (size_t m = 0; m < M; m++) {
610
- std::vector<float> objs(n * K);
611
- #pragma omp parallel for
612
- for (int64_t i = 0; i < n; i++) {
613
- auto u = unaries + m * n * K + i * K;
614
- memcpy(objs.data() + i * K, u, sizeof(float) * K);
615
- }
600
+ #pragma omp parallel for schedule(dynamic)
601
+ for (int64_t i = 0; i < n; i++) {
602
+ std::vector<float> objs(K);
616
603
 
617
- // compute objective function by adding unary
618
- // and binary terms together
619
- for (size_t other_m = 0; other_m < M; other_m++) {
620
- if (other_m == m) {
621
- continue;
604
+ for (size_t iter = 0; iter < n_iters; iter++) {
605
+ // condition on the m-th subcode
606
+ for (size_t m = 0; m < M; m++) {
607
+ // copy
608
+ auto u = unaries + m * n * K + i * K;
609
+ for (size_t code = 0; code < K; code++) {
610
+ objs[code] = u[code];
622
611
  }
623
612
 
624
- #pragma omp parallel for
625
- for (int64_t i = 0; i < n; i++) {
613
+ // compute objective function by adding unary
614
+ // and binary terms together
615
+ for (size_t other_m = 0; other_m < M; other_m++) {
616
+ if (other_m == m) {
617
+ continue;
618
+ }
619
+
620
+ #ifdef __AVX2__
621
+ // TODO: add platform-independent compiler-independent
622
+ // prefetch utilities.
623
+ if (other_m + 1 < M) {
624
+ // do a single prefetch
625
+ int32_t code2 = codes[i * M + other_m + 1];
626
+ // for (int32_t code = 0; code < K; code += 64) {
627
+ int32_t code = 0;
628
+ {
629
+ size_t binary_idx = (other_m + 1) * M * K * K +
630
+ m * K * K + code2 * K + code;
631
+ _mm_prefetch(binaries + binary_idx, _MM_HINT_T0);
632
+ }
633
+ }
634
+ #endif
635
+
626
636
  for (int32_t code = 0; code < K; code++) {
627
637
  int32_t code2 = codes[i * M + other_m];
628
- size_t binary_idx = m * M * K * K + other_m * K * K +
629
- code * K + code2;
630
- // binaries[m, other_m, code, code2]
631
- objs[i * K + code] += binaries[binary_idx];
638
+ size_t binary_idx = other_m * M * K * K + m * K * K +
639
+ code2 * K + code;
640
+ // binaries[m, other_m, code, code2].
641
+ // It is symmetric over (m <-> other_m)
642
+ // and (code <-> code2).
643
+ // So, replace the op with
644
+ // binaries[other_m, m, code2, code].
645
+ objs[code] += binaries[binary_idx];
632
646
  }
633
647
  }
634
- }
635
648
 
636
- // find the optimal value of the m-th subcode
637
- #pragma omp parallel for
638
- for (int64_t i = 0; i < n; i++) {
649
+ // find the optimal value of the m-th subcode
639
650
  float best_obj = HUGE_VALF;
640
651
  int32_t best_code = 0;
641
- for (size_t code = 0; code < K; code++) {
642
- float obj = objs[i * K + code];
643
- if (obj < best_obj) {
644
- best_obj = obj;
645
- best_code = code;
646
- }
647
- }
652
+
653
+ // find one using SIMD. The following operation is similar
654
+ // to the search of the smallest element in objs
655
+ using C = CMax<float, int>;
656
+ HeapWithBuckets<C, 16, 1>::addn(
657
+ K, objs.data(), 1, &best_obj, &best_code);
658
+
659
+ // done
648
660
  codes[i * M + m] = best_code;
649
- }
650
661
 
651
- } // loop M
662
+ } // loop M
663
+ }
652
664
  }
653
665
  }
654
-
655
666
  void LocalSearchQuantizer::perturb_codes(
656
667
  int32_t* codes,
657
668
  size_t n,
@@ -45,22 +45,21 @@ struct IcmEncoderFactory;
45
45
  struct LocalSearchQuantizer : AdditiveQuantizer {
46
46
  size_t K; ///< number of codes per codebook
47
47
 
48
- size_t train_iters; ///< number of iterations in training
48
+ size_t train_iters = 25; ///< number of iterations in training
49
+ size_t encode_ils_iters = 16; ///< iterations of local search in encoding
50
+ size_t train_ils_iters = 8; ///< iterations of local search in training
51
+ size_t icm_iters = 4; ///< number of iterations in icm
49
52
 
50
- size_t encode_ils_iters; ///< iterations of local search in encoding
51
- size_t train_ils_iters; ///< iterations of local search in training
52
- size_t icm_iters; ///< number of iterations in icm
53
+ float p = 0.5f; ///< temperature factor
54
+ float lambd = 1e-2f; ///< regularization factor
53
55
 
54
- float p; ///< temperature factor
55
- float lambd; ///< regularization factor
56
+ size_t chunk_size = 10000; ///< nb of vectors to encode at a time
56
57
 
57
- size_t chunk_size; ///< nb of vectors to encode at a time
58
+ int random_seed = 0x12345; ///< seed for random generator
59
+ size_t nperts = 4; ///< number of perturbation in each code
58
60
 
59
- int random_seed; ///< seed for random generator
60
- size_t nperts; ///< number of perturbation in each code
61
-
62
- ///< if non-NULL, use this encoder to encode
63
- lsq::IcmEncoderFactory* icm_encoder_factory;
61
+ ///< if non-NULL, use this encoder to encode (owned by the object)
62
+ lsq::IcmEncoderFactory* icm_encoder_factory = nullptr;
64
63
 
65
64
  bool update_codebooks_with_double = true;
66
65
 
@@ -147,14 +147,8 @@ using namespace nndescent;
147
147
 
148
148
  constexpr int NUM_EVAL_POINTS = 100;
149
149
 
150
- NNDescent::NNDescent(const int d, const int K) : K(K), random_seed(2021), d(d) {
151
- ntotal = 0;
152
- has_built = false;
153
- S = 10;
154
- R = 100;
150
+ NNDescent::NNDescent(const int d, const int K) : K(K), d(d) {
155
151
  L = K + 50;
156
- iter = 10;
157
- search_L = 0;
158
152
  }
159
153
 
160
154
  NNDescent::~NNDescent() {}
@@ -311,7 +305,7 @@ void NNDescent::generate_eval_set(
311
305
  for (int i = 0; i < c.size(); i++) {
312
306
  std::vector<Neighbor> tmp;
313
307
  for (int j = 0; j < N; j++) {
314
- if (i == j)
308
+ if (c[i] == j)
315
309
  continue; // skip itself
316
310
  float dist = qdis.symmetric_dis(c[i], j);
317
311
  tmp.push_back(Neighbor(j, dist, true));
@@ -425,7 +419,7 @@ void NNDescent::search(
425
419
  // candidate pool, the K best items is the result.
426
420
  std::vector<Neighbor> retset(L + 1);
427
421
 
428
- // Randomly choose L points to intialize the candidate pool
422
+ // Randomly choose L points to initialize the candidate pool
429
423
  std::vector<int> init_ids(L);
430
424
  std::mt19937 rng(random_seed);
431
425
 
@@ -90,7 +90,6 @@ struct Nhood {
90
90
 
91
91
  struct NNDescent {
92
92
  using storage_idx_t = int;
93
- using idx_t = Index::idx_t;
94
93
 
95
94
  using KNNGraph = std::vector<nndescent::Nhood>;
96
95
 
@@ -133,19 +132,20 @@ struct NNDescent {
133
132
  std::vector<int>& ctrl_points,
134
133
  std::vector<std::vector<int>>& acc_eval_set);
135
134
 
136
- bool has_built;
135
+ bool has_built = false;
137
136
 
138
- int K; // K in KNN graph
139
- int S; // number of sample neighbors to be updated for each node
140
- int R; // size of reverse links, 0 means the reverse links will not be used
141
- int L; // size of the candidate pool in building
142
- int iter; // number of iterations to iterate over
143
- int search_L; // size of candidate pool in searching
144
- int random_seed; // random seed for generators
137
+ int S = 10; // number of sample neighbors to be updated for each node
138
+ int R = 100; // size of reverse links, 0 means the reverse links will not be
139
+ // used
140
+ int iter = 10; // number of iterations to iterate over
141
+ int search_L = 0; // size of candidate pool in searching
142
+ int random_seed = 2021; // random seed for generators
145
143
 
144
+ int K; // K in KNN graph
146
145
  int d; // dimensions
146
+ int L; // size of the candidate pool in building
147
147
 
148
- int ntotal;
148
+ int ntotal = 0;
149
149
 
150
150
  KNNGraph graph;
151
151
  std::vector<int> final_graph;
@@ -29,8 +29,6 @@ constexpr int EMPTY_ID = -1;
29
29
  distances. This makes supporting INNER_PRODUCT search easier */
30
30
 
31
31
  struct NegativeDistanceComputer : DistanceComputer {
32
- using idx_t = Index::idx_t;
33
-
34
32
  /// owned by this
35
33
  DistanceComputer* basedis;
36
34
 
@@ -59,7 +57,7 @@ struct NegativeDistanceComputer : DistanceComputer {
59
57
  } // namespace
60
58
 
61
59
  DistanceComputer* storage_distance_computer(const Index* storage) {
62
- if (storage->metric_type == METRIC_INNER_PRODUCT) {
60
+ if (is_similarity_metric(storage->metric_type)) {
63
61
  return new NegativeDistanceComputer(storage->get_distance_computer());
64
62
  } else {
65
63
  return storage->get_distance_computer();
@@ -140,9 +138,6 @@ inline int insert_into_pool(Neighbor* addr, int K, Neighbor nn) {
140
138
  NSG::NSG(int R) : R(R), rng(0x0903) {
141
139
  L = R + 32;
142
140
  C = R + 100;
143
- search_L = 16;
144
- ntotal = 0;
145
- is_built = false;
146
141
  srand(0x1998);
147
142
  }
148
143
 
@@ -98,12 +98,9 @@ DistanceComputer* storage_distance_computer(const Index* storage);
98
98
 
99
99
  struct NSG {
100
100
  /// internal storage of vectors (32 bits: this is expensive)
101
- using storage_idx_t = int;
101
+ using storage_idx_t = int32_t;
102
102
 
103
- /// Faiss results are 64-bit
104
- using idx_t = Index::idx_t;
105
-
106
- int ntotal; ///< nb of nodes
103
+ int ntotal = 0; ///< nb of nodes
107
104
 
108
105
  // construction-time parameters
109
106
  int R; ///< nb of neighbors per node
@@ -111,13 +108,13 @@ struct NSG {
111
108
  int C; ///< candidate pool size at construction time
112
109
 
113
110
  // search-time parameters
114
- int search_L; ///< length of the search path
111
+ int search_L = 16; ///< length of the search path
115
112
 
116
113
  int enterpoint; ///< enterpoint
117
114
 
118
115
  std::shared_ptr<nsg::Graph<int>> final_graph; ///< NSG graph structure
119
116
 
120
- bool is_built; ///< NSG is built or not
117
+ bool is_built = false; ///< NSG is built or not
121
118
 
122
119
  RandomGenerator rng; ///< random generator
123
120