faiss 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -18,7 +18,7 @@
18
18
  #include <mutex>
19
19
  #include <vector>
20
20
 
21
- #include <faiss/Index.h>
21
+ #include <faiss/MetricType.h>
22
22
  #include <faiss/impl/platform_macros.h>
23
23
 
24
24
  namespace faiss {
@@ -31,15 +31,13 @@ struct RangeSearchResult {
31
31
  size_t nq; ///< nb of queries
32
32
  size_t* lims; ///< size (nq + 1)
33
33
 
34
- typedef Index::idx_t idx_t;
35
-
36
34
  idx_t* labels; ///< result for query i is labels[lims[i]:lims[i+1]]
37
35
  float* distances; ///< corresponding distances (not sorted)
38
36
 
39
37
  size_t buffer_size; ///< size of the result buffers used
40
38
 
41
39
  /// lims must be allocated on input to range_search.
42
- explicit RangeSearchResult(idx_t nq, bool alloc_lims = true);
40
+ explicit RangeSearchResult(size_t nq, bool alloc_lims = true);
43
41
 
44
42
  /// called when lims contains the nb of elements result entries
45
43
  /// for each query
@@ -62,8 +60,6 @@ struct RangeSearchResult {
62
60
  /** List of temporary buffers used to store results before they are
63
61
  * copied to the RangeSearchResult object. */
64
62
  struct BufferList {
65
- typedef Index::idx_t idx_t;
66
-
67
63
  // buffer sizes in # entries
68
64
  size_t buffer_size;
69
65
 
@@ -94,7 +90,6 @@ struct RangeSearchPartialResult;
94
90
 
95
91
  /// result structure for a single query
96
92
  struct RangeQueryResult {
97
- using idx_t = Index::idx_t;
98
93
  idx_t qno; //< id of the query
99
94
  size_t nres; //< nb of results for this query
100
95
  RangeSearchPartialResult* pres;
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/CodePacker.h>
9
+
10
+ #include <cassert>
11
+ #include <cstring>
12
+
13
+ namespace faiss {
14
+
15
+ /*********************************************
16
+ * CodePacker
17
+ * default of pack_all / unpack_all loops over the _1 versions
18
+ */
19
+
20
+ void CodePacker::pack_all(const uint8_t* flat_codes, uint8_t* block) const {
21
+ for (size_t i = 0; i < nvec; i++) {
22
+ pack_1(flat_codes + code_size * i, i, block);
23
+ }
24
+ }
25
+
26
+ void CodePacker::unpack_all(const uint8_t* block, uint8_t* flat_codes) const {
27
+ for (size_t i = 0; i < nvec; i++) {
28
+ unpack_1(block, i, flat_codes + code_size * i);
29
+ }
30
+ }
31
+
32
+ /*********************************************
33
+ * CodePackerFlat
34
+ */
35
+
36
+ CodePackerFlat::CodePackerFlat(size_t code_size) {
37
+ this->code_size = code_size;
38
+ nvec = 1;
39
+ block_size = code_size;
40
+ }
41
+
42
+ void CodePackerFlat::pack_all(const uint8_t* flat_codes, uint8_t* block) const {
43
+ memcpy(block, flat_codes, code_size);
44
+ }
45
+
46
+ void CodePackerFlat::unpack_all(const uint8_t* block, uint8_t* flat_codes)
47
+ const {
48
+ memcpy(flat_codes, block, code_size);
49
+ }
50
+
51
+ void CodePackerFlat::pack_1(
52
+ const uint8_t* flat_code,
53
+ size_t offset,
54
+ uint8_t* block) const {
55
+ assert(offset == 0);
56
+ pack_all(flat_code, block);
57
+ }
58
+
59
+ void CodePackerFlat::unpack_1(
60
+ const uint8_t* block,
61
+ size_t offset,
62
+ uint8_t* flat_code) const {
63
+ assert(offset == 0);
64
+ unpack_all(block, flat_code);
65
+ }
66
+
67
+ } // namespace faiss
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/MetricType.h>
11
+
12
+ namespace faiss {
13
+
14
+ /**
15
+ * Packing consists in combining a fixed number of codes of constant size
16
+ * (code_size) into a block of data where they may (or may not) be interleaved
17
+ * for efficient consumption by distance computation kernels. This exists for
18
+ * the "fast_scan" indexes on CPU and for some GPU kernels.
19
+ */
20
+ struct CodePacker {
21
+ size_t code_size; // input code size in bytes
22
+ size_t nvec; // number of vectors per block
23
+ size_t block_size; // size of one block in bytes (>= code_size * nvec)
24
+
25
+ // pack a single code to a block
26
+ virtual void pack_1(
27
+ const uint8_t*
28
+ flat_code, // code to write to the block, size code_size
29
+ size_t offset, // offset in the block (0 <= offset < nvec)
30
+ uint8_t* block // block to write to (size block_size)
31
+ ) const = 0;
32
+
33
+ // unpack a single code from a block
34
+ virtual void unpack_1(
35
+ const uint8_t* block, // block to read from (size block_size)
36
+ size_t offset, // offset in the block (0 <= offset < nvec)
37
+ uint8_t* flat_code // where to write the resulting code, size
38
+ // code_size
39
+ ) const = 0;
40
+
41
+ // pack all codes in a block
42
+ virtual void pack_all(
43
+ const uint8_t* flat_codes, // codes to write to the block, size
44
+ // (nvec * code_size)
45
+ uint8_t* block // block to write to (size block_size)
46
+ ) const;
47
+
48
+ // unpack all codes in a block
49
+ virtual void unpack_all(
50
+ const uint8_t* block, // block to read from (size block_size)
51
+ uint8_t* flat_codes // where to write the resulting codes size (nvec
52
+ // * code_size)
53
+ ) const;
54
+
55
+ virtual ~CodePacker() {}
56
+ };
57
+
58
+ /** Trivial code packer where codes are stored one by one */
59
+ struct CodePackerFlat : CodePacker {
60
+ explicit CodePackerFlat(size_t code_size);
61
+
62
+ void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
63
+ const final;
64
+ void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
65
+ const final;
66
+
67
+ void pack_all(const uint8_t* flat_codes, uint8_t* block) const final;
68
+ void unpack_all(const uint8_t* block, uint8_t* flat_codes) const final;
69
+ };
70
+
71
+ } // namespace faiss
@@ -23,8 +23,6 @@ namespace faiss {
23
23
  * that has additional methods to handle the inverted list context.
24
24
  ***********************************************************/
25
25
  struct DistanceComputer {
26
- using idx_t = Index::idx_t;
27
-
28
26
  /// called before computing distances. Pointer x should remain valid
29
27
  /// while operator () is called
30
28
  virtual void set_query(const float* x) = 0;
@@ -47,11 +47,6 @@ void HNSW::neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end)
47
47
 
48
48
  HNSW::HNSW(int M) : rng(12345) {
49
49
  set_default_probas(M, 1.0 / log(M));
50
- max_level = -1;
51
- entry_point = -1;
52
- efSearch = 16;
53
- efConstruction = 40;
54
- upper_beam = 1;
55
50
  offsets.push_back(0);
56
51
  }
57
52
 
@@ -509,7 +504,6 @@ void HNSW::add_with_locks(
509
504
 
510
505
  namespace {
511
506
 
512
- using idx_t = HNSW::idx_t;
513
507
  using MinimaxHeap = HNSW::MinimaxHeap;
514
508
  using Node = HNSW::Node;
515
509
  /** Do a BFS on the candidates list */
@@ -837,8 +831,10 @@ void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
837
831
  if (k == n) {
838
832
  if (v >= dis[0])
839
833
  return;
834
+ if (ids[0] != -1) {
835
+ --nvalid;
836
+ }
840
837
  faiss::heap_pop<HC>(k--, dis.data(), ids.data());
841
- --nvalid;
842
838
  }
843
839
  faiss::heap_push<HC>(++k, dis.data(), ids.data(), v, i);
844
840
  ++nvalid;
@@ -52,10 +52,7 @@ struct SearchParametersHNSW : SearchParameters {
52
52
 
53
53
  struct HNSW {
54
54
  /// internal storage of vectors (32 bits: this is expensive)
55
- typedef int storage_idx_t;
56
-
57
- /// Faiss results are 64-bit
58
- typedef Index::idx_t idx_t;
55
+ using storage_idx_t = int32_t;
59
56
 
60
57
  typedef std::pair<float, storage_idx_t> Node;
61
58
 
@@ -124,25 +121,25 @@ struct HNSW {
124
121
 
125
122
  /// entry point in the search structure (one of the points with maximum
126
123
  /// level)
127
- storage_idx_t entry_point;
124
+ storage_idx_t entry_point = -1;
128
125
 
129
126
  faiss::RandomGenerator rng;
130
127
 
131
128
  /// maximum level
132
- int max_level;
129
+ int max_level = -1;
133
130
 
134
131
  /// expansion factor at construction time
135
- int efConstruction;
132
+ int efConstruction = 40;
136
133
 
137
134
  /// expansion factor at search time
138
- int efSearch;
135
+ int efSearch = 16;
139
136
 
140
137
  /// during search: do we check whether the next best distance is good
141
138
  /// enough?
142
139
  bool check_relative_distance = true;
143
140
 
144
141
  /// number of entry points in levels > 0.
145
- int upper_beam;
142
+ int upper_beam = 1;
146
143
 
147
144
  /// use bounded queue during exploration
148
145
  bool search_bounded_queue = true;
@@ -92,7 +92,7 @@ IDSelectorBatch::IDSelectorBatch(size_t n, const idx_t* indices) {
92
92
  mask = ((idx_t)1 << nbits) - 1;
93
93
  bloom.resize((idx_t)1 << (nbits - 3), 0);
94
94
  for (idx_t i = 0; i < n; i++) {
95
- Index::idx_t id = indices[i];
95
+ idx_t id = indices[i];
96
96
  set.insert(id);
97
97
  id &= mask;
98
98
  bloom[id >> 3] |= 1 << (id & 7);
@@ -19,7 +19,6 @@ namespace faiss {
19
19
 
20
20
  /** Encapsulates a set of ids to handle. */
21
21
  struct IDSelector {
22
- using idx_t = Index::idx_t;
23
22
  virtual bool is_member(idx_t id) const = 0;
24
23
  virtual ~IDSelector() {}
25
24
  };
@@ -132,4 +131,43 @@ struct IDSelectorAll : IDSelector {
132
131
  virtual ~IDSelectorAll() {}
133
132
  };
134
133
 
134
+ /// does an AND operation on the two given IDSelector's is_member
135
+ /// results.
136
+ struct IDSelectorAnd : IDSelector {
137
+ const IDSelector* lhs;
138
+ const IDSelector* rhs;
139
+ IDSelectorAnd(const IDSelector* lhs, const IDSelector* rhs)
140
+ : lhs(lhs), rhs(rhs) {}
141
+ bool is_member(idx_t id) const final {
142
+ return lhs->is_member(id) && rhs->is_member(id);
143
+ };
144
+ virtual ~IDSelectorAnd() {}
145
+ };
146
+
147
+ /// does an OR operation on the two given IDSelector's is_member
148
+ /// results.
149
+ struct IDSelectorOr : IDSelector {
150
+ const IDSelector* lhs;
151
+ const IDSelector* rhs;
152
+ IDSelectorOr(const IDSelector* lhs, const IDSelector* rhs)
153
+ : lhs(lhs), rhs(rhs) {}
154
+ bool is_member(idx_t id) const final {
155
+ return lhs->is_member(id) || rhs->is_member(id);
156
+ };
157
+ virtual ~IDSelectorOr() {}
158
+ };
159
+
160
+ /// does an XOR operation on the two given IDSelector's is_member
161
+ /// results.
162
+ struct IDSelectorXOr : IDSelector {
163
+ const IDSelector* lhs;
164
+ const IDSelector* rhs;
165
+ IDSelectorXOr(const IDSelector* lhs, const IDSelector* rhs)
166
+ : lhs(lhs), rhs(rhs) {}
167
+ bool is_member(idx_t id) const final {
168
+ return lhs->is_member(id) ^ rhs->is_member(id);
169
+ };
170
+ virtual ~IDSelectorXOr() {}
171
+ };
172
+
135
173
  } // namespace faiss
@@ -21,6 +21,15 @@
21
21
  #include <faiss/utils/hamming.h> // BitstringWriter
22
22
  #include <faiss/utils/utils.h>
23
23
 
24
+ #include <faiss/utils/approx_topk/approx_topk.h>
25
+
26
+ // this is needed for prefetching
27
+ #include <faiss/impl/platform_macros.h>
28
+
29
+ #ifdef __AVX2__
30
+ #include <xmmintrin.h>
31
+ #endif
32
+
24
33
  extern "C" {
25
34
  // LU decomposition of a general matrix
26
35
  void sgetrf_(
@@ -151,23 +160,7 @@ LocalSearchQuantizer::LocalSearchQuantizer(
151
160
  Search_type_t search_type)
152
161
  : AdditiveQuantizer(d, std::vector<size_t>(M, nbits), search_type) {
153
162
  K = (1 << nbits);
154
-
155
- train_iters = 25;
156
- train_ils_iters = 8;
157
- icm_iters = 4;
158
-
159
- encode_ils_iters = 16;
160
-
161
- p = 0.5f;
162
- lambd = 1e-2f;
163
-
164
- chunk_size = 10000;
165
- nperts = 4;
166
-
167
- random_seed = 0x12345;
168
163
  std::srand(random_seed);
169
-
170
- icm_encoder_factory = nullptr;
171
164
  }
172
165
 
173
166
  LocalSearchQuantizer::~LocalSearchQuantizer() {
@@ -192,7 +185,7 @@ void LocalSearchQuantizer::train(size_t n, const float* x) {
192
185
  // allocate memory for codebooks, size [M, K, d]
193
186
  codebooks.resize(M * K * d);
194
187
 
195
- // randomly intialize codes
188
+ // randomly initialize codes
196
189
  std::mt19937 gen(random_seed);
197
190
  std::vector<int32_t> codes(n * M); // [n, M]
198
191
  random_int32(codes, 0, K - 1, gen);
@@ -604,54 +597,72 @@ void LocalSearchQuantizer::icm_encode_step(
604
597
  FAISS_THROW_IF_NOT(M != 0 && K != 0);
605
598
  FAISS_THROW_IF_NOT(binaries != nullptr);
606
599
 
607
- for (size_t iter = 0; iter < n_iters; iter++) {
608
- // condition on the m-th subcode
609
- for (size_t m = 0; m < M; m++) {
610
- std::vector<float> objs(n * K);
611
- #pragma omp parallel for
612
- for (int64_t i = 0; i < n; i++) {
613
- auto u = unaries + m * n * K + i * K;
614
- memcpy(objs.data() + i * K, u, sizeof(float) * K);
615
- }
600
+ #pragma omp parallel for schedule(dynamic)
601
+ for (int64_t i = 0; i < n; i++) {
602
+ std::vector<float> objs(K);
616
603
 
617
- // compute objective function by adding unary
618
- // and binary terms together
619
- for (size_t other_m = 0; other_m < M; other_m++) {
620
- if (other_m == m) {
621
- continue;
604
+ for (size_t iter = 0; iter < n_iters; iter++) {
605
+ // condition on the m-th subcode
606
+ for (size_t m = 0; m < M; m++) {
607
+ // copy
608
+ auto u = unaries + m * n * K + i * K;
609
+ for (size_t code = 0; code < K; code++) {
610
+ objs[code] = u[code];
622
611
  }
623
612
 
624
- #pragma omp parallel for
625
- for (int64_t i = 0; i < n; i++) {
613
+ // compute objective function by adding unary
614
+ // and binary terms together
615
+ for (size_t other_m = 0; other_m < M; other_m++) {
616
+ if (other_m == m) {
617
+ continue;
618
+ }
619
+
620
+ #ifdef __AVX2__
621
+ // TODO: add platform-independent compiler-independent
622
+ // prefetch utilities.
623
+ if (other_m + 1 < M) {
624
+ // do a single prefetch
625
+ int32_t code2 = codes[i * M + other_m + 1];
626
+ // for (int32_t code = 0; code < K; code += 64) {
627
+ int32_t code = 0;
628
+ {
629
+ size_t binary_idx = (other_m + 1) * M * K * K +
630
+ m * K * K + code2 * K + code;
631
+ _mm_prefetch(binaries + binary_idx, _MM_HINT_T0);
632
+ }
633
+ }
634
+ #endif
635
+
626
636
  for (int32_t code = 0; code < K; code++) {
627
637
  int32_t code2 = codes[i * M + other_m];
628
- size_t binary_idx = m * M * K * K + other_m * K * K +
629
- code * K + code2;
630
- // binaries[m, other_m, code, code2]
631
- objs[i * K + code] += binaries[binary_idx];
638
+ size_t binary_idx = other_m * M * K * K + m * K * K +
639
+ code2 * K + code;
640
+ // binaries[m, other_m, code, code2].
641
+ // It is symmetric over (m <-> other_m)
642
+ // and (code <-> code2).
643
+ // So, replace the op with
644
+ // binaries[other_m, m, code2, code].
645
+ objs[code] += binaries[binary_idx];
632
646
  }
633
647
  }
634
- }
635
648
 
636
- // find the optimal value of the m-th subcode
637
- #pragma omp parallel for
638
- for (int64_t i = 0; i < n; i++) {
649
+ // find the optimal value of the m-th subcode
639
650
  float best_obj = HUGE_VALF;
640
651
  int32_t best_code = 0;
641
- for (size_t code = 0; code < K; code++) {
642
- float obj = objs[i * K + code];
643
- if (obj < best_obj) {
644
- best_obj = obj;
645
- best_code = code;
646
- }
647
- }
652
+
653
+ // find one using SIMD. The following operation is similar
654
+ // to the search of the smallest element in objs
655
+ using C = CMax<float, int>;
656
+ HeapWithBuckets<C, 16, 1>::addn(
657
+ K, objs.data(), 1, &best_obj, &best_code);
658
+
659
+ // done
648
660
  codes[i * M + m] = best_code;
649
- }
650
661
 
651
- } // loop M
662
+ } // loop M
663
+ }
652
664
  }
653
665
  }
654
-
655
666
  void LocalSearchQuantizer::perturb_codes(
656
667
  int32_t* codes,
657
668
  size_t n,
@@ -45,22 +45,21 @@ struct IcmEncoderFactory;
45
45
  struct LocalSearchQuantizer : AdditiveQuantizer {
46
46
  size_t K; ///< number of codes per codebook
47
47
 
48
- size_t train_iters; ///< number of iterations in training
48
+ size_t train_iters = 25; ///< number of iterations in training
49
+ size_t encode_ils_iters = 16; ///< iterations of local search in encoding
50
+ size_t train_ils_iters = 8; ///< iterations of local search in training
51
+ size_t icm_iters = 4; ///< number of iterations in icm
49
52
 
50
- size_t encode_ils_iters; ///< iterations of local search in encoding
51
- size_t train_ils_iters; ///< iterations of local search in training
52
- size_t icm_iters; ///< number of iterations in icm
53
+ float p = 0.5f; ///< temperature factor
54
+ float lambd = 1e-2f; ///< regularization factor
53
55
 
54
- float p; ///< temperature factor
55
- float lambd; ///< regularization factor
56
+ size_t chunk_size = 10000; ///< nb of vectors to encode at a time
56
57
 
57
- size_t chunk_size; ///< nb of vectors to encode at a time
58
+ int random_seed = 0x12345; ///< seed for random generator
59
+ size_t nperts = 4; ///< number of perturbations in each code
58
60
 
59
- int random_seed; ///< seed for random generator
60
- size_t nperts; ///< number of perturbation in each code
61
-
62
- ///< if non-NULL, use this encoder to encode
63
- lsq::IcmEncoderFactory* icm_encoder_factory;
61
+ ///< if non-NULL, use this encoder to encode (owned by the object)
62
+ lsq::IcmEncoderFactory* icm_encoder_factory = nullptr;
64
63
 
65
64
  bool update_codebooks_with_double = true;
66
65
 
@@ -147,14 +147,8 @@ using namespace nndescent;
147
147
 
148
148
  constexpr int NUM_EVAL_POINTS = 100;
149
149
 
150
- NNDescent::NNDescent(const int d, const int K) : K(K), random_seed(2021), d(d) {
151
- ntotal = 0;
152
- has_built = false;
153
- S = 10;
154
- R = 100;
150
+ NNDescent::NNDescent(const int d, const int K) : K(K), d(d) {
155
151
  L = K + 50;
156
- iter = 10;
157
- search_L = 0;
158
152
  }
159
153
 
160
154
  NNDescent::~NNDescent() {}
@@ -311,7 +305,7 @@ void NNDescent::generate_eval_set(
311
305
  for (int i = 0; i < c.size(); i++) {
312
306
  std::vector<Neighbor> tmp;
313
307
  for (int j = 0; j < N; j++) {
314
- if (i == j)
308
+ if (c[i] == j)
315
309
  continue; // skip itself
316
310
  float dist = qdis.symmetric_dis(c[i], j);
317
311
  tmp.push_back(Neighbor(j, dist, true));
@@ -425,7 +419,7 @@ void NNDescent::search(
425
419
  // candidate pool, the K best items is the result.
426
420
  std::vector<Neighbor> retset(L + 1);
427
421
 
428
- // Randomly choose L points to intialize the candidate pool
422
+ // Randomly choose L points to initialize the candidate pool
429
423
  std::vector<int> init_ids(L);
430
424
  std::mt19937 rng(random_seed);
431
425
 
@@ -90,7 +90,6 @@ struct Nhood {
90
90
 
91
91
  struct NNDescent {
92
92
  using storage_idx_t = int;
93
- using idx_t = Index::idx_t;
94
93
 
95
94
  using KNNGraph = std::vector<nndescent::Nhood>;
96
95
 
@@ -133,19 +132,20 @@ struct NNDescent {
133
132
  std::vector<int>& ctrl_points,
134
133
  std::vector<std::vector<int>>& acc_eval_set);
135
134
 
136
- bool has_built;
135
+ bool has_built = false;
137
136
 
138
- int K; // K in KNN graph
139
- int S; // number of sample neighbors to be updated for each node
140
- int R; // size of reverse links, 0 means the reverse links will not be used
141
- int L; // size of the candidate pool in building
142
- int iter; // number of iterations to iterate over
143
- int search_L; // size of candidate pool in searching
144
- int random_seed; // random seed for generators
137
+ int S = 10; // number of sample neighbors to be updated for each node
138
+ int R = 100; // size of reverse links, 0 means the reverse links will not be
139
+ // used
140
+ int iter = 10; // number of iterations to iterate over
141
+ int search_L = 0; // size of candidate pool in searching
142
+ int random_seed = 2021; // random seed for generators
145
143
 
144
+ int K; // K in KNN graph
146
145
  int d; // dimensions
146
+ int L; // size of the candidate pool in building
147
147
 
148
- int ntotal;
148
+ int ntotal = 0;
149
149
 
150
150
  KNNGraph graph;
151
151
  std::vector<int> final_graph;
@@ -29,8 +29,6 @@ constexpr int EMPTY_ID = -1;
29
29
  distances. This makes supporting INNER_PRODUCT search easier */
30
30
 
31
31
  struct NegativeDistanceComputer : DistanceComputer {
32
- using idx_t = Index::idx_t;
33
-
34
32
  /// owned by this
35
33
  DistanceComputer* basedis;
36
34
 
@@ -59,7 +57,7 @@ struct NegativeDistanceComputer : DistanceComputer {
59
57
  } // namespace
60
58
 
61
59
  DistanceComputer* storage_distance_computer(const Index* storage) {
62
- if (storage->metric_type == METRIC_INNER_PRODUCT) {
60
+ if (is_similarity_metric(storage->metric_type)) {
63
61
  return new NegativeDistanceComputer(storage->get_distance_computer());
64
62
  } else {
65
63
  return storage->get_distance_computer();
@@ -140,9 +138,6 @@ inline int insert_into_pool(Neighbor* addr, int K, Neighbor nn) {
140
138
  NSG::NSG(int R) : R(R), rng(0x0903) {
141
139
  L = R + 32;
142
140
  C = R + 100;
143
- search_L = 16;
144
- ntotal = 0;
145
- is_built = false;
146
141
  srand(0x1998);
147
142
  }
148
143
 
@@ -98,12 +98,9 @@ DistanceComputer* storage_distance_computer(const Index* storage);
98
98
 
99
99
  struct NSG {
100
100
  /// internal storage of vectors (32 bits: this is expensive)
101
- using storage_idx_t = int;
101
+ using storage_idx_t = int32_t;
102
102
 
103
- /// Faiss results are 64-bit
104
- using idx_t = Index::idx_t;
105
-
106
- int ntotal; ///< nb of nodes
103
+ int ntotal = 0; ///< nb of nodes
107
104
 
108
105
  // construction-time parameters
109
106
  int R; ///< nb of neighbors per node
@@ -111,13 +108,13 @@ struct NSG {
111
108
  int C; ///< candidate pool size at construction time
112
109
 
113
110
  // search-time parameters
114
- int search_L; ///< length of the search path
111
+ int search_L = 16; ///< length of the search path
115
112
 
116
113
  int enterpoint; ///< enterpoint
117
114
 
118
115
  std::shared_ptr<nsg::Graph<int>> final_graph; ///< NSG graph structure
119
116
 
120
- bool is_built; ///< NSG is built or not
117
+ bool is_built = false; ///< NSG is built or not
121
118
 
122
119
  RandomGenerator rng; ///< random generator
123
120