faiss 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // // // AVX-512 version. It is not used, but let it be for the future
9
+ // // // needs.
10
+ // // template <class SearchResultType, typename T = PQDecoder>
11
+ // // typename std::enable_if<(std::is_same<T, PQDecoder8>::value), void>::
12
+ // // type distance_four_codes(
13
+ // // const uint8_t* __restrict code0,
14
+ // // const uint8_t* __restrict code1,
15
+ // // const uint8_t* __restrict code2,
16
+ // // const uint8_t* __restrict code3,
17
+ // // float& result0,
18
+ // // float& result1,
19
+ // // float& result2,
20
+ // // float& result3
21
+ // // ) const {
22
+ // // result0 = 0;
23
+ // // result1 = 0;
24
+ // // result2 = 0;
25
+ // // result3 = 0;
26
+
27
+ // // size_t m = 0;
28
+ // // const size_t pqM16 = pq.M / 16;
29
+
30
+ // // constexpr intptr_t N = 4;
31
+
32
+ // // const float* tab = sim_table;
33
+
34
+ // // if (pqM16 > 0) {
35
+ // // // process 16 values per loop
36
+ // // const __m512i ksub = _mm512_set1_epi32(pq.ksub);
37
+ // // __m512i offsets_0 = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
38
+ // // 8, 9, 10, 11, 12, 13, 14, 15);
39
+ // // offsets_0 = _mm512_mullo_epi32(offsets_0, ksub);
40
+
41
+ // // // accumulators of partial sums
42
+ // // __m512 partialSums[N];
43
+ // // for (intptr_t j = 0; j < N; j++) {
44
+ // // partialSums[j] = _mm512_setzero_ps();
45
+ // // }
46
+
47
+ // // // loop
48
+ // // for (m = 0; m < pqM16 * 16; m += 16) {
49
+ // // // load 16 uint8 values
50
+ // // __m128i mm1[N];
51
+ // // mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
52
+ // // mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
53
+ // // mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
54
+ // // mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
55
+
56
+ // // // process all 16 codes
57
+ // // for (intptr_t j = 0; j < N; j++) {
58
+ // // // convert all 16 uint8 values of the __m128i to int32
59
+ // // // values
60
+ // // const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
61
+
62
+ // // // add offsets
63
+ // // const __m512i indices_to_read_from =
64
+ // // _mm512_add_epi32(idx1, offsets_0);
65
+
66
+ // // // gather 16 values, similar to 16 operations of
67
+ // // // tab[idx]
68
+ // // __m512 collected =
69
+ // // _mm512_i32gather_ps(
70
+ // // indices_to_read_from, tab, sizeof(float));
71
+
72
+ // // // collect partial sums
73
+ // // partialSums[j] = _mm512_add_ps(partialSums[j],
74
+ // // collected);
75
+ // // }
76
+ // // tab += pq.ksub * 16;
77
+
78
+ // // }
79
+
80
+ // // // horizontal sum for partialSum
81
+ // // result0 += _mm512_reduce_add_ps(partialSums[0]);
82
+ // // result1 += _mm512_reduce_add_ps(partialSums[1]);
83
+ // // result2 += _mm512_reduce_add_ps(partialSums[2]);
84
+ // // result3 += _mm512_reduce_add_ps(partialSums[3]);
85
+ // // }
86
+
87
+ // // //
88
+ // // if (m < pq.M) {
89
+ // // // process leftovers
90
+ // // PQDecoder decoder0(code0 + m, pq.nbits);
91
+ // // PQDecoder decoder1(code1 + m, pq.nbits);
92
+ // // PQDecoder decoder2(code2 + m, pq.nbits);
93
+ // // PQDecoder decoder3(code3 + m, pq.nbits);
94
+ // // for (; m < pq.M; m++) {
95
+ // // result0 += tab[decoder0.decode()];
96
+ // // result1 += tab[decoder1.decode()];
97
+ // // result2 += tab[decoder2.decode()];
98
+ // // result3 += tab[decoder3.decode()];
99
+ // // tab += pq.ksub;
100
+ // // }
101
+ // // }
102
+ // // }
@@ -65,7 +65,7 @@ namespace faiss {
65
65
  static void read_index_header(Index* idx, IOReader* f) {
66
66
  READ1(idx->d);
67
67
  READ1(idx->ntotal);
68
- Index::idx_t dummy;
68
+ idx_t dummy;
69
69
  READ1(dummy);
70
70
  READ1(dummy);
71
71
  READ1(idx->is_trained);
@@ -279,6 +279,8 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
279
279
  aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
280
280
  aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) {
281
281
  READXBVECTOR(aq->qnorm.codes);
282
+ aq->qnorm.ntotal = aq->qnorm.codes.size() / 4;
283
+ aq->qnorm.update_permutation();
282
284
  }
283
285
 
284
286
  if (aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
@@ -439,7 +441,6 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
439
441
  dm->type = (DirectMap::Type)maintain_direct_map;
440
442
  READVECTOR(dm->array);
441
443
  if (dm->type == DirectMap::Hashtable) {
442
- using idx_t = Index::idx_t;
443
444
  std::vector<std::pair<idx_t, idx_t>> v;
444
445
  READVECTOR(v);
445
446
  std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
@@ -453,7 +454,7 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
453
454
  static void read_ivf_header(
454
455
  IndexIVF* ivf,
455
456
  IOReader* f,
456
- std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
457
+ std::vector<std::vector<idx_t>>* ids = nullptr) {
457
458
  read_index_header(ivf, f);
458
459
  READ1(ivf->nlist);
459
460
  READ1(ivf->nprobe);
@@ -470,7 +471,7 @@ static void read_ivf_header(
470
471
  // used for legacy formats
471
472
  static ArrayInvertedLists* set_array_invlist(
472
473
  IndexIVF* ivf,
473
- std::vector<std::vector<Index::idx_t>>& ids) {
474
+ std::vector<std::vector<idx_t>>& ids) {
474
475
  ArrayInvertedLists* ail =
475
476
  new ArrayInvertedLists(ivf->nlist, ivf->code_size);
476
477
  std::swap(ail->ids, ids);
@@ -487,7 +488,7 @@ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
487
488
  : nullptr;
488
489
  IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
489
490
 
490
- std::vector<std::vector<Index::idx_t>> ids;
491
+ std::vector<std::vector<idx_t>> ids;
491
492
  read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
492
493
  READ1(ivpq->by_residual);
493
494
  READ1(ivpq->code_size);
@@ -728,10 +729,11 @@ Index* read_index(IOReader* f, int io_flags) {
728
729
  READ1(ivaqfs->max_train_points);
729
730
 
730
731
  read_InvertedLists(ivaqfs, f, io_flags);
732
+ ivaqfs->init_code_packer();
731
733
  idx = ivaqfs;
732
734
  } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
733
735
  IndexIVFFlat* ivfl = new IndexIVFFlat();
734
- std::vector<std::vector<Index::idx_t>> ids;
736
+ std::vector<std::vector<idx_t>> ids;
735
737
  read_ivf_header(ivfl, f, &ids);
736
738
  ivfl->code_size = ivfl->d * sizeof(float);
737
739
  ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
@@ -754,10 +756,10 @@ Index* read_index(IOReader* f, int io_flags) {
754
756
  read_ivf_header(ivfl, f);
755
757
  ivfl->code_size = ivfl->d * sizeof(float);
756
758
  {
757
- std::vector<Index::idx_t> tab;
759
+ std::vector<idx_t> tab;
758
760
  READVECTOR(tab);
759
761
  for (long i = 0; i < tab.size(); i += 2) {
760
- std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
762
+ std::pair<idx_t, idx_t> pair(tab[i], tab[i + 1]);
761
763
  ivfl->instances.insert(pair);
762
764
  }
763
765
  }
@@ -788,7 +790,7 @@ Index* read_index(IOReader* f, int io_flags) {
788
790
  idx = idxl;
789
791
  } else if (h == fourcc("IvSQ")) { // legacy
790
792
  IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
791
- std::vector<std::vector<Index::idx_t>> ids;
793
+ std::vector<std::vector<idx_t>> ids;
792
794
  read_ivf_header(ivsc, f, &ids);
793
795
  read_ScalarQuantizer(&ivsc->sq, f);
794
796
  READ1(ivsc->code_size);
@@ -1002,6 +1004,7 @@ Index* read_index(IOReader* f, int io_flags) {
1002
1004
  ivpq->nbits = pq.nbits;
1003
1005
  ivpq->ksub = (1 << pq.nbits);
1004
1006
  ivpq->code_size = pq.code_size;
1007
+ ivpq->init_code_packer();
1005
1008
 
1006
1009
  idx = ivpq;
1007
1010
  } else if (h == fourcc("IRMf")) {
@@ -1072,7 +1075,7 @@ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
1072
1075
  static void read_binary_ivf_header(
1073
1076
  IndexBinaryIVF* ivf,
1074
1077
  IOReader* f,
1075
- std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
1078
+ std::vector<std::vector<idx_t>>* ids = nullptr) {
1076
1079
  read_index_binary_header(ivf, f);
1077
1080
  READ1(ivf->nlist);
1078
1081
  READ1(ivf->nprobe);
@@ -84,7 +84,7 @@ namespace faiss {
84
84
  static void write_index_header(const Index* idx, IOWriter* f) {
85
85
  WRITE1(idx->d);
86
86
  WRITE1(idx->ntotal);
87
- Index::idx_t dummy = 1 << 20;
87
+ idx_t dummy = 1 << 20;
88
88
  WRITE1(dummy);
89
89
  WRITE1(dummy);
90
90
  WRITE1(idx->is_trained);
@@ -373,7 +373,6 @@ static void write_direct_map(const DirectMap* dm, IOWriter* f) {
373
373
  WRITE1(maintain_direct_map);
374
374
  WRITEVECTOR(dm->array);
375
375
  if (dm->type == DirectMap::Hashtable) {
376
- using idx_t = Index::idx_t;
377
376
  std::vector<std::pair<idx_t, idx_t>> v;
378
377
  const std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
379
378
  v.resize(map.size());
@@ -615,7 +614,7 @@ void write_index(const Index* idx, IOWriter* f) {
615
614
  WRITE1(h);
616
615
  write_ivf_header(ivfl, f);
617
616
  {
618
- std::vector<Index::idx_t> tab(2 * ivfl->instances.size());
617
+ std::vector<idx_t> tab(2 * ivfl->instances.size());
619
618
  long i = 0;
620
619
  for (auto it = ivfl->instances.begin(); it != ivfl->instances.end();
621
620
  ++it) {
@@ -900,7 +899,7 @@ static void write_binary_multi_hash_map(
900
899
  size_t ntotal,
901
900
  IOWriter* f) {
902
901
  int id_bits = 0;
903
- while ((ntotal > ((Index::idx_t)1 << id_bits))) {
902
+ while ((ntotal > ((idx_t)1 << id_bits))) {
904
903
  id_bits++;
905
904
  }
906
905
  WRITE1(id_bits);
@@ -20,7 +20,6 @@
20
20
 
21
21
  namespace faiss {
22
22
 
23
- using idx_t = Index::idx_t;
24
23
  using LookUpFunc = std::function<float(idx_t, idx_t)>;
25
24
 
26
25
  void reduce(
@@ -22,10 +22,10 @@ namespace faiss {
22
22
  * @param argmins argmin of each row
23
23
  */
24
24
  void smawk(
25
- const Index::idx_t nrows,
26
- const Index::idx_t ncols,
25
+ const idx_t nrows,
26
+ const idx_t ncols,
27
27
  const float* x,
28
- Index::idx_t* argmins);
28
+ idx_t* argmins);
29
29
 
30
30
  /** Exact 1D K-Means by dynamic programming
31
31
  *
@@ -636,7 +636,7 @@ void ZnSphereCodecRec::decode(uint64_t code, float* c) const {
636
636
  }
637
637
  }
638
638
 
639
- // if not use_rec, instanciate an arbitrary harmless znc_rec
639
+ // if not use_rec, instantiate an arbitrary harmless znc_rec
640
640
  ZnSphereCodecAlt::ZnSphereCodecAlt(int dim, int r2)
641
641
  : ZnSphereCodec(dim, r2),
642
642
  use_rec((dim & (dim - 1)) == 0),
@@ -7,6 +7,10 @@
7
7
 
8
8
  #pragma once
9
9
 
10
+ // basic int types and size_t
11
+ #include <cstdint>
12
+ #include <cstdio>
13
+
10
14
  #ifdef _MSC_VER
11
15
 
12
16
  /*******************************************************
@@ -19,6 +23,10 @@
19
23
  #define FAISS_API __declspec(dllimport)
20
24
  #endif // FAISS_MAIN_LIB
21
25
 
26
+ #ifdef _MSC_VER
27
+ #define strtok_r strtok_s
28
+ #endif // _MSC_VER
29
+
22
30
  #define __PRETTY_FUNCTION__ __FUNCSIG__
23
31
 
24
32
  #define posix_memalign(p, a, s) \
@@ -87,3 +95,56 @@ inline int __builtin_clzll(uint64_t x) {
87
95
  #define ALIGNED(x) __attribute__((aligned(x)))
88
96
 
89
97
  #endif // _MSC_VER
98
+
99
+ #if defined(__GNUC__) || defined(__clang__)
100
+ #define FAISS_DEPRECATED(msg) __attribute__((deprecated(msg)))
101
+ #else
102
+ #define FAISS_DEPRECATED(msg)
103
+ #endif // GCC or Clang
104
+
105
+ // Localized enablement of imprecise floating point operations
106
+ // You need to use all 3 macros to cover all compilers.
107
+ #if defined(_MSC_VER)
108
+ #define FAISS_PRAGMA_IMPRECISE_LOOP
109
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
110
+ __pragma(float_control(precise, off, push))
111
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END __pragma(float_control(pop))
112
+ #elif defined(__clang__)
113
+ #define FAISS_PRAGMA_IMPRECISE_LOOP \
114
+ _Pragma("clang loop vectorize(enable) interleave(enable)")
115
+
116
+ // clang-format off
117
+
118
+ // the following ifdef is needed, because old versions of clang (prior to 14)
119
+ // do not generate FMAs on x86 unless this pragma is used. On the other hand,
120
+ // ARM does not support the following pragma flag.
121
+ // TODO: find out how to enable FMAs on clang 10 and earlier.
122
+ #if defined(__x86_64__) && (defined(__clang_major__) && (__clang_major__ > 10))
123
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
124
+ _Pragma("float_control(precise, off, push)")
125
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END _Pragma("float_control(pop)")
126
+ #else
127
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
128
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
129
+ #endif
130
+ #elif defined(__GNUC__)
131
+ // Unfortunately, GCC does not provide a pragma for detecting it.
132
+ // So, we have to stick to GNUC, which is defined by MANY compilers.
133
+ // This is why clang/icc needs to be checked first.
134
+
135
+ // todo: add __INTEL_COMPILER check for the classic ICC
136
+ // todo: add __INTEL_LLVM_COMPILER for ICX
137
+
138
+ #define FAISS_PRAGMA_IMPRECISE_LOOP
139
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
140
+ _Pragma("GCC push_options") \
141
+ _Pragma("GCC optimize (\"unroll-loops,associative-math,no-signed-zeros\")")
142
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END \
143
+ _Pragma("GCC pop_options")
144
+ #else
145
+ #define FAISS_PRAGMA_IMPRECISE_LOOP
146
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
147
+ #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
148
+ #endif
149
+
150
+ // clang-format on
@@ -88,7 +88,7 @@ void pq4_pack_codes_range(
88
88
  size_t i0,
89
89
  size_t i1,
90
90
  size_t bbs,
91
- size_t M2,
91
+ size_t nsq,
92
92
  uint8_t* blocks) {
93
93
  const uint8_t perm0[16] = {
94
94
  0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};
@@ -98,9 +98,9 @@ void pq4_pack_codes_range(
98
98
  size_t block1 = ((i1 - 1) / bbs) + 1;
99
99
 
100
100
  for (size_t b = block0; b < block1; b++) {
101
- uint8_t* codes2 = blocks + b * bbs * M2 / 2;
101
+ uint8_t* codes2 = blocks + b * bbs * nsq / 2;
102
102
  int64_t i_base = b * bbs - i0;
103
- for (int sq = 0; sq < M2; sq += 2) {
103
+ for (int sq = 0; sq < nsq; sq += 2) {
104
104
  for (size_t i = 0; i < bbs; i += 32) {
105
105
  std::array<uint8_t, 32> c, c0, c1;
106
106
  get_matrix_column(
@@ -127,7 +127,7 @@ namespace {
127
127
  // get the specific address of the vector inside a block
128
128
  // shift is used to determine whether the element is saved in bits 0..3 (false) or
129
129
  // bits 4..7 (true)
130
- uint8_t get_vector_specific_address(
130
+ size_t get_vector_specific_address(
131
131
  size_t bbs,
132
132
  size_t vector_id,
133
133
  size_t sq,
@@ -189,6 +189,50 @@ void pq4_set_packed_element(
189
189
  }
190
190
  }
191
191
 
192
+ /***************************************************************
193
+ * CodePackerPQ4 implementation
194
+ ***************************************************************/
195
+
196
+ CodePackerPQ4::CodePackerPQ4(size_t nsq, size_t bbs) {
197
+ this->nsq = nsq;
198
+ nvec = bbs;
199
+ code_size = (nsq * 4 + 7) / 8;
200
+ block_size = ((nsq + 1) / 2) * bbs;
201
+ }
202
+
203
+ void CodePackerPQ4::pack_1(
204
+ const uint8_t* flat_code,
205
+ size_t offset,
206
+ uint8_t* block) const {
207
+ size_t bbs = nvec;
208
+ if (offset >= nvec) {
209
+ block += (offset / nvec) * block_size;
210
+ offset = offset % nvec;
211
+ }
212
+ for (size_t i = 0; i < code_size; i++) {
213
+ uint8_t code = flat_code[i];
214
+ pq4_set_packed_element(block, code & 15, bbs, nsq, offset, 2 * i);
215
+ pq4_set_packed_element(block, code >> 4, bbs, nsq, offset, 2 * i + 1);
216
+ }
217
+ }
218
+
219
+ void CodePackerPQ4::unpack_1(
220
+ const uint8_t* block,
221
+ size_t offset,
222
+ uint8_t* flat_code) const {
223
+ size_t bbs = nvec;
224
+ if (offset >= nvec) {
225
+ block += (offset / nvec) * block_size;
226
+ offset = offset % nvec;
227
+ }
228
+ for (size_t i = 0; i < code_size; i++) {
229
+ uint8_t code0, code1;
230
+ code0 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i);
231
+ code1 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i + 1);
232
+ flat_code[i] = code0 | (code1 << 4);
233
+ }
234
+ }
235
+
192
236
  /***************************************************************
193
237
  * Packing functions for Look-Up Tables (LUT)
194
238
  ***************************************************************/
@@ -10,6 +10,8 @@
10
10
  #include <cstdint>
11
11
  #include <cstdlib>
12
12
 
13
+ #include <faiss/impl/CodePacker.h>
14
+
13
15
  /** PQ4 SIMD packing and accumulation functions
14
16
  *
15
17
  * The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
@@ -17,7 +19,7 @@
17
19
  * otherwise register spilling becomes too large.
18
20
  *
19
21
  * The implementation of these functions is spread over 3 cpp files to reduce
20
- * parallel compile times. Templates are instanciated explicitly.
22
+ * parallel compile times. Templates are instantiated explicitly.
21
23
  */
22
24
 
23
25
  namespace faiss {
@@ -29,7 +31,7 @@ namespace faiss {
29
31
  * @param ntotal number of input codes
30
32
  * @param nb output number of codes (ntotal rounded up to a multiple of
31
33
  * bbs)
32
- * @param M2 number of sub-quantizers (=M rounded up to a muliple of 2)
34
+ * @param nsq number of sub-quantizers (=M rounded up to a multiple of 2)
33
35
  * @param bbs size of database blocks (multiple of 32)
34
36
  * @param blocks output array, size nb * nsq / 2.
35
37
  */
@@ -39,7 +41,7 @@ void pq4_pack_codes(
39
41
  size_t M,
40
42
  size_t nb,
41
43
  size_t bbs,
42
- size_t M2,
44
+ size_t nsq,
43
45
  uint8_t* blocks);
44
46
 
45
47
  /** Same as pack_codes but write in a given range of the output,
@@ -56,7 +58,7 @@ void pq4_pack_codes_range(
56
58
  size_t i0,
57
59
  size_t i1,
58
60
  size_t bbs,
59
- size_t M2,
61
+ size_t nsq,
60
62
  uint8_t* blocks);
61
63
 
62
64
  /** get a single element from a packed codes table
@@ -84,6 +86,18 @@ void pq4_set_packed_element(
84
86
  size_t vector_id,
85
87
  size_t sq);
86
88
 
89
+ /** CodePacker API for the PQ4 fast-scan */
90
+ struct CodePackerPQ4 : CodePacker {
91
+ size_t nsq;
92
+
93
+ CodePackerPQ4(size_t nsq, size_t bbs);
94
+
95
+ void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
96
+ const final;
97
+ void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
98
+ const final;
99
+ };
100
+
87
101
  /** Pack Look-up table for consumption by the kernel.
88
102
  *
89
103
  * @param nq number of queries
@@ -189,7 +189,7 @@ void accumulate(
189
189
  DISPATCH(3);
190
190
  DISPATCH(4);
191
191
  }
192
- FAISS_THROW_FMT("accumulate nq=%d not instanciated", nq);
192
+ FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
193
193
 
194
194
  #undef DISPATCH
195
195
  }
@@ -263,7 +263,7 @@ void pq4_accumulate_loop_qbs(
263
263
  DISPATCH(4);
264
264
  #undef DISPATCH
265
265
  default:
266
- FAISS_THROW_FMT("accumulate nq=%d not instanciated", nq);
266
+ FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
267
267
  }
268
268
  i0 += nq;
269
269
  LUT += nq * nsq * 16;
@@ -10,8 +10,6 @@
10
10
  */
11
11
 
12
12
  #include <faiss/index_factory.h>
13
- #include "faiss/MetricType.h"
14
- #include "faiss/impl/FaissAssert.h"
15
13
 
16
14
  #include <cinttypes>
17
15
  #include <cmath>
@@ -665,19 +663,19 @@ std::unique_ptr<Index> index_factory_sub(
665
663
  re_match(description, "(.+),Refine\\((.+)\\)", sm)) {
666
664
  std::unique_ptr<Index> filter_index =
667
665
  index_factory_sub(d, sm[1].str(), metric);
668
- std::unique_ptr<Index> refine_index;
669
666
 
667
+ IndexRefine* index_rf = nullptr;
670
668
  if (sm.size() == 3) { // Refine
671
- refine_index = index_factory_sub(d, sm[2].str(), metric);
669
+ std::unique_ptr<Index> refine_index =
670
+ index_factory_sub(d, sm[2].str(), metric);
671
+ index_rf = new IndexRefine(
672
+ filter_index.release(), refine_index.release());
673
+ index_rf->own_refine_index = true;
672
674
  } else { // RFlat
673
- refine_index.reset(new IndexFlat(d, metric));
675
+ index_rf = new IndexRefineFlat(filter_index.release(), nullptr);
674
676
  }
675
- IndexRefine* index_rf =
676
- new IndexRefine(filter_index.get(), refine_index.get());
677
+ FAISS_ASSERT(index_rf != nullptr);
677
678
  index_rf->own_fields = true;
678
- filter_index.release();
679
- refine_index.release();
680
- index_rf->own_refine_index = true;
681
679
  return std::unique_ptr<Index>(index_rf);
682
680
  }
683
681
 
@@ -7,6 +7,7 @@
7
7
 
8
8
  #include <faiss/invlists/BlockInvertedLists.h>
9
9
 
10
+ #include <faiss/impl/CodePacker.h>
10
11
  #include <faiss/impl/FaissAssert.h>
11
12
 
12
13
  #include <faiss/impl/io.h>
@@ -25,29 +26,43 @@ BlockInvertedLists::BlockInvertedLists(
25
26
  codes.resize(nlist);
26
27
  }
27
28
 
29
+ BlockInvertedLists::BlockInvertedLists(size_t nlist, const CodePacker* packer)
30
+ : InvertedLists(nlist, InvertedLists::INVALID_CODE_SIZE),
31
+ n_per_block(packer->nvec),
32
+ block_size(packer->block_size),
33
+ packer(packer) {
34
+ ids.resize(nlist);
35
+ codes.resize(nlist);
36
+ }
37
+
28
38
  BlockInvertedLists::BlockInvertedLists()
29
- : InvertedLists(0, InvertedLists::INVALID_CODE_SIZE),
30
- n_per_block(0),
31
- block_size(0) {}
39
+ : InvertedLists(0, InvertedLists::INVALID_CODE_SIZE) {}
32
40
 
33
41
  size_t BlockInvertedLists::add_entries(
34
42
  size_t list_no,
35
43
  size_t n_entry,
36
44
  const idx_t* ids_in,
37
45
  const uint8_t* code) {
38
- if (n_entry == 0)
46
+ if (n_entry == 0) {
39
47
  return 0;
48
+ }
40
49
  FAISS_THROW_IF_NOT(list_no < nlist);
41
50
  size_t o = ids[list_no].size();
42
- FAISS_THROW_IF_NOT(
43
- o == 0); // not clear how we should handle subsequent adds
44
51
  ids[list_no].resize(o + n_entry);
45
52
  memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
46
-
47
- // copy whole blocks
48
- size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
53
+ size_t n_block = (o + n_entry + n_per_block - 1) / n_per_block;
49
54
  codes[list_no].resize(n_block * block_size);
50
- memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
55
+ if (o % block_size == 0) {
56
+ // copy whole blocks
57
+ memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
58
+ } else {
59
+ FAISS_THROW_IF_NOT_MSG(packer, "missing code packer");
60
+ std::vector<uint8_t> buffer(packer->code_size);
61
+ for (size_t i = 0; i < n_entry; i++) {
62
+ packer->unpack_1(code, i, buffer.data());
63
+ packer->pack_1(buffer.data(), i + o, codes[list_no].data());
64
+ }
65
+ }
51
66
  return o;
52
67
  }
53
68
 
@@ -61,7 +76,7 @@ const uint8_t* BlockInvertedLists::get_codes(size_t list_no) const {
61
76
  return codes[list_no].get();
62
77
  }
63
78
 
64
- const InvertedLists::idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
79
+ const idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
65
80
  assert(list_no < nlist);
66
81
  return ids[list_no].data();
67
82
  }
@@ -95,7 +110,9 @@ void BlockInvertedLists::update_entries(
95
110
  */
96
111
  }
97
112
 
98
- BlockInvertedLists::~BlockInvertedLists() {}
113
+ BlockInvertedLists::~BlockInvertedLists() {
114
+ delete packer;
115
+ }
99
116
 
100
117
  /**************************************************
101
118
  * IO hook implementation
@@ -14,6 +14,8 @@
14
14
 
15
15
  namespace faiss {
16
16
 
17
+ struct CodePacker;
18
+
17
19
  /** Inverted Lists that are organized by blocks.
18
20
  *
19
21
  * Different from the regular inverted lists, the codes are organized by blocks
@@ -28,13 +30,17 @@ namespace faiss {
28
30
  * data.
29
31
  */
30
32
  struct BlockInvertedLists : InvertedLists {
31
- size_t n_per_block; // nb of vectors stored per block
32
- size_t block_size; // nb bytes per block
33
+ size_t n_per_block = 0; // nb of vectors stored per block
34
+ size_t block_size = 0; // nb bytes per block
35
+
36
+ // required to interpret the content of the blocks (owned by this)
37
+ const CodePacker* packer = nullptr;
33
38
 
34
39
  std::vector<AlignedTable<uint8_t>> codes;
35
40
  std::vector<std::vector<idx_t>> ids;
36
41
 
37
42
  BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
43
+ BlockInvertedLists(size_t nlist, const CodePacker* packer);
38
44
 
39
45
  BlockInvertedLists();
40
46
 
@@ -68,7 +68,7 @@ void DirectMap::clear() {
68
68
  hashtable.clear();
69
69
  }
70
70
 
71
- DirectMap::idx_t DirectMap::get(idx_t key) const {
71
+ idx_t DirectMap::get(idx_t key) const {
72
72
  if (type == Array) {
73
73
  FAISS_THROW_IF_NOT_MSG(key >= 0 && key < array.size(), "invalid key");
74
74
  idx_t lo = array[key];