faiss 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +39 -29
  5. data/vendor/faiss/faiss/Clustering.cpp +4 -2
  6. data/vendor/faiss/faiss/IVFlib.cpp +14 -7
  7. data/vendor/faiss/faiss/Index.h +72 -3
  8. data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
  9. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
  11. data/vendor/faiss/faiss/IndexBinary.h +46 -3
  12. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
  13. data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
  16. data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
  17. data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
  19. data/vendor/faiss/faiss/IndexHNSW.h +10 -1
  20. data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
  21. data/vendor/faiss/faiss/IndexIDMap.h +20 -0
  22. data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
  23. data/vendor/faiss/faiss/IndexIVF.h +16 -1
  24. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
  25. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
  26. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
  27. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
  28. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
  29. data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
  30. data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
  31. data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
  32. data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
  33. data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
  34. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
  36. data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
  37. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  38. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
  39. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  42. data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
  43. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
  44. data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
  45. data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
  46. data/vendor/faiss/faiss/IndexPQ.h +1 -0
  47. data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
  48. data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
  49. data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
  50. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
  51. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
  52. data/vendor/faiss/faiss/IndexShards.cpp +7 -6
  53. data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
  54. data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
  55. data/vendor/faiss/faiss/MetricType.h +5 -3
  56. data/vendor/faiss/faiss/clone_index.cpp +2 -4
  57. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
  58. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
  59. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
  60. data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
  61. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
  62. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
  63. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
  64. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
  66. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
  67. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
  68. data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
  69. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
  70. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
  71. data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
  72. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
  73. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
  74. data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
  75. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
  76. data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
  77. data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
  78. data/vendor/faiss/faiss/impl/io.cpp +9 -5
  79. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
  80. data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
  81. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
  82. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
  83. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
  84. data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
  85. data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
  86. data/vendor/faiss/faiss/index_factory.cpp +49 -33
  87. data/vendor/faiss/faiss/index_factory.h +8 -2
  88. data/vendor/faiss/faiss/index_io.h +0 -3
  89. data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
  90. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
  91. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
  92. data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
  93. data/vendor/faiss/faiss/utils/Heap.h +23 -12
  94. data/vendor/faiss/faiss/utils/distances.cpp +42 -21
  95. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  96. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
  97. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
  98. data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
  99. data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
  100. data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
  101. data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
  102. data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
  103. data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
  104. data/vendor/faiss/faiss/utils/random.cpp +14 -7
  105. data/vendor/faiss/faiss/utils/utils.cpp +0 -3
  106. metadata +5 -2
@@ -6,15 +6,12 @@
6
6
  */
7
7
 
8
8
  #include <stdio.h>
9
- #include <string.h>
10
9
 
11
- #ifdef __linux__
10
+ #if defined(__linux__) || defined(__FreeBSD__)
12
11
 
13
12
  #include <fcntl.h>
14
13
  #include <sys/mman.h>
15
14
  #include <sys/stat.h>
16
- #include <sys/types.h>
17
- #include <unistd.h>
18
15
 
19
16
  #elif defined(_WIN32)
20
17
 
@@ -30,7 +27,7 @@
30
27
 
31
28
  namespace faiss {
32
29
 
33
- #ifdef __linux__
30
+ #if defined(__linux__) || defined(__FreeBSD__)
34
31
 
35
32
  struct MmappedFileMappingOwner::PImpl {
36
33
  void* ptr = nullptr;
@@ -171,12 +168,12 @@ struct MmappedFileMappingOwner::PImpl {
171
168
  const int fd = _fileno(f);
172
169
  if (fd == -1) {
173
170
  // no good
174
- FAISS_THROW_FMT("could not get a HANDLE");
171
+ FAISS_THROW_MSG("could not get a HANDLE");
175
172
  }
176
173
 
177
174
  HANDLE file_handle = (HANDLE)_get_osfhandle(fd);
178
175
  if (file_handle == INVALID_HANDLE_VALUE) {
179
- FAISS_THROW_FMT("could not get an OS HANDLE");
176
+ FAISS_THROW_MSG("could not get an OS HANDLE");
180
177
  }
181
178
 
182
179
  // get the size of the file
@@ -6,7 +6,6 @@
6
6
  */
7
7
 
8
8
  #include <faiss/impl/FaissAssert.h>
9
- #include <faiss/impl/platform_macros.h>
10
9
  #include <faiss/impl/pq4_fast_scan.h>
11
10
  #include <faiss/impl/simd_result_handlers.h>
12
11
 
@@ -10,7 +10,6 @@
10
10
  #include <faiss/impl/FaissAssert.h>
11
11
  #include <faiss/impl/LookupTableScaler.h>
12
12
  #include <faiss/impl/simd_result_handlers.h>
13
- #include <faiss/utils/simdlib.h>
14
13
 
15
14
  namespace faiss {
16
15
 
@@ -292,8 +292,8 @@ void beam_search_encode_step(
292
292
  cent_ids.data() + i * beam_size * new_beam_size;
293
293
 
294
294
  // here we could be a tad more efficient by merging sorted arrays
295
- for (int i_2 = 0; i_2 < new_beam_size; i_2++) {
296
- new_distances_i[i_2] = C::neutral();
295
+ for (int j = 0; j < new_beam_size; j++) {
296
+ new_distances_i[j] = C::neutral();
297
297
  }
298
298
  std::vector<int> perm(new_beam_size, -1);
299
299
  heap_addn<C>(
@@ -325,8 +325,8 @@ void beam_search_encode_step(
325
325
  const float* cent_distances_i =
326
326
  cent_distances.data() + i * beam_size * K;
327
327
  // then we have to select the best results
328
- for (int i_2 = 0; i_2 < new_beam_size; i_2++) {
329
- new_distances_i[i_2] = C::neutral();
328
+ for (int j = 0; j < new_beam_size; j++) {
329
+ new_distances_i[j] = C::neutral();
330
330
  }
331
331
  std::vector<int> perm(new_beam_size, -1);
332
332
 
@@ -558,8 +558,8 @@ void beam_search_encode_step_tab(
558
558
  const float* cent_distances_i = cent_distances.data();
559
559
 
560
560
  // then we have to select the best results
561
- for (int i_2 = 0; i_2 < new_beam_size; i_2++) {
562
- new_distances_i[i_2] = C::neutral();
561
+ for (int j = 0; j < new_beam_size; j++) {
562
+ new_distances_i[j] = C::neutral();
563
563
  }
564
564
  std::vector<int> perm(new_beam_size, -1);
565
565
 
@@ -10,7 +10,7 @@
10
10
 
11
11
  namespace faiss {
12
12
 
13
- ZeroCopyIOReader::ZeroCopyIOReader(uint8_t* data, size_t size)
13
+ ZeroCopyIOReader::ZeroCopyIOReader(const uint8_t* data, size_t size)
14
14
  : data_(data), rp_(0), total_(size) {}
15
15
 
16
16
  ZeroCopyIOReader::~ZeroCopyIOReader() {}
@@ -15,11 +15,11 @@ namespace faiss {
15
15
 
16
16
  // ZeroCopyIOReader just maps the data from a given pointer.
17
17
  struct ZeroCopyIOReader : public faiss::IOReader {
18
- uint8_t* data_;
18
+ const uint8_t* data_;
19
19
  size_t rp_ = 0;
20
20
  size_t total_ = 0;
21
21
 
22
- ZeroCopyIOReader(uint8_t* data, size_t size);
22
+ ZeroCopyIOReader(const uint8_t* data, size_t size);
23
23
  ~ZeroCopyIOReader();
24
24
 
25
25
  void reset();
@@ -170,7 +170,7 @@ AdditiveQuantizer::Search_type_t aq_parse_search_type(
170
170
  return metric == METRIC_L2 ? AdditiveQuantizer::ST_decompress
171
171
  : AdditiveQuantizer::ST_LUT_nonorm;
172
172
  }
173
- int pos = stok.rfind("_");
173
+ int pos = stok.rfind('_');
174
174
  return aq_search_type[stok.substr(pos)];
175
175
  }
176
176
 
@@ -311,7 +311,8 @@ IndexIVF* parse_IndexIVF(
311
311
  const std::string& code_string,
312
312
  std::unique_ptr<Index>& quantizer,
313
313
  size_t nlist,
314
- MetricType mt) {
314
+ MetricType mt,
315
+ bool own_il) {
315
316
  std::smatch sm;
316
317
  auto match = [&sm, &code_string](const std::string pattern) {
317
318
  return re_match(code_string, pattern, sm);
@@ -320,18 +321,25 @@ IndexIVF* parse_IndexIVF(
320
321
  int d = quantizer->d;
321
322
 
322
323
  if (match("Flat")) {
323
- return new IndexIVFFlat(get_q(), d, nlist, mt);
324
+ return new IndexIVFFlat(get_q(), d, nlist, mt, own_il);
324
325
  }
325
326
  if (match("FlatDedup")) {
326
- return new IndexIVFFlatDedup(get_q(), d, nlist, mt);
327
+ return new IndexIVFFlatDedup(get_q(), d, nlist, mt, own_il);
327
328
  }
328
329
  if (match(sq_pattern)) {
329
330
  return new IndexIVFScalarQuantizer(
330
- get_q(), d, nlist, sq_types[sm[1].str()], mt);
331
+ get_q(),
332
+ d,
333
+ nlist,
334
+ sq_types[sm[1].str()],
335
+ mt,
336
+ /*by_residual=*/true,
337
+ own_il);
331
338
  }
332
339
  if (match("PQ([0-9]+)(x[0-9]+)?(np)?")) {
333
340
  int M = mres_to_int(sm[1]), nbit = mres_to_int(sm[2], 8, 1);
334
- IndexIVFPQ* index_ivf = new IndexIVFPQ(get_q(), d, nlist, M, nbit, mt);
341
+ IndexIVFPQ* index_ivf =
342
+ new IndexIVFPQ(get_q(), d, nlist, M, nbit, mt, own_il);
335
343
  index_ivf->do_polysemous_training = sm[3].str() != "np";
336
344
  return index_ivf;
337
345
  }
@@ -340,13 +348,13 @@ IndexIVF* parse_IndexIVF(
340
348
  mt == METRIC_L2,
341
349
  "IVFPQR not implemented for inner product search");
342
350
  int M1 = mres_to_int(sm[1]), M2 = mres_to_int(sm[2]);
343
- return new IndexIVFPQR(get_q(), d, nlist, M1, 8, M2, 8);
351
+ return new IndexIVFPQR(get_q(), d, nlist, M1, 8, M2, 8, own_il);
344
352
  }
345
353
  if (match("PQ([0-9]+)x4fs(r?)(_[0-9]+)?")) {
346
354
  int M = mres_to_int(sm[1]);
347
355
  int bbs = mres_to_int(sm[3], 32, 1);
348
- IndexIVFPQFastScan* index_ivf =
349
- new IndexIVFPQFastScan(get_q(), d, nlist, M, 4, mt, bbs);
356
+ IndexIVFPQFastScan* index_ivf = new IndexIVFPQFastScan(
357
+ get_q(), d, nlist, M, 4, mt, bbs, own_il);
350
358
  index_ivf->by_residual = sm[2].str() == "r";
351
359
  return index_ivf;
352
360
  }
@@ -357,11 +365,11 @@ IndexIVF* parse_IndexIVF(
357
365
  IndexIVF* index_ivf;
358
366
  if (sm[1].str() == "RQ") {
359
367
  index_ivf = new IndexIVFResidualQuantizer(
360
- get_q(), d, nlist, nbits, mt, st);
368
+ get_q(), d, nlist, nbits, mt, st, own_il);
361
369
  } else {
362
370
  FAISS_THROW_IF_NOT(nbits.size() > 0);
363
371
  index_ivf = new IndexIVFLocalSearchQuantizer(
364
- get_q(), d, nlist, nbits.size(), nbits[0], mt, st);
372
+ get_q(), d, nlist, nbits.size(), nbits[0], mt, st, own_il);
365
373
  }
366
374
  return index_ivf;
367
375
  }
@@ -373,10 +381,10 @@ IndexIVF* parse_IndexIVF(
373
381
  IndexIVF* index_ivf;
374
382
  if (sm[1].str() == "PRQ") {
375
383
  index_ivf = new IndexIVFProductResidualQuantizer(
376
- get_q(), d, nlist, nsplits, Msub, nbit, mt, st);
384
+ get_q(), d, nlist, nsplits, Msub, nbit, mt, st, own_il);
377
385
  } else {
378
386
  index_ivf = new IndexIVFProductLocalSearchQuantizer(
379
- get_q(), d, nlist, nsplits, Msub, nbit, mt, st);
387
+ get_q(), d, nlist, nsplits, Msub, nbit, mt, st, own_il);
380
388
  }
381
389
  return index_ivf;
382
390
  }
@@ -387,10 +395,10 @@ IndexIVF* parse_IndexIVF(
387
395
  IndexIVFAdditiveQuantizerFastScan* index_ivf;
388
396
  if (sm[1].str() == "RQ") {
389
397
  index_ivf = new IndexIVFResidualQuantizerFastScan(
390
- get_q(), d, nlist, M, 4, mt, st, bbs);
398
+ get_q(), d, nlist, M, 4, mt, st, bbs, own_il);
391
399
  } else {
392
400
  index_ivf = new IndexIVFLocalSearchQuantizerFastScan(
393
- get_q(), d, nlist, M, 4, mt, st, bbs);
401
+ get_q(), d, nlist, M, 4, mt, st, bbs, own_il);
394
402
  }
395
403
  index_ivf->by_residual = (sm[3].str() == "r");
396
404
  return index_ivf;
@@ -404,10 +412,10 @@ IndexIVF* parse_IndexIVF(
404
412
  IndexIVFAdditiveQuantizerFastScan* index_ivf;
405
413
  if (sm[1].str() == "PRQ") {
406
414
  index_ivf = new IndexIVFProductResidualQuantizerFastScan(
407
- get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs);
415
+ get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs, own_il);
408
416
  } else {
409
417
  index_ivf = new IndexIVFProductLocalSearchQuantizerFastScan(
410
- get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs);
418
+ get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs, own_il);
411
419
  }
412
420
  index_ivf->by_residual = (sm[4].str() == "r");
413
421
  return index_ivf;
@@ -425,8 +433,8 @@ IndexIVF* parse_IndexIVF(
425
433
  // the rationale for -1e10 is that this corresponds to simple
426
434
  // thresholding
427
435
  float period = sm[3].length() > 0 ? std::stof(sm[3]) : -1e10;
428
- IndexIVFSpectralHash* index_ivf =
429
- new IndexIVFSpectralHash(get_q(), d, nlist, outdim, period);
436
+ IndexIVFSpectralHash* index_ivf = new IndexIVFSpectralHash(
437
+ get_q(), d, nlist, outdim, period, own_il);
430
438
  index_ivf->replace_vt(vt.release(), true);
431
439
  if (sm[4].length()) {
432
440
  std::string s = sm[4].str();
@@ -440,7 +448,7 @@ IndexIVF* parse_IndexIVF(
440
448
  return index_ivf;
441
449
  }
442
450
  if (match(rabitq_pattern)) {
443
- return new IndexIVFRaBitQ(get_q(), d, nlist, mt);
451
+ return new IndexIVFRaBitQ(get_q(), d, nlist, mt, own_il);
444
452
  }
445
453
  return nullptr;
446
454
  }
@@ -677,7 +685,8 @@ Index* parse_other_indexes(
677
685
  std::unique_ptr<Index> index_factory_sub(
678
686
  int d,
679
687
  std::string description,
680
- MetricType metric) {
688
+ MetricType metric,
689
+ bool own_invlists = true) {
681
690
  // handle composite indexes
682
691
 
683
692
  bool verbose = index_factory_verbose;
@@ -838,7 +847,7 @@ std::unique_ptr<Index> index_factory_sub(
838
847
 
839
848
  // IndexRowwiseMinMax, fp32 version
840
849
  if (description.compare(0, 7, "MinMax,") == 0) {
841
- size_t comma = description.find(",");
850
+ size_t comma = description.find(',');
842
851
  std::string sub_index_string = description.substr(comma + 1);
843
852
  auto sub_index = index_factory_sub(d, sub_index_string, metric);
844
853
 
@@ -850,7 +859,7 @@ std::unique_ptr<Index> index_factory_sub(
850
859
 
851
860
  // IndexRowwiseMinMax, fp16 version
852
861
  if (description.compare(0, 11, "MinMaxFP16,") == 0) {
853
- size_t comma = description.find(",");
862
+ size_t comma = description.find(',');
854
863
  std::string sub_index_string = description.substr(comma + 1);
855
864
  auto sub_index = index_factory_sub(d, sub_index_string, metric);
856
865
 
@@ -864,7 +873,7 @@ std::unique_ptr<Index> index_factory_sub(
864
873
  {
865
874
  size_t nlist;
866
875
  bool use_2layer;
867
- size_t comma = description.find(",");
876
+ size_t comma = description.find(',');
868
877
  std::string coarse_string = description.substr(0, comma);
869
878
  // Match coarse quantizer part first
870
879
  std::unique_ptr<Index> quantizer(parse_coarse_quantizer(
@@ -894,8 +903,8 @@ std::unique_ptr<Index> index_factory_sub(
894
903
  return std::unique_ptr<Index>(index_2l);
895
904
  }
896
905
 
897
- IndexIVF* index_ivf =
898
- parse_IndexIVF(code_description, quantizer, nlist, metric);
906
+ IndexIVF* index_ivf = parse_IndexIVF(
907
+ code_description, quantizer, nlist, metric, own_invlists);
899
908
 
900
909
  FAISS_THROW_IF_NOT_FMT(
901
910
  index_ivf,
@@ -911,25 +920,32 @@ std::unique_ptr<Index> index_factory_sub(
911
920
 
912
921
  } // anonymous namespace
913
922
 
914
- Index* index_factory(int d, const char* description, MetricType metric) {
915
- return index_factory_sub(d, description, metric).release();
923
+ Index* index_factory(
924
+ int d,
925
+ const char* description,
926
+ MetricType metric,
927
+ bool own_invlists) {
928
+ return index_factory_sub(d, description, metric, own_invlists).release();
916
929
  }
917
930
 
918
- IndexBinary* index_binary_factory(int d, const char* description) {
931
+ IndexBinary* index_binary_factory(
932
+ int d,
933
+ const char* description,
934
+ bool own_invlists) {
919
935
  IndexBinary* index = nullptr;
920
936
 
921
937
  int ncentroids = -1;
922
938
  int M, nhash, b;
923
939
 
924
940
  if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) {
925
- IndexBinaryIVF* index_ivf =
926
- new IndexBinaryIVF(new IndexBinaryHNSW(d, M), d, ncentroids);
941
+ IndexBinaryIVF* index_ivf = new IndexBinaryIVF(
942
+ new IndexBinaryHNSW(d, M), d, ncentroids, own_invlists);
927
943
  index_ivf->own_fields = true;
928
944
  index = index_ivf;
929
945
 
930
946
  } else if (sscanf(description, "BIVF%d", &ncentroids) == 1) {
931
- IndexBinaryIVF* index_ivf =
932
- new IndexBinaryIVF(new IndexBinaryFlat(d), d, ncentroids);
947
+ IndexBinaryIVF* index_ivf = new IndexBinaryIVF(
948
+ new IndexBinaryFlat(d), d, ncentroids, own_invlists);
933
949
  index_ivf->own_fields = true;
934
950
  index = index_ivf;
935
951
 
@@ -17,11 +17,17 @@ namespace faiss {
17
17
  Index* index_factory(
18
18
  int d,
19
19
  const char* description,
20
- MetricType metric = METRIC_L2);
20
+ MetricType metric = METRIC_L2,
21
+ // Whether to maintain inverted list within faiss index (only applicable
22
+ // to IndexIVF*)
23
+ bool own_invlists = true);
21
24
 
22
25
  /// set to > 0 to get more logs from index_factory
23
26
  FAISS_API extern int index_factory_verbose;
24
27
 
25
- IndexBinary* index_binary_factory(int d, const char* description);
28
+ IndexBinary* index_binary_factory(
29
+ int d,
30
+ const char* description,
31
+ bool own_invlists = true);
26
32
 
27
33
  } // namespace faiss
@@ -11,9 +11,6 @@
11
11
  #define FAISS_INDEX_IO_H
12
12
 
13
13
  #include <cstdio>
14
- #include <string>
15
- #include <typeinfo>
16
- #include <vector>
17
14
 
18
15
  /** I/O functions can read/write to a filename, a file handle or to an
19
16
  * object that abstracts the medium.
@@ -85,8 +85,9 @@ idx_t DirectMap::get(idx_t key) const {
85
85
  }
86
86
 
87
87
  void DirectMap::add_single_id(idx_t id, idx_t list_no, size_t offset) {
88
- if (type == NoMap)
88
+ if (type == NoMap) {
89
89
  return;
90
+ }
90
91
 
91
92
  if (type == Array) {
92
93
  assert(id == array.size());
@@ -280,8 +280,9 @@ size_t ArrayInvertedLists::add_entries(
280
280
  size_t n_entry,
281
281
  const idx_t* ids_in,
282
282
  const uint8_t* code) {
283
- if (n_entry == 0)
283
+ if (n_entry == 0) {
284
284
  return 0;
285
+ }
285
286
  assert(list_no < nlist);
286
287
  size_t o = ids[list_no].size();
287
288
  ids[list_no].resize(o + n_entry);
@@ -526,8 +527,9 @@ void SliceInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
526
527
  std::vector<idx_t> translated_list_nos;
527
528
  for (int j = 0; j < nlist; j++) {
528
529
  idx_t list_no = list_nos[j];
529
- if (list_no < 0)
530
+ if (list_no < 0) {
530
531
  continue;
532
+ }
531
533
  translated_list_nos.push_back(translate_list_no(this, list_no));
532
534
  }
533
535
  il->prefetch_lists(translated_list_nos.data(), translated_list_nos.size());
@@ -630,8 +632,9 @@ void VStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
630
632
  std::vector<int> n_per_il(ils.size(), 0);
631
633
  for (int j = 0; j < nlist; j++) {
632
634
  idx_t list_no = list_nos[j];
633
- if (list_no < 0)
635
+ if (list_no < 0) {
634
636
  continue;
637
+ }
635
638
  int i = ilno[j] = translate_list_no(this, list_no);
636
639
  n_per_il[i]++;
637
640
  }
@@ -642,8 +645,9 @@ void VStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
642
645
  std::vector<idx_t> sorted_list_nos(cum_n_per_il.back());
643
646
  for (int j = 0; j < nlist; j++) {
644
647
  idx_t list_no = list_nos[j];
645
- if (list_no < 0)
648
+ if (list_no < 0) {
646
649
  continue;
650
+ }
647
651
  int i = ilno[j];
648
652
  list_no -= cumsz[i];
649
653
  sorted_list_nos[cum_n_per_il[i]++] = list_no;
@@ -716,8 +720,9 @@ void MaskedInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
716
720
  std::vector<idx_t> list0, list1;
717
721
  for (int i = 0; i < nlist; i++) {
718
722
  idx_t list_no = list_nos[i];
719
- if (list_no < 0)
723
+ if (list_no < 0) {
720
724
  continue;
725
+ }
721
726
  size_t sz = il0->list_size(list_no);
722
727
  (sz ? list0 : list1).push_back(list_no);
723
728
  }
@@ -782,8 +787,9 @@ void StopWordsInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
782
787
  std::vector<idx_t> list0;
783
788
  for (int i = 0; i < nlist; i++) {
784
789
  idx_t list_no = list_nos[i];
785
- if (list_no < 0)
790
+ if (list_no < 0) {
786
791
  continue;
792
+ }
787
793
  if (il0->list_size(list_no) < maxsize) {
788
794
  list0.push_back(list_no);
789
795
  }
@@ -148,8 +148,9 @@ struct OnDiskInvertedLists::OngoingPrefetch {
148
148
 
149
149
  bool one_list() {
150
150
  idx_t list_no = pf->get_next_list();
151
- if (list_no == -1)
151
+ if (list_no == -1) {
152
152
  return false;
153
+ }
153
154
  const OnDiskInvertedLists* od = pf->od;
154
155
  od->locks->lock_1(list_no);
155
156
  size_t n = od->list_size(list_no);
@@ -195,8 +196,9 @@ struct OnDiskInvertedLists::OngoingPrefetch {
195
196
  static void* prefetch_list(void* arg) {
196
197
  Thread* th = static_cast<Thread*>(arg);
197
198
 
198
- while (th->one_list())
199
+ while (th->one_list()) {
199
200
  ;
201
+ }
200
202
 
201
203
  return nullptr;
202
204
  }
@@ -404,8 +406,9 @@ void OnDiskInvertedLists::update_entries(
404
406
  const idx_t* ids_in,
405
407
  const uint8_t* codes_in) {
406
408
  FAISS_THROW_IF_NOT(!read_only);
407
- if (n_entry == 0)
409
+ if (n_entry == 0) {
408
410
  return;
411
+ }
409
412
  [[maybe_unused]] const List& l = lists[list_no];
410
413
  assert(n_entry + offset <= l.size);
411
414
  idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
@@ -515,8 +518,9 @@ size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
515
518
 
516
519
  void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
517
520
  // should hold lock2
518
- if (capacity == 0)
521
+ if (capacity == 0) {
519
522
  return;
523
+ }
520
524
 
521
525
  auto it = slots.begin();
522
526
  while (it != slots.end() && it->offset <= offset) {
@@ -17,21 +17,24 @@ namespace faiss {
17
17
  template <typename C>
18
18
  void HeapArray<C>::heapify() {
19
19
  #pragma omp parallel for
20
- for (int64_t j = 0; j < nh; j++)
20
+ for (int64_t j = 0; j < nh; j++) {
21
21
  heap_heapify<C>(k, val + j * k, ids + j * k);
22
+ }
22
23
  }
23
24
 
24
25
  template <typename C>
25
26
  void HeapArray<C>::reorder() {
26
27
  #pragma omp parallel for
27
- for (int64_t j = 0; j < nh; j++)
28
+ for (int64_t j = 0; j < nh; j++) {
28
29
  heap_reorder<C>(k, val + j * k, ids + j * k);
30
+ }
29
31
  }
30
32
 
31
33
  template <typename C>
32
34
  void HeapArray<C>::addn(size_t nj, const T* vin, TI j0, size_t i0, int64_t ni) {
33
- if (ni == -1)
35
+ if (ni == -1) {
34
36
  ni = nh;
37
+ }
35
38
  assert(i0 >= 0 && i0 + ni <= nh);
36
39
  #pragma omp parallel for if (ni * nj > 100000)
37
40
  for (int64_t i = i0; i < i0 + ni; i++) {
@@ -60,8 +63,9 @@ void HeapArray<C>::addn_with_ids(
60
63
  addn(nj, vin, 0, i0, ni);
61
64
  return;
62
65
  }
63
- if (ni == -1)
66
+ if (ni == -1) {
64
67
  ni = nh;
68
+ }
65
69
  assert(i0 >= 0 && i0 + ni <= nh);
66
70
  #pragma omp parallel for if (ni * nj > 100000)
67
71
  for (int64_t i = i0; i < i0 + ni; i++) {
@@ -115,19 +119,22 @@ void HeapArray<C>::per_line_extrema(T* out_val, TI* out_ids) const {
115
119
  int64_t imin = -1;
116
120
  typename C::T xval = C::Crev::neutral();
117
121
  const typename C::T* x_ = val + j * k;
118
- for (size_t i = 0; i < k; i++)
122
+ for (size_t i = 0; i < k; i++) {
119
123
  if (C::cmp(x_[i], xval)) {
120
124
  xval = x_[i];
121
125
  imin = i;
122
126
  }
123
- if (out_val)
127
+ }
128
+ if (out_val) {
124
129
  out_val[j] = xval;
130
+ }
125
131
 
126
132
  if (out_ids) {
127
- if (ids && imin != -1)
133
+ if (ids && imin != -1) {
128
134
  out_ids[j] = ids[j * k + imin];
129
- else
135
+ } else {
130
136
  out_ids[j] = imin;
137
+ }
131
138
  }
132
139
  }
133
140
  }
@@ -53,8 +53,9 @@ inline void heap_pop(size_t k, typename C::T* bh_val, typename C::TI* bh_ids) {
53
53
  while (1) {
54
54
  i1 = i << 1;
55
55
  i2 = i1 + 1;
56
- if (i1 > k)
56
+ if (i1 > k) {
57
57
  break;
58
+ }
58
59
  if ((i2 == k + 1) ||
59
60
  C::cmp2(bh_val[i1], bh_val[i2], bh_ids[i1], bh_ids[i2])) {
60
61
  if (C::cmp2(val, bh_val[i1], id, bh_ids[i1])) {
@@ -220,8 +221,9 @@ inline void heap_pop(size_t k, std::pair<typename C::T, typename C::TI>* bh) {
220
221
  while (1) {
221
222
  i1 = i << 1;
222
223
  i2 = i1 + 1;
223
- if (i1 > k)
224
+ if (i1 > k) {
224
225
  break;
226
+ }
225
227
  if ((i2 == k + 1) ||
226
228
  C::cmp2(bh[i1].first, bh[i2].first, bh[i1].second, bh[i2].second)) {
227
229
  if (C::cmp2(val, bh[i1].first, id, bh[i1].second)) {
@@ -320,15 +322,18 @@ inline void heap_heapify(
320
322
  const typename C::T* x = nullptr,
321
323
  const typename C::TI* ids = nullptr,
322
324
  size_t k0 = 0) {
323
- if (k0 > 0)
325
+ if (k0 > 0) {
324
326
  assert(x);
327
+ }
325
328
 
326
329
  if (ids) {
327
- for (size_t i = 0; i < k0; i++)
330
+ for (size_t i = 0; i < k0; i++) {
328
331
  heap_push<C>(i + 1, bh_val, bh_ids, x[i], ids[i]);
332
+ }
329
333
  } else {
330
- for (size_t i = 0; i < k0; i++)
334
+ for (size_t i = 0; i < k0; i++) {
331
335
  heap_push<C>(i + 1, bh_val, bh_ids, x[i], i);
336
+ }
332
337
  }
333
338
 
334
339
  for (size_t i = k0; i < k; i++) {
@@ -373,18 +378,19 @@ inline void heap_addn(
373
378
  const typename C::TI* ids,
374
379
  size_t n) {
375
380
  size_t i;
376
- if (ids)
381
+ if (ids) {
377
382
  for (i = 0; i < n; i++) {
378
383
  if (C::cmp(bh_val[0], x[i])) {
379
384
  heap_replace_top<C>(k, bh_val, bh_ids, x[i], ids[i]);
380
385
  }
381
386
  }
382
- else
387
+ } else {
383
388
  for (i = 0; i < n; i++) {
384
389
  if (C::cmp(bh_val[0], x[i])) {
385
390
  heap_replace_top<C>(k, bh_val, bh_ids, x[i], i);
386
391
  }
387
392
  }
393
+ }
388
394
  }
389
395
 
390
396
  /* Partial instanciation for heaps with TI = int64_t */
@@ -433,8 +439,9 @@ inline size_t heap_reorder(
433
439
  heap_pop<C>(k - i, bh_val, bh_ids);
434
440
  bh_val[k - ii - 1] = val;
435
441
  bh_ids[k - ii - 1] = id;
436
- if (id != -1)
442
+ if (id != -1) {
437
443
  ii++;
444
+ }
438
445
  }
439
446
  /* Count the number of elements which are effectively returned */
440
447
  size_t nel = ii;
@@ -573,17 +580,20 @@ inline void indirect_heap_pop(
573
580
  while (1) {
574
581
  size_t i1 = i << 1;
575
582
  size_t i2 = i1 + 1;
576
- if (i1 > k)
583
+ if (i1 > k) {
577
584
  break;
585
+ }
578
586
  typename C::TI id1 = bh_ids[i1], id2 = bh_ids[i2];
579
587
  if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[id2])) {
580
- if (C::cmp(val, bh_val[id1]))
588
+ if (C::cmp(val, bh_val[id1])) {
581
589
  break;
590
+ }
582
591
  bh_ids[i] = id1;
583
592
  i = i1;
584
593
  } else {
585
- if (C::cmp(val, bh_val[id2]))
594
+ if (C::cmp(val, bh_val[id2])) {
586
595
  break;
596
+ }
587
597
  bh_ids[i] = id2;
588
598
  i = i2;
589
599
  }
@@ -602,8 +612,9 @@ inline void indirect_heap_push(
602
612
  size_t i = k;
603
613
  while (i > 1) {
604
614
  size_t i_father = i >> 1;
605
- if (!C::cmp(val, bh_val[bh_ids[i_father]]))
615
+ if (!C::cmp(val, bh_val[bh_ids[i_father]])) {
606
616
  break;
617
+ }
607
618
  bh_ids[i] = bh_ids[i_father];
608
619
  i = i_father;
609
620
  }