faiss 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #pragma once
11
9
 
12
10
  #include <faiss/Index.h>
@@ -16,10 +14,11 @@ namespace faiss {
16
14
 
17
15
  /** Build and index with the sequence of processing steps described in
18
16
  * the string. */
19
- Index *index_factory (int d, const char *description,
20
- MetricType metric = METRIC_L2);
21
-
22
- IndexBinary *index_binary_factory (int d, const char *description);
17
+ Index* index_factory(
18
+ int d,
19
+ const char* description,
20
+ MetricType metric = METRIC_L2);
23
21
 
22
+ IndexBinary* index_binary_factory(int d, const char* description);
24
23
 
25
- }
24
+ } // namespace faiss
@@ -12,10 +12,9 @@
12
12
  #ifndef FAISS_INDEX_IO_H
13
13
  #define FAISS_INDEX_IO_H
14
14
 
15
-
16
15
  #include <cstdio>
17
- #include <typeinfo>
18
16
  #include <string>
17
+ #include <typeinfo>
19
18
  #include <vector>
20
19
 
21
20
  /** I/O functions can read/write to a filename, a file handle or to an
@@ -36,13 +35,13 @@ struct IOReader;
36
35
  struct IOWriter;
37
36
  struct InvertedLists;
38
37
 
39
- void write_index (const Index *idx, const char *fname);
40
- void write_index (const Index *idx, FILE *f);
41
- void write_index (const Index *idx, IOWriter *writer);
38
+ void write_index(const Index* idx, const char* fname);
39
+ void write_index(const Index* idx, FILE* f);
40
+ void write_index(const Index* idx, IOWriter* writer);
42
41
 
43
- void write_index_binary (const IndexBinary *idx, const char *fname);
44
- void write_index_binary (const IndexBinary *idx, FILE *f);
45
- void write_index_binary (const IndexBinary *idx, IOWriter *writer);
42
+ void write_index_binary(const IndexBinary* idx, const char* fname);
43
+ void write_index_binary(const IndexBinary* idx, FILE* f);
44
+ void write_index_binary(const IndexBinary* idx, IOWriter* writer);
46
45
 
47
46
  // The read_index flags are implemented only for a subset of index types.
48
47
  const int IO_FLAG_READ_ONLY = 2;
@@ -51,32 +50,30 @@ const int IO_FLAG_READ_ONLY = 2;
51
50
  const int IO_FLAG_ONDISK_SAME_DIR = 4;
52
51
  // don't load IVF data to RAM, only list sizes
53
52
  const int IO_FLAG_SKIP_IVF_DATA = 8;
54
- // try to memmap data (useful to load an ArrayInvertedLists as an OnDiskInvertedLists)
53
+ // try to memmap data (useful to load an ArrayInvertedLists as an
54
+ // OnDiskInvertedLists)
55
55
  const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
56
56
 
57
+ Index* read_index(const char* fname, int io_flags = 0);
58
+ Index* read_index(FILE* f, int io_flags = 0);
59
+ Index* read_index(IOReader* reader, int io_flags = 0);
57
60
 
58
- Index *read_index (const char *fname, int io_flags = 0);
59
- Index *read_index (FILE * f, int io_flags = 0);
60
- Index *read_index (IOReader *reader, int io_flags = 0);
61
+ IndexBinary* read_index_binary(const char* fname, int io_flags = 0);
62
+ IndexBinary* read_index_binary(FILE* f, int io_flags = 0);
63
+ IndexBinary* read_index_binary(IOReader* reader, int io_flags = 0);
61
64
 
62
- IndexBinary *read_index_binary (const char *fname, int io_flags = 0);
63
- IndexBinary *read_index_binary (FILE * f, int io_flags = 0);
64
- IndexBinary *read_index_binary (IOReader *reader, int io_flags = 0);
65
+ void write_VectorTransform(const VectorTransform* vt, const char* fname);
66
+ VectorTransform* read_VectorTransform(const char* fname);
65
67
 
66
- void write_VectorTransform (const VectorTransform *vt, const char *fname);
67
- VectorTransform *read_VectorTransform (const char *fname);
68
+ ProductQuantizer* read_ProductQuantizer(const char* fname);
69
+ ProductQuantizer* read_ProductQuantizer(IOReader* reader);
68
70
 
69
- ProductQuantizer * read_ProductQuantizer (const char*fname);
70
- ProductQuantizer * read_ProductQuantizer (IOReader *reader);
71
-
72
- void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname);
73
- void write_ProductQuantizer (const ProductQuantizer*pq, IOWriter *f);
74
-
75
- void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
76
- InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
71
+ void write_ProductQuantizer(const ProductQuantizer* pq, const char* fname);
72
+ void write_ProductQuantizer(const ProductQuantizer* pq, IOWriter* f);
77
73
 
74
+ void write_InvertedLists(const InvertedLists* ils, IOWriter* f);
75
+ InvertedLists* read_InvertedLists(IOReader* reader, int io_flags = 0);
78
76
 
79
77
  } // namespace faiss
80
78
 
81
-
82
79
  #endif
@@ -12,81 +12,80 @@
12
12
  #include <faiss/impl/io.h>
13
13
  #include <faiss/impl/io_macros.h>
14
14
 
15
-
16
15
  namespace faiss {
17
16
 
18
- BlockInvertedLists::BlockInvertedLists (
19
- size_t nlist, size_t n_per_block,
20
- size_t block_size):
21
- InvertedLists (nlist, InvertedLists::INVALID_CODE_SIZE),
22
- n_per_block(n_per_block), block_size(block_size)
23
- {
24
- ids.resize (nlist);
25
- codes.resize (nlist);
17
+ BlockInvertedLists::BlockInvertedLists(
18
+ size_t nlist,
19
+ size_t n_per_block,
20
+ size_t block_size)
21
+ : InvertedLists(nlist, InvertedLists::INVALID_CODE_SIZE),
22
+ n_per_block(n_per_block),
23
+ block_size(block_size) {
24
+ ids.resize(nlist);
25
+ codes.resize(nlist);
26
26
  }
27
27
 
28
- BlockInvertedLists::BlockInvertedLists ():
29
- InvertedLists (0, InvertedLists::INVALID_CODE_SIZE),
30
- n_per_block(0), block_size(0)
31
- {}
32
-
33
-
34
- size_t BlockInvertedLists::add_entries (
35
- size_t list_no, size_t n_entry,
36
- const idx_t* ids_in, const uint8_t *code)
37
- {
38
- if (n_entry == 0) return 0;
39
- FAISS_THROW_IF_NOT (list_no < nlist);
40
- size_t o = ids [list_no].size();
41
- FAISS_THROW_IF_NOT (o == 0); // not clear how we should handle subsequent adds
42
- ids [list_no].resize (o + n_entry);
43
- memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
28
+ BlockInvertedLists::BlockInvertedLists()
29
+ : InvertedLists(0, InvertedLists::INVALID_CODE_SIZE),
30
+ n_per_block(0),
31
+ block_size(0) {}
32
+
33
+ size_t BlockInvertedLists::add_entries(
34
+ size_t list_no,
35
+ size_t n_entry,
36
+ const idx_t* ids_in,
37
+ const uint8_t* code) {
38
+ if (n_entry == 0)
39
+ return 0;
40
+ FAISS_THROW_IF_NOT(list_no < nlist);
41
+ size_t o = ids[list_no].size();
42
+ FAISS_THROW_IF_NOT(
43
+ o == 0); // not clear how we should handle subsequent adds
44
+ ids[list_no].resize(o + n_entry);
45
+ memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
44
46
 
45
47
  // copy whole blocks
46
48
  size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
47
- codes [list_no].resize (n_block * block_size);
48
- memcpy (&codes[list_no][o * code_size], code, n_block * block_size);
49
+ codes[list_no].resize(n_block * block_size);
50
+ memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
49
51
  return o;
50
52
  }
51
53
 
52
- size_t BlockInvertedLists::list_size(size_t list_no) const
53
- {
54
- assert (list_no < nlist);
54
+ size_t BlockInvertedLists::list_size(size_t list_no) const {
55
+ assert(list_no < nlist);
55
56
  return ids[list_no].size();
56
57
  }
57
58
 
58
- const uint8_t * BlockInvertedLists::get_codes (size_t list_no) const
59
- {
60
- assert (list_no < nlist);
59
+ const uint8_t* BlockInvertedLists::get_codes(size_t list_no) const {
60
+ assert(list_no < nlist);
61
61
  return codes[list_no].get();
62
62
  }
63
63
 
64
- const InvertedLists::idx_t * BlockInvertedLists::get_ids (size_t list_no) const
65
- {
66
- assert (list_no < nlist);
64
+ const InvertedLists::idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
65
+ assert(list_no < nlist);
67
66
  return ids[list_no].data();
68
67
  }
69
68
 
70
- void BlockInvertedLists::resize (size_t list_no, size_t new_size)
71
- {
72
- ids[list_no].resize (new_size);
69
+ void BlockInvertedLists::resize(size_t list_no, size_t new_size) {
70
+ ids[list_no].resize(new_size);
73
71
  size_t prev_nbytes = codes[list_no].size();
74
72
  size_t n_block = (new_size + n_per_block - 1) / n_per_block;
75
73
  size_t new_nbytes = n_block * block_size;
76
- codes[list_no].resize (new_nbytes);
74
+ codes[list_no].resize(new_nbytes);
77
75
  if (prev_nbytes < new_nbytes) {
78
76
  // set new elements to 0
79
- memset(
80
- codes[list_no].data() + prev_nbytes, 0,
81
- new_nbytes - prev_nbytes
82
- );
77
+ memset(codes[list_no].data() + prev_nbytes,
78
+ 0,
79
+ new_nbytes - prev_nbytes);
83
80
  }
84
81
  }
85
82
 
86
- void BlockInvertedLists::update_entries (
87
- size_t , size_t , size_t ,
88
- const idx_t *, const uint8_t *)
89
- {
83
+ void BlockInvertedLists::update_entries(
84
+ size_t,
85
+ size_t,
86
+ size_t,
87
+ const idx_t*,
88
+ const uint8_t*) {
90
89
  FAISS_THROW_MSG("not impemented");
91
90
  /*
92
91
  assert (list_no < nlist);
@@ -96,29 +95,25 @@ void BlockInvertedLists::update_entries (
96
95
  */
97
96
  }
98
97
 
99
-
100
- BlockInvertedLists::~BlockInvertedLists ()
101
- {}
98
+ BlockInvertedLists::~BlockInvertedLists() {}
102
99
 
103
100
  /**************************************************
104
101
  * IO hook implementation
105
102
  **************************************************/
106
103
 
107
- BlockInvertedListsIOHook::BlockInvertedListsIOHook():
108
- InvertedListsIOHook("ilbl", typeid(BlockInvertedLists).name())
109
- {}
110
-
104
+ BlockInvertedListsIOHook::BlockInvertedListsIOHook()
105
+ : InvertedListsIOHook("ilbl", typeid(BlockInvertedLists).name()) {}
111
106
 
112
- void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) const
113
- {
114
- uint32_t h = fourcc ("ilbl");
115
- WRITE1 (h);
116
- const BlockInvertedLists *il =
117
- dynamic_cast<const BlockInvertedLists*> (ils_in);
118
- WRITE1 (il->nlist);
119
- WRITE1 (il->code_size);
120
- WRITE1 (il->n_per_block);
121
- WRITE1 (il->block_size);
107
+ void BlockInvertedListsIOHook::write(const InvertedLists* ils_in, IOWriter* f)
108
+ const {
109
+ uint32_t h = fourcc("ilbl");
110
+ WRITE1(h);
111
+ const BlockInvertedLists* il =
112
+ dynamic_cast<const BlockInvertedLists*>(ils_in);
113
+ WRITE1(il->nlist);
114
+ WRITE1(il->code_size);
115
+ WRITE1(il->n_per_block);
116
+ WRITE1(il->block_size);
122
117
 
123
118
  for (size_t i = 0; i < il->nlist; i++) {
124
119
  WRITEVECTOR(il->ids[i]);
@@ -126,13 +121,13 @@ void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) c
126
121
  }
127
122
  }
128
123
 
129
- InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */) const
130
- {
131
- BlockInvertedLists *il = new BlockInvertedLists();
132
- READ1 (il->nlist);
133
- READ1 (il->code_size);
134
- READ1 (il->n_per_block);
135
- READ1 (il->block_size);
124
+ InvertedLists* BlockInvertedListsIOHook::read(IOReader* f, int /* io_flags */)
125
+ const {
126
+ BlockInvertedLists* il = new BlockInvertedLists();
127
+ READ1(il->nlist);
128
+ READ1(il->code_size);
129
+ READ1(il->n_per_block);
130
+ READ1(il->block_size);
136
131
 
137
132
  il->ids.resize(il->nlist);
138
133
  il->codes.resize(il->nlist);
@@ -145,7 +140,4 @@ InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */)
145
140
  return il;
146
141
  }
147
142
 
148
-
149
-
150
-
151
143
  } // namespace faiss
@@ -5,13 +5,12 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
10
+ #include <faiss/index_io.h>
11
11
  #include <faiss/invlists/InvertedLists.h>
12
12
  #include <faiss/invlists/InvertedListsIOHook.h>
13
13
  #include <faiss/utils/AlignedTable.h>
14
- #include <faiss/index_io.h>
15
14
 
16
15
  namespace faiss {
17
16
 
@@ -28,49 +27,48 @@ namespace faiss {
28
27
  * The writing functions add_entries and update_entries operate on block-aligned
29
28
  * data.
30
29
  */
31
- struct BlockInvertedLists: InvertedLists {
32
-
33
- size_t n_per_block; // nb of vectors stored per block
34
- size_t block_size; // nb bytes per block
30
+ struct BlockInvertedLists : InvertedLists {
31
+ size_t n_per_block; // nb of vectors stored per block
32
+ size_t block_size; // nb bytes per block
35
33
 
36
34
  std::vector<AlignedTable<uint8_t>> codes;
37
35
  std::vector<std::vector<idx_t>> ids;
38
36
 
39
-
40
- BlockInvertedLists (
41
- size_t nlist, size_t vec_per_block,
42
- size_t block_size
43
- );
37
+ BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
44
38
 
45
39
  BlockInvertedLists();
46
40
 
47
41
  size_t list_size(size_t list_no) const override;
48
- const uint8_t * get_codes (size_t list_no) const override;
49
- const idx_t * get_ids (size_t list_no) const override;
42
+ const uint8_t* get_codes(size_t list_no) const override;
43
+ const idx_t* get_ids(size_t list_no) const override;
50
44
 
51
45
  // works only on empty BlockInvertedLists
52
46
  // the codes should be of size ceil(n_entry / n_per_block) * block_size
53
47
  // and padded with 0s
54
- size_t add_entries (
55
- size_t list_no, size_t n_entry,
56
- const idx_t* ids, const uint8_t *code) override;
48
+ size_t add_entries(
49
+ size_t list_no,
50
+ size_t n_entry,
51
+ const idx_t* ids,
52
+ const uint8_t* code) override;
57
53
 
58
54
  /// not implemented
59
- void update_entries (size_t list_no, size_t offset, size_t n_entry,
60
- const idx_t *ids, const uint8_t *code) override;
55
+ void update_entries(
56
+ size_t list_no,
57
+ size_t offset,
58
+ size_t n_entry,
59
+ const idx_t* ids,
60
+ const uint8_t* code) override;
61
61
 
62
62
  // also pads new data with 0s
63
- void resize (size_t list_no, size_t new_size) override;
64
-
65
- ~BlockInvertedLists () override;
63
+ void resize(size_t list_no, size_t new_size) override;
66
64
 
65
+ ~BlockInvertedLists() override;
67
66
  };
68
67
 
69
68
  struct BlockInvertedListsIOHook : InvertedListsIOHook {
70
69
  BlockInvertedListsIOHook();
71
- void write(const InvertedLists *ils, IOWriter *f) const override;
72
- InvertedLists * read(IOReader *f, int io_flags) const override;
70
+ void write(const InvertedLists* ils, IOWriter* f) const override;
71
+ InvertedLists* read(IOReader* f, int io_flags) const override;
73
72
  };
74
73
 
75
-
76
74
  } // namespace faiss
@@ -9,142 +9,130 @@
9
9
 
10
10
  #include <faiss/invlists/DirectMap.h>
11
11
 
12
- #include <cstdio>
13
12
  #include <cassert>
13
+ #include <cstdio>
14
14
 
15
- #include <faiss/impl/FaissAssert.h>
16
15
  #include <faiss/impl/AuxIndexStructures.h>
16
+ #include <faiss/impl/FaissAssert.h>
17
17
 
18
18
  namespace faiss {
19
19
 
20
- DirectMap::DirectMap(): type(NoMap)
21
- {}
20
+ DirectMap::DirectMap() : type(NoMap) {}
22
21
 
23
- void DirectMap::set_type (Type new_type, const InvertedLists *invlists, size_t ntotal) {
24
-
25
- FAISS_THROW_IF_NOT (new_type == NoMap || new_type == Array ||
26
- new_type == Hashtable);
22
+ void DirectMap::set_type(
23
+ Type new_type,
24
+ const InvertedLists* invlists,
25
+ size_t ntotal) {
26
+ FAISS_THROW_IF_NOT(
27
+ new_type == NoMap || new_type == Array || new_type == Hashtable);
27
28
 
28
29
  if (new_type == type) {
29
30
  // nothing to do
30
31
  return;
31
32
  }
32
33
 
33
- array.clear ();
34
- hashtable.clear ();
34
+ array.clear();
35
+ hashtable.clear();
35
36
  type = new_type;
36
37
 
37
38
  if (new_type == NoMap) {
38
39
  return;
39
40
  } else if (new_type == Array) {
40
- array.resize (ntotal, -1);
41
+ array.resize(ntotal, -1);
41
42
  } else if (new_type == Hashtable) {
42
- hashtable.reserve (ntotal);
43
+ hashtable.reserve(ntotal);
43
44
  }
44
45
 
45
46
  for (size_t key = 0; key < invlists->nlist; key++) {
46
- size_t list_size = invlists->list_size (key);
47
- InvertedLists::ScopedIds idlist (invlists, key);
47
+ size_t list_size = invlists->list_size(key);
48
+ InvertedLists::ScopedIds idlist(invlists, key);
48
49
 
49
50
  if (new_type == Array) {
50
51
  for (long ofs = 0; ofs < list_size; ofs++) {
51
- FAISS_THROW_IF_NOT_MSG (
52
- 0 <= idlist [ofs] && idlist[ofs] < ntotal,
53
- "direct map supported only for seuquential ids");
54
- array [idlist [ofs]] = lo_build(key, ofs);
52
+ FAISS_THROW_IF_NOT_MSG(
53
+ 0 <= idlist[ofs] && idlist[ofs] < ntotal,
54
+ "direct map supported only for seuquential ids");
55
+ array[idlist[ofs]] = lo_build(key, ofs);
55
56
  }
56
57
  } else if (new_type == Hashtable) {
57
58
  for (long ofs = 0; ofs < list_size; ofs++) {
58
- hashtable [idlist [ofs]] = lo_build(key, ofs);
59
+ hashtable[idlist[ofs]] = lo_build(key, ofs);
59
60
  }
60
61
  }
61
62
  }
62
63
  }
63
64
 
64
- void DirectMap::clear()
65
- {
66
- array.clear ();
67
- hashtable.clear ();
65
+ void DirectMap::clear() {
66
+ array.clear();
67
+ hashtable.clear();
68
68
  }
69
69
 
70
-
71
- DirectMap::idx_t DirectMap::get (idx_t key) const
72
- {
70
+ DirectMap::idx_t DirectMap::get(idx_t key) const {
73
71
  if (type == Array) {
74
- FAISS_THROW_IF_NOT_MSG (
75
- key >= 0 && key < array.size(), "invalid key"
76
- );
72
+ FAISS_THROW_IF_NOT_MSG(key >= 0 && key < array.size(), "invalid key");
77
73
  idx_t lo = array[key];
78
74
  FAISS_THROW_IF_NOT_MSG(lo >= 0, "-1 entry in direct_map");
79
75
  return lo;
80
76
  } else if (type == Hashtable) {
81
- auto res = hashtable.find (key);
82
- FAISS_THROW_IF_NOT_MSG (res != hashtable.end(), "key not found");
77
+ auto res = hashtable.find(key);
78
+ FAISS_THROW_IF_NOT_MSG(res != hashtable.end(), "key not found");
83
79
  return res->second;
84
80
  } else {
85
- FAISS_THROW_MSG ("direct map not initialized");
81
+ FAISS_THROW_MSG("direct map not initialized");
86
82
  }
87
83
  }
88
84
 
89
-
90
-
91
- void DirectMap::add_single_id (idx_t id, idx_t list_no, size_t offset)
92
- {
93
- if (type == NoMap) return;
85
+ void DirectMap::add_single_id(idx_t id, idx_t list_no, size_t offset) {
86
+ if (type == NoMap)
87
+ return;
94
88
 
95
89
  if (type == Array) {
96
- assert (id == array.size());
90
+ assert(id == array.size());
97
91
  if (list_no >= 0) {
98
- array.push_back (lo_build (list_no, offset));
92
+ array.push_back(lo_build(list_no, offset));
99
93
  } else {
100
- array.push_back (-1);
94
+ array.push_back(-1);
101
95
  }
102
96
  } else if (type == Hashtable) {
103
97
  if (list_no >= 0) {
104
- hashtable[id] = lo_build (list_no, offset);
98
+ hashtable[id] = lo_build(list_no, offset);
105
99
  }
106
100
  }
107
-
108
101
  }
109
102
 
110
- void DirectMap::check_can_add (const idx_t *ids) {
103
+ void DirectMap::check_can_add(const idx_t* ids) {
111
104
  if (type == Array && ids) {
112
- FAISS_THROW_MSG ("cannot have array direct map and add with ids");
105
+ FAISS_THROW_MSG("cannot have array direct map and add with ids");
113
106
  }
114
107
  }
115
108
 
116
109
  /********************* DirectMapAdd implementation */
117
110
 
118
-
119
- DirectMapAdd::DirectMapAdd (DirectMap &direct_map, size_t n, const idx_t *xids):
120
- direct_map(direct_map), type(direct_map.type), n(n), xids(xids)
121
- {
122
- if (type == DirectMap::Array) {
123
- FAISS_THROW_IF_NOT (xids == nullptr);
111
+ DirectMapAdd::DirectMapAdd(DirectMap& direct_map, size_t n, const idx_t* xids)
112
+ : direct_map(direct_map), type(direct_map.type), n(n), xids(xids) {
113
+ if (type == DirectMap::Array) {
114
+ FAISS_THROW_IF_NOT(xids == nullptr);
124
115
  ntotal = direct_map.array.size();
125
- direct_map.array.resize (ntotal + n, -1);
116
+ direct_map.array.resize(ntotal + n, -1);
126
117
  } else if (type == DirectMap::Hashtable) {
127
118
  // can't parallel update hashtable so use temp array
128
- all_ofs.resize (n, -1);
119
+ all_ofs.resize(n, -1);
129
120
  }
130
121
  }
131
122
 
132
-
133
- void DirectMapAdd::add (size_t i, idx_t list_no, size_t ofs)
134
- {
123
+ void DirectMapAdd::add(size_t i, idx_t list_no, size_t ofs) {
135
124
  if (type == DirectMap::Array) {
136
- direct_map.array [ntotal + i] = lo_build (list_no, ofs);
125
+ direct_map.array[ntotal + i] = lo_build(list_no, ofs);
137
126
  } else if (type == DirectMap::Hashtable) {
138
- all_ofs [i] = lo_build (list_no, ofs);
127
+ all_ofs[i] = lo_build(list_no, ofs);
139
128
  }
140
129
  }
141
130
 
142
- DirectMapAdd::~DirectMapAdd ()
143
- {
131
+ DirectMapAdd::~DirectMapAdd() {
144
132
  if (type == DirectMap::Hashtable) {
145
133
  for (int i = 0; i < n; i++) {
146
134
  idx_t id = xids ? xids[i] : ntotal + i;
147
- direct_map.hashtable [id] = all_ofs [i];
135
+ direct_map.hashtable[id] = all_ofs[i];
148
136
  }
149
137
  }
150
138
  }
@@ -154,9 +142,7 @@ DirectMapAdd::~DirectMapAdd ()
154
142
  using ScopedCodes = InvertedLists::ScopedCodes;
155
143
  using ScopedIds = InvertedLists::ScopedIds;
156
144
 
157
-
158
- size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
159
- {
145
+ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists* invlists) {
160
146
  size_t nlist = invlists->nlist;
161
147
  std::vector<idx_t> toremove(nlist);
162
148
 
@@ -166,16 +152,16 @@ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
166
152
  // exhaustive scan of IVF
167
153
  #pragma omp parallel for
168
154
  for (idx_t i = 0; i < nlist; i++) {
169
- idx_t l0 = invlists->list_size (i), l = l0, j = 0;
170
- ScopedIds idsi (invlists, i);
155
+ idx_t l0 = invlists->list_size(i), l = l0, j = 0;
156
+ ScopedIds idsi(invlists, i);
171
157
  while (j < l) {
172
- if (sel.is_member (idsi[j])) {
158
+ if (sel.is_member(idsi[j])) {
173
159
  l--;
174
- invlists->update_entry (
175
- i, j,
176
- invlists->get_single_id (i, l),
177
- ScopedCodes (invlists, i, l).get()
178
- );
160
+ invlists->update_entry(
161
+ i,
162
+ j,
163
+ invlists->get_single_id(i, l),
164
+ ScopedCodes(invlists, i, l).get());
179
165
  } else {
180
166
  j++;
181
167
  }
@@ -191,30 +177,28 @@ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
191
177
  }
192
178
  }
193
179
  } else if (type == Hashtable) {
194
- const IDSelectorArray *sela =
195
- dynamic_cast<const IDSelectorArray*>(&sel);
196
- FAISS_THROW_IF_NOT_MSG (
197
- sela,
198
- "remove with hashtable works only with IDSelectorArray"
199
- );
180
+ const IDSelectorArray* sela =
181
+ dynamic_cast<const IDSelectorArray*>(&sel);
182
+ FAISS_THROW_IF_NOT_MSG(
183
+ sela, "remove with hashtable works only with IDSelectorArray");
200
184
 
201
185
  for (idx_t i = 0; i < sela->n; i++) {
202
186
  idx_t id = sela->ids[i];
203
- auto res = hashtable.find (id);
187
+ auto res = hashtable.find(id);
204
188
  if (res != hashtable.end()) {
205
- size_t list_no = lo_listno (res->second);
206
- size_t offset = lo_offset (res->second);
207
- idx_t last = invlists->list_size (list_no) - 1;
208
- hashtable.erase (res);
189
+ size_t list_no = lo_listno(res->second);
190
+ size_t offset = lo_offset(res->second);
191
+ idx_t last = invlists->list_size(list_no) - 1;
192
+ hashtable.erase(res);
209
193
  if (offset < last) {
210
- idx_t last_id = invlists->get_single_id (list_no, last);
211
- invlists->update_entry (
212
- list_no, offset,
213
- last_id,
214
- ScopedCodes (invlists, list_no, last).get()
215
- );
194
+ idx_t last_id = invlists->get_single_id(list_no, last);
195
+ invlists->update_entry(
196
+ list_no,
197
+ offset,
198
+ last_id,
199
+ ScopedCodes(invlists, list_no, last).get());
216
200
  // update hash entry for last element
217
- hashtable [last_id] = list_no << 32 | offset;
201
+ hashtable[last_id] = list_no << 32 | offset;
218
202
  }
219
203
  invlists->resize(list_no, last);
220
204
  nremove++;
@@ -227,41 +211,41 @@ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
227
211
  return nremove;
228
212
  }
229
213
 
230
- void DirectMap::update_codes (InvertedLists *invlists,
231
- int n, const idx_t *ids,
232
- const idx_t *assign,
233
- const uint8_t *codes)
234
- {
235
- FAISS_THROW_IF_NOT (type == Array);
214
+ void DirectMap::update_codes(
215
+ InvertedLists* invlists,
216
+ int n,
217
+ const idx_t* ids,
218
+ const idx_t* assign,
219
+ const uint8_t* codes) {
220
+ FAISS_THROW_IF_NOT(type == Array);
236
221
 
237
222
  size_t code_size = invlists->code_size;
238
223
 
239
224
  for (size_t i = 0; i < n; i++) {
240
225
  idx_t id = ids[i];
241
- FAISS_THROW_IF_NOT_MSG (0 <= id && id < array.size(),
242
- "id to update out of range");
226
+ FAISS_THROW_IF_NOT_MSG(
227
+ 0 <= id && id < array.size(), "id to update out of range");
243
228
  { // remove old one
244
- idx_t dm = array [id];
245
- int64_t ofs = lo_offset (dm);
246
- int64_t il = lo_listno (dm);
247
- size_t l = invlists->list_size (il);
229
+ idx_t dm = array[id];
230
+ int64_t ofs = lo_offset(dm);
231
+ int64_t il = lo_listno(dm);
232
+ size_t l = invlists->list_size(il);
248
233
  if (ofs != l - 1) { // move l - 1 to ofs
249
- int64_t id2 = invlists->get_single_id (il, l - 1);
250
- array[id2] = lo_build (il, ofs);
251
- invlists->update_entry (il, ofs, id2,
252
- invlists->get_single_code (il, l - 1));
234
+ int64_t id2 = invlists->get_single_id(il, l - 1);
235
+ array[id2] = lo_build(il, ofs);
236
+ invlists->update_entry(
237
+ il, ofs, id2, invlists->get_single_code(il, l - 1));
253
238
  }
254
- invlists->resize (il, l - 1);
239
+ invlists->resize(il, l - 1);
255
240
  }
256
241
  { // insert new one
257
242
  int64_t il = assign[i];
258
- size_t l = invlists->list_size (il);
259
- idx_t dm = lo_build (il, l);
260
- array [id] = dm;
261
- invlists->add_entry (il, id, codes + i * code_size);
243
+ size_t l = invlists->list_size(il);
244
+ idx_t dm = lo_build(il, l);
245
+ array[id] = dm;
246
+ invlists->add_entry(il, id, codes + i * code_size);
262
247
  }
263
248
  }
264
249
  }
265
250
 
266
-
267
- }
251
+ } // namespace faiss