faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,623 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/InvertedLists.h>
11
+
12
+ #include <cstdio>
13
+
14
+ #include <faiss/utils/utils.h>
15
+ #include <faiss/impl/FaissAssert.h>
16
+
17
+ namespace faiss {
18
+
19
+ using ScopedIds = InvertedLists::ScopedIds;
20
+ using ScopedCodes = InvertedLists::ScopedCodes;
21
+
22
+
23
+ /*****************************************
24
+ * InvertedLists implementation
25
+ ******************************************/
26
+
27
+ InvertedLists::InvertedLists (size_t nlist, size_t code_size):
28
+ nlist (nlist), code_size (code_size)
29
+ {
30
+ }
31
+
32
+ InvertedLists::~InvertedLists ()
33
+ {}
34
+
35
+ InvertedLists::idx_t InvertedLists::get_single_id (
36
+ size_t list_no, size_t offset) const
37
+ {
38
+ assert (offset < list_size (list_no));
39
+ return get_ids(list_no)[offset];
40
+ }
41
+
42
+
43
+ void InvertedLists::release_codes (size_t, const uint8_t *) const
44
+ {}
45
+
46
+ void InvertedLists::release_ids (size_t, const idx_t *) const
47
+ {}
48
+
49
+ void InvertedLists::prefetch_lists (const idx_t *, int) const
50
+ {}
51
+
52
+ const uint8_t * InvertedLists::get_single_code (
53
+ size_t list_no, size_t offset) const
54
+ {
55
+ assert (offset < list_size (list_no));
56
+ return get_codes(list_no) + offset * code_size;
57
+ }
58
+
59
+ size_t InvertedLists::add_entry (size_t list_no, idx_t theid,
60
+ const uint8_t *code)
61
+ {
62
+ return add_entries (list_no, 1, &theid, code);
63
+ }
64
+
65
+ void InvertedLists::update_entry (size_t list_no, size_t offset,
66
+ idx_t id, const uint8_t *code)
67
+ {
68
+ update_entries (list_no, offset, 1, &id, code);
69
+ }
70
+
71
+ void InvertedLists::reset () {
72
+ for (size_t i = 0; i < nlist; i++) {
73
+ resize (i, 0);
74
+ }
75
+ }
76
+
77
+ void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) {
78
+
79
+ #pragma omp parallel for
80
+ for (idx_t i = 0; i < nlist; i++) {
81
+ size_t list_size = oivf->list_size (i);
82
+ ScopedIds ids (oivf, i);
83
+ if (add_id == 0) {
84
+ add_entries (i, list_size, ids.get (),
85
+ ScopedCodes (oivf, i).get());
86
+ } else {
87
+ std::vector <idx_t> new_ids (list_size);
88
+
89
+ for (size_t j = 0; j < list_size; j++) {
90
+ new_ids [j] = ids[j] + add_id;
91
+ }
92
+ add_entries (i, list_size, new_ids.data(),
93
+ ScopedCodes (oivf, i).get());
94
+ }
95
+ oivf->resize (i, 0);
96
+ }
97
+ }
98
+
99
+ double InvertedLists::imbalance_factor () const {
100
+ std::vector<int> hist(nlist);
101
+
102
+ for (size_t i = 0; i < nlist; i++) {
103
+ hist[i] = list_size(i);
104
+ }
105
+
106
+ return faiss::imbalance_factor(nlist, hist.data());
107
+ }
108
+
109
+ void InvertedLists::print_stats () const {
110
+ std::vector<int> sizes(40);
111
+ for (size_t i = 0; i < nlist; i++) {
112
+ for (size_t j = 0; j < sizes.size(); j++) {
113
+ if ((list_size(i) >> j) == 0) {
114
+ sizes[j]++;
115
+ break;
116
+ }
117
+ }
118
+ }
119
+ for (size_t i = 0; i < sizes.size(); i++) {
120
+ if (sizes[i]) {
121
+ printf("list size in < %d: %d instances\n", 1 << i, sizes[i]);
122
+ }
123
+ }
124
+ }
125
+
126
+ size_t InvertedLists::compute_ntotal () const {
127
+ size_t tot = 0;
128
+ for (size_t i = 0; i < nlist; i++) {
129
+ tot += list_size(i);
130
+ }
131
+ return tot;
132
+ }
133
+
134
+ /*****************************************
135
+ * ArrayInvertedLists implementation
136
+ ******************************************/
137
+
138
+ ArrayInvertedLists::ArrayInvertedLists (size_t nlist, size_t code_size):
139
+ InvertedLists (nlist, code_size)
140
+ {
141
+ ids.resize (nlist);
142
+ codes.resize (nlist);
143
+ }
144
+
145
+ size_t ArrayInvertedLists::add_entries (
146
+ size_t list_no, size_t n_entry,
147
+ const idx_t* ids_in, const uint8_t *code)
148
+ {
149
+ if (n_entry == 0) return 0;
150
+ assert (list_no < nlist);
151
+ size_t o = ids [list_no].size();
152
+ ids [list_no].resize (o + n_entry);
153
+ memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
154
+ codes [list_no].resize ((o + n_entry) * code_size);
155
+ memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
156
+ return o;
157
+ }
158
+
159
+ size_t ArrayInvertedLists::list_size(size_t list_no) const
160
+ {
161
+ assert (list_no < nlist);
162
+ return ids[list_no].size();
163
+ }
164
+
165
+ const uint8_t * ArrayInvertedLists::get_codes (size_t list_no) const
166
+ {
167
+ assert (list_no < nlist);
168
+ return codes[list_no].data();
169
+ }
170
+
171
+
172
+ const InvertedLists::idx_t * ArrayInvertedLists::get_ids (size_t list_no) const
173
+ {
174
+ assert (list_no < nlist);
175
+ return ids[list_no].data();
176
+ }
177
+
178
+ void ArrayInvertedLists::resize (size_t list_no, size_t new_size)
179
+ {
180
+ ids[list_no].resize (new_size);
181
+ codes[list_no].resize (new_size * code_size);
182
+ }
183
+
184
+ void ArrayInvertedLists::update_entries (
185
+ size_t list_no, size_t offset, size_t n_entry,
186
+ const idx_t *ids_in, const uint8_t *codes_in)
187
+ {
188
+ assert (list_no < nlist);
189
+ assert (n_entry + offset <= ids[list_no].size());
190
+ memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
191
+ memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
192
+ }
193
+
194
+
195
+ ArrayInvertedLists::~ArrayInvertedLists ()
196
+ {}
197
+
198
+ /*****************************************************************
199
+ * Meta-inverted list implementations
200
+ *****************************************************************/
201
+
202
+
203
+ size_t ReadOnlyInvertedLists::add_entries (
204
+ size_t , size_t ,
205
+ const idx_t* , const uint8_t *)
206
+ {
207
+ FAISS_THROW_MSG ("not implemented");
208
+ }
209
+
210
+ void ReadOnlyInvertedLists::update_entries (size_t, size_t , size_t ,
211
+ const idx_t *, const uint8_t *)
212
+ {
213
+ FAISS_THROW_MSG ("not implemented");
214
+ }
215
+
216
+ void ReadOnlyInvertedLists::resize (size_t , size_t )
217
+ {
218
+ FAISS_THROW_MSG ("not implemented");
219
+ }
220
+
221
+
222
+
223
+ /*****************************************
224
+ * HStackInvertedLists implementation
225
+ ******************************************/
226
+
227
+ HStackInvertedLists::HStackInvertedLists (
228
+ int nil, const InvertedLists **ils_in):
229
+ ReadOnlyInvertedLists (nil > 0 ? ils_in[0]->nlist : 0,
230
+ nil > 0 ? ils_in[0]->code_size : 0)
231
+ {
232
+ FAISS_THROW_IF_NOT (nil > 0);
233
+ for (int i = 0; i < nil; i++) {
234
+ ils.push_back (ils_in[i]);
235
+ FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size &&
236
+ ils_in[i]->nlist == nlist);
237
+ }
238
+ }
239
+
240
+ size_t HStackInvertedLists::list_size(size_t list_no) const
241
+ {
242
+ size_t sz = 0;
243
+ for (int i = 0; i < ils.size(); i++) {
244
+ const InvertedLists *il = ils[i];
245
+ sz += il->list_size (list_no);
246
+ }
247
+ return sz;
248
+ }
249
+
250
+ const uint8_t * HStackInvertedLists::get_codes (size_t list_no) const
251
+ {
252
+ uint8_t *codes = new uint8_t [code_size * list_size(list_no)], *c = codes;
253
+
254
+ for (int i = 0; i < ils.size(); i++) {
255
+ const InvertedLists *il = ils[i];
256
+ size_t sz = il->list_size(list_no) * code_size;
257
+ if (sz > 0) {
258
+ memcpy (c, ScopedCodes (il, list_no).get(), sz);
259
+ c += sz;
260
+ }
261
+ }
262
+ return codes;
263
+ }
264
+
265
+ const uint8_t * HStackInvertedLists::get_single_code (
266
+ size_t list_no, size_t offset) const
267
+ {
268
+ for (int i = 0; i < ils.size(); i++) {
269
+ const InvertedLists *il = ils[i];
270
+ size_t sz = il->list_size (list_no);
271
+ if (offset < sz) {
272
+ // here we have to copy the code, otherwise it will crash at dealloc
273
+ uint8_t * code = new uint8_t [code_size];
274
+ memcpy (code, ScopedCodes (il, list_no, offset).get(), code_size);
275
+ return code;
276
+ }
277
+ offset -= sz;
278
+ }
279
+ FAISS_THROW_FMT ("offset %ld unknown", offset);
280
+ }
281
+
282
+
283
+ void HStackInvertedLists::release_codes (size_t, const uint8_t *codes) const {
284
+ delete [] codes;
285
+ }
286
+
287
+ const Index::idx_t * HStackInvertedLists::get_ids (size_t list_no) const
288
+ {
289
+ idx_t *ids = new idx_t [list_size(list_no)], *c = ids;
290
+
291
+ for (int i = 0; i < ils.size(); i++) {
292
+ const InvertedLists *il = ils[i];
293
+ size_t sz = il->list_size(list_no);
294
+ if (sz > 0) {
295
+ memcpy (c, ScopedIds (il, list_no).get(), sz * sizeof(idx_t));
296
+ c += sz;
297
+ }
298
+ }
299
+ return ids;
300
+ }
301
+
302
+ Index::idx_t HStackInvertedLists::get_single_id (
303
+ size_t list_no, size_t offset) const
304
+ {
305
+
306
+ for (int i = 0; i < ils.size(); i++) {
307
+ const InvertedLists *il = ils[i];
308
+ size_t sz = il->list_size (list_no);
309
+ if (offset < sz) {
310
+ return il->get_single_id (list_no, offset);
311
+ }
312
+ offset -= sz;
313
+ }
314
+ FAISS_THROW_FMT ("offset %ld unknown", offset);
315
+ }
316
+
317
+
318
+ void HStackInvertedLists::release_ids (size_t, const idx_t *ids) const {
319
+ delete [] ids;
320
+ }
321
+
322
+ void HStackInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
323
+ {
324
+ for (int i = 0; i < ils.size(); i++) {
325
+ const InvertedLists *il = ils[i];
326
+ il->prefetch_lists (list_nos, nlist);
327
+ }
328
+ }
329
+
330
+ /*****************************************
331
+ * SliceInvertedLists implementation
332
+ ******************************************/
333
+
334
+
335
+ namespace {
336
+
337
+ using idx_t = InvertedLists::idx_t;
338
+
339
+ idx_t translate_list_no (const SliceInvertedLists *sil,
340
+ idx_t list_no) {
341
+ FAISS_THROW_IF_NOT (list_no >= 0 && list_no < sil->nlist);
342
+ return list_no + sil->i0;
343
+ }
344
+
345
+ };
346
+
347
+
348
+
349
+ SliceInvertedLists::SliceInvertedLists (
350
+ const InvertedLists *il, idx_t i0, idx_t i1):
351
+ ReadOnlyInvertedLists (i1 - i0, il->code_size),
352
+ il (il), i0(i0), i1(i1)
353
+ {
354
+
355
+ }
356
+
357
+ size_t SliceInvertedLists::list_size(size_t list_no) const
358
+ {
359
+ return il->list_size (translate_list_no (this, list_no));
360
+ }
361
+
362
+ const uint8_t * SliceInvertedLists::get_codes (size_t list_no) const
363
+ {
364
+ return il->get_codes (translate_list_no (this, list_no));
365
+ }
366
+
367
+ const uint8_t * SliceInvertedLists::get_single_code (
368
+ size_t list_no, size_t offset) const
369
+ {
370
+ return il->get_single_code (translate_list_no (this, list_no), offset);
371
+ }
372
+
373
+
374
+ void SliceInvertedLists::release_codes (
375
+ size_t list_no, const uint8_t *codes) const {
376
+ return il->release_codes (translate_list_no (this, list_no), codes);
377
+ }
378
+
379
+ const Index::idx_t * SliceInvertedLists::get_ids (size_t list_no) const
380
+ {
381
+ return il->get_ids (translate_list_no (this, list_no));
382
+ }
383
+
384
+ Index::idx_t SliceInvertedLists::get_single_id (
385
+ size_t list_no, size_t offset) const
386
+ {
387
+ return il->get_single_id (translate_list_no (this, list_no), offset);
388
+ }
389
+
390
+
391
+ void SliceInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
392
+ return il->release_ids (translate_list_no (this, list_no), ids);
393
+ }
394
+
395
+ void SliceInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
396
+ {
397
+ std::vector<idx_t> translated_list_nos;
398
+ for (int j = 0; j < nlist; j++) {
399
+ idx_t list_no = list_nos[j];
400
+ if (list_no < 0) continue;
401
+ translated_list_nos.push_back (translate_list_no (this, list_no));
402
+ }
403
+ il->prefetch_lists (translated_list_nos.data(),
404
+ translated_list_nos.size());
405
+ }
406
+
407
+
408
+ /*****************************************
409
+ * VStackInvertedLists implementation
410
+ ******************************************/
411
+
412
+ namespace {
413
+
414
+ using idx_t = InvertedLists::idx_t;
415
+
416
+ // find the invlist this number belongs to
417
+ int translate_list_no (const VStackInvertedLists *vil,
418
+ idx_t list_no) {
419
+ FAISS_THROW_IF_NOT (list_no >= 0 && list_no < vil->nlist);
420
+ int i0 = 0, i1 = vil->ils.size();
421
+ const idx_t *cumsz = vil->cumsz.data();
422
+ while (i0 + 1 < i1) {
423
+ int imed = (i0 + i1) / 2;
424
+ if (list_no >= cumsz[imed]) {
425
+ i0 = imed;
426
+ } else {
427
+ i1 = imed;
428
+ }
429
+ }
430
+ assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
431
+ return i0;
432
+ }
433
+
434
+ idx_t sum_il_sizes (int nil, const InvertedLists **ils_in) {
435
+ idx_t tot = 0;
436
+ for (int i = 0; i < nil; i++) {
437
+ tot += ils_in[i]->nlist;
438
+ }
439
+ return tot;
440
+ }
441
+
442
+ };
443
+
444
+
445
+
446
+ VStackInvertedLists::VStackInvertedLists (
447
+ int nil, const InvertedLists **ils_in):
448
+ ReadOnlyInvertedLists (sum_il_sizes(nil, ils_in),
449
+ nil > 0 ? ils_in[0]->code_size : 0)
450
+ {
451
+ FAISS_THROW_IF_NOT (nil > 0);
452
+ cumsz.resize (nil + 1);
453
+ for (int i = 0; i < nil; i++) {
454
+ ils.push_back (ils_in[i]);
455
+ FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size);
456
+ cumsz[i + 1] = cumsz[i] + ils_in[i]->nlist;
457
+ }
458
+ }
459
+
460
+ size_t VStackInvertedLists::list_size(size_t list_no) const
461
+ {
462
+ int i = translate_list_no (this, list_no);
463
+ list_no -= cumsz[i];
464
+ return ils[i]->list_size (list_no);
465
+ }
466
+
467
+ const uint8_t * VStackInvertedLists::get_codes (size_t list_no) const
468
+ {
469
+ int i = translate_list_no (this, list_no);
470
+ list_no -= cumsz[i];
471
+ return ils[i]->get_codes (list_no);
472
+ }
473
+
474
+ const uint8_t * VStackInvertedLists::get_single_code (
475
+ size_t list_no, size_t offset) const
476
+ {
477
+ int i = translate_list_no (this, list_no);
478
+ list_no -= cumsz[i];
479
+ return ils[i]->get_single_code (list_no, offset);
480
+ }
481
+
482
+
483
+ void VStackInvertedLists::release_codes (
484
+ size_t list_no, const uint8_t *codes) const {
485
+ int i = translate_list_no (this, list_no);
486
+ list_no -= cumsz[i];
487
+ return ils[i]->release_codes (list_no, codes);
488
+ }
489
+
490
+ const Index::idx_t * VStackInvertedLists::get_ids (size_t list_no) const
491
+ {
492
+ int i = translate_list_no (this, list_no);
493
+ list_no -= cumsz[i];
494
+ return ils[i]->get_ids (list_no);
495
+ }
496
+
497
+ Index::idx_t VStackInvertedLists::get_single_id (
498
+ size_t list_no, size_t offset) const
499
+ {
500
+ int i = translate_list_no (this, list_no);
501
+ list_no -= cumsz[i];
502
+ return ils[i]->get_single_id (list_no, offset);
503
+ }
504
+
505
+
506
+ void VStackInvertedLists::release_ids (size_t list_no, const idx_t *ids) const {
507
+ int i = translate_list_no (this, list_no);
508
+ list_no -= cumsz[i];
509
+ return ils[i]->release_ids (list_no, ids);
510
+ }
511
+
512
+ void VStackInvertedLists::prefetch_lists (
513
+ const idx_t *list_nos, int nlist) const
514
+ {
515
+ std::vector<int> ilno (nlist, -1);
516
+ std::vector<int> n_per_il (ils.size(), 0);
517
+ for (int j = 0; j < nlist; j++) {
518
+ idx_t list_no = list_nos[j];
519
+ if (list_no < 0) continue;
520
+ int i = ilno[j] = translate_list_no (this, list_no);
521
+ n_per_il[i]++;
522
+ }
523
+ std::vector<int> cum_n_per_il (ils.size() + 1, 0);
524
+ for (int j = 0; j < ils.size(); j++) {
525
+ cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
526
+ }
527
+ std::vector<idx_t> sorted_list_nos (cum_n_per_il.back());
528
+ for (int j = 0; j < nlist; j++) {
529
+ idx_t list_no = list_nos[j];
530
+ if (list_no < 0) continue;
531
+ int i = ilno[j];
532
+ list_no -= cumsz[i];
533
+ sorted_list_nos[cum_n_per_il[i]++] = list_no;
534
+ }
535
+
536
+ int i0 = 0;
537
+ for (int j = 0; j < ils.size(); j++) {
538
+ int i1 = i0 + n_per_il[j];
539
+ if (i1 > i0) {
540
+ ils[j]->prefetch_lists (sorted_list_nos.data() + i0,
541
+ i1 - i0);
542
+ }
543
+ i0 = i1;
544
+ }
545
+ }
546
+
547
+
548
+
549
+ /*****************************************
550
+ * MaskedInvertedLists implementation
551
+ ******************************************/
552
+
553
+
554
+ MaskedInvertedLists::MaskedInvertedLists (const InvertedLists *il0,
555
+ const InvertedLists *il1):
556
+ ReadOnlyInvertedLists (il0->nlist, il0->code_size),
557
+ il0 (il0), il1 (il1)
558
+ {
559
+ FAISS_THROW_IF_NOT (il1->nlist == nlist);
560
+ FAISS_THROW_IF_NOT (il1->code_size == code_size);
561
+ }
562
+
563
+ size_t MaskedInvertedLists::list_size(size_t list_no) const
564
+ {
565
+ size_t sz = il0->list_size(list_no);
566
+ return sz ? sz : il1->list_size(list_no);
567
+ }
568
+
569
+ const uint8_t * MaskedInvertedLists::get_codes (size_t list_no) const
570
+ {
571
+ size_t sz = il0->list_size(list_no);
572
+ return (sz ? il0 : il1)->get_codes(list_no);
573
+ }
574
+
575
+ const idx_t * MaskedInvertedLists::get_ids (size_t list_no) const
576
+ {
577
+ size_t sz = il0->list_size (list_no);
578
+ return (sz ? il0 : il1)->get_ids (list_no);
579
+ }
580
+
581
+ void MaskedInvertedLists::release_codes (
582
+ size_t list_no, const uint8_t *codes) const
583
+ {
584
+ size_t sz = il0->list_size (list_no);
585
+ (sz ? il0 : il1)->release_codes (list_no, codes);
586
+ }
587
+
588
+ void MaskedInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
589
+ {
590
+ size_t sz = il0->list_size (list_no);
591
+ (sz ? il0 : il1)->release_ids (list_no, ids);
592
+ }
593
+
594
+ idx_t MaskedInvertedLists::get_single_id (size_t list_no, size_t offset) const
595
+ {
596
+ size_t sz = il0->list_size (list_no);
597
+ return (sz ? il0 : il1)->get_single_id (list_no, offset);
598
+ }
599
+
600
+ const uint8_t * MaskedInvertedLists::get_single_code (
601
+ size_t list_no, size_t offset) const
602
+ {
603
+ size_t sz = il0->list_size (list_no);
604
+ return (sz ? il0 : il1)->get_single_code (list_no, offset);
605
+ }
606
+
607
+ void MaskedInvertedLists::prefetch_lists (
608
+ const idx_t *list_nos, int nlist) const
609
+ {
610
+ std::vector<idx_t> list0, list1;
611
+ for (int i = 0; i < nlist; i++) {
612
+ idx_t list_no = list_nos[i];
613
+ if (list_no < 0) continue;
614
+ size_t sz = il0->list_size(list_no);
615
+ (sz ? list0 : list1).push_back (list_no);
616
+ }
617
+ il0->prefetch_lists (list0.data(), list0.size());
618
+ il1->prefetch_lists (list1.data(), list1.size());
619
+ }
620
+
621
+
622
+
623
+ } // namespace faiss