faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #include <faiss/IndexFlat.h>
12
+ #include <faiss/gpu/GpuAutoTune.h>
13
+ #include <faiss/gpu/GpuIndexFlat.h>
14
+ #include <faiss/gpu/StandardGpuResources.h>
15
+ #include <faiss/gpu/utils/DeviceUtils.h>
16
+
17
+
18
+ int main() {
19
+ int d = 64; // dimension
20
+ int nb = 100000; // database size
21
+ int nq = 10000; // nb of queries
22
+
23
+ float *xb = new float[d * nb];
24
+ float *xq = new float[d * nq];
25
+
26
+ for(int i = 0; i < nb; i++) {
27
+ for(int j = 0; j < d; j++)
28
+ xb[d * i + j] = drand48();
29
+ xb[d * i] += i / 1000.;
30
+ }
31
+
32
+ for(int i = 0; i < nq; i++) {
33
+ for(int j = 0; j < d; j++)
34
+ xq[d * i + j] = drand48();
35
+ xq[d * i] += i / 1000.;
36
+ }
37
+
38
+ int ngpus = faiss::gpu::getNumDevices();
39
+
40
+ printf("Number of GPUs: %d\n", ngpus);
41
+
42
+ std::vector<faiss::gpu::GpuResources*> res;
43
+ std::vector<int> devs;
44
+ for(int i = 0; i < ngpus; i++) {
45
+ res.push_back(new faiss::gpu::StandardGpuResources);
46
+ devs.push_back(i);
47
+ }
48
+
49
+ faiss::IndexFlatL2 cpu_index(d);
50
+
51
+ faiss::Index *gpu_index =
52
+ faiss::gpu::index_cpu_to_gpu_multiple(
53
+ res,
54
+ devs,
55
+ &cpu_index
56
+ );
57
+
58
+ printf("is_trained = %s\n", gpu_index->is_trained ? "true" : "false");
59
+ gpu_index->add(nb, xb); // add vectors to the index
60
+ printf("ntotal = %ld\n", gpu_index->ntotal);
61
+
62
+ int k = 4;
63
+
64
+ { // search xq
65
+ long *I = new long[k * nq];
66
+ float *D = new float[k * nq];
67
+
68
+ gpu_index->search(nq, xq, k, D, I);
69
+
70
+ // print results
71
+ printf("I (5 first results)=\n");
72
+ for(int i = 0; i < 5; i++) {
73
+ for(int j = 0; j < k; j++)
74
+ printf("%5ld ", I[i * k + j]);
75
+ printf("\n");
76
+ }
77
+
78
+ printf("I (5 last results)=\n");
79
+ for(int i = nq - 5; i < nq; i++) {
80
+ for(int j = 0; j < k; j++)
81
+ printf("%5ld ", I[i * k + j]);
82
+ printf("\n");
83
+ }
84
+
85
+ delete [] I;
86
+ delete [] D;
87
+ }
88
+
89
+ delete gpu_index;
90
+
91
+ for(int i = 0; i < ngpus; i++) {
92
+ delete res[i];
93
+ }
94
+
95
+ delete [] xb;
96
+ delete [] xq;
97
+
98
+ return 0;
99
+ }
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ /* Function for soft heap */
11
+
12
+ #include <faiss/utils/Heap.h>
13
+
14
+
15
+ namespace faiss {
16
+
17
+
18
+ template <typename C>
19
+ void HeapArray<C>::heapify ()
20
+ {
21
+ #pragma omp parallel for
22
+ for (size_t j = 0; j < nh; j++)
23
+ heap_heapify<C> (k, val + j * k, ids + j * k);
24
+ }
25
+
26
+ template <typename C>
27
+ void HeapArray<C>::reorder ()
28
+ {
29
+ #pragma omp parallel for
30
+ for (size_t j = 0; j < nh; j++)
31
+ heap_reorder<C> (k, val + j * k, ids + j * k);
32
+ }
33
+
34
+ template <typename C>
35
+ void HeapArray<C>::addn (size_t nj, const T *vin, TI j0,
36
+ size_t i0, int64_t ni)
37
+ {
38
+ if (ni == -1) ni = nh;
39
+ assert (i0 >= 0 && i0 + ni <= nh);
40
+ #pragma omp parallel for
41
+ for (size_t i = i0; i < i0 + ni; i++) {
42
+ T * __restrict simi = get_val(i);
43
+ TI * __restrict idxi = get_ids (i);
44
+ const T *ip_line = vin + (i - i0) * nj;
45
+
46
+ for (size_t j = 0; j < nj; j++) {
47
+ T ip = ip_line [j];
48
+ if (C::cmp(simi[0], ip)) {
49
+ heap_pop<C> (k, simi, idxi);
50
+ heap_push<C> (k, simi, idxi, ip, j + j0);
51
+ }
52
+ }
53
+ }
54
+ }
55
+
56
+ template <typename C>
57
+ void HeapArray<C>::addn_with_ids (
58
+ size_t nj, const T *vin, const TI *id_in,
59
+ int64_t id_stride, size_t i0, int64_t ni)
60
+ {
61
+ if (id_in == nullptr) {
62
+ addn (nj, vin, 0, i0, ni);
63
+ return;
64
+ }
65
+ if (ni == -1) ni = nh;
66
+ assert (i0 >= 0 && i0 + ni <= nh);
67
+ #pragma omp parallel for
68
+ for (size_t i = i0; i < i0 + ni; i++) {
69
+ T * __restrict simi = get_val(i);
70
+ TI * __restrict idxi = get_ids (i);
71
+ const T *ip_line = vin + (i - i0) * nj;
72
+ const TI *id_line = id_in + (i - i0) * id_stride;
73
+
74
+ for (size_t j = 0; j < nj; j++) {
75
+ T ip = ip_line [j];
76
+ if (C::cmp(simi[0], ip)) {
77
+ heap_pop<C> (k, simi, idxi);
78
+ heap_push<C> (k, simi, idxi, ip, id_line [j]);
79
+ }
80
+ }
81
+ }
82
+ }
83
+
84
+ template <typename C>
85
+ void HeapArray<C>::per_line_extrema (
86
+ T * out_val,
87
+ TI * out_ids) const
88
+ {
89
+ #pragma omp parallel for
90
+ for (size_t j = 0; j < nh; j++) {
91
+ int64_t imin = -1;
92
+ typename C::T xval = C::Crev::neutral ();
93
+ const typename C::T * x_ = val + j * k;
94
+ for (size_t i = 0; i < k; i++)
95
+ if (C::cmp (x_[i], xval)) {
96
+ xval = x_[i];
97
+ imin = i;
98
+ }
99
+ if (out_val)
100
+ out_val[j] = xval;
101
+
102
+ if (out_ids) {
103
+ if (ids && imin != -1)
104
+ out_ids[j] = ids [j * k + imin];
105
+ else
106
+ out_ids[j] = imin;
107
+ }
108
+ }
109
+ }
110
+
111
+
112
+
113
+
114
+ // explicit instantiations
115
+
116
+ template struct HeapArray<CMin <float, int64_t> >;
117
+ template struct HeapArray<CMax <float, int64_t> >;
118
+ template struct HeapArray<CMin <int, int64_t> >;
119
+ template struct HeapArray<CMax <int, int64_t> >;
120
+
121
+
122
+ } // END namespace faiss
@@ -0,0 +1,495 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ /*
11
+ * C++ support for heaps. The set of functions is tailored for
12
+ * efficient similarity search.
13
+ *
14
+ * There is no specific object for a heap, and the functions that
15
+ * operate on a single heap are inlined, because heaps are often
16
+ * small. More complex functions are implemented in Heap.cpp
17
+ *
18
+ */
19
+
20
+
21
+ #ifndef FAISS_Heap_h
22
+ #define FAISS_Heap_h
23
+
24
+ #include <climits>
25
+ #include <cstring>
26
+ #include <cmath>
27
+
28
+ #include <cassert>
29
+ #include <cstdio>
30
+ #include <stdint.h>
31
+
32
+ #include <limits>
33
+
34
+
35
+ namespace faiss {
36
+
37
+ /*******************************************************************
38
+ * C object: uniform handling of min and max heap
39
+ *******************************************************************/
40
+
41
+ /** The C object gives the type T of the values in the heap, the type
42
+ * of the keys, TI and the comparison that is done: > for the minheap
43
+ * and < for the maxheap. The neutral value will always be dropped in
44
+ * favor of any other value in the heap.
45
+ */
46
+
47
// CMax is defined further down; CMin only needs its name for Crev.
template <typename T_, typename TI_>
struct CMax;

/** Comparison traits for min-heaps, i.e. heaps that keep the minimum value
 *  on top. Such a heap is what is used to collect the *max* values of an
 *  array.
 */
template <typename T_, typename TI_>
struct CMin {
    using T = T_;                    ///< type of the values
    using TI = TI_;                  ///< type of the ids
    using Crev = CMax<T_, TI_>;      ///< traits with the opposite ordering

    /// true when a is strictly smaller than b
    inline static bool cmp (T a, T b) {
        return a < b;
    }

    /// Value that will be popped first, so it must be smaller than all
    /// others. For int types this is not strictly the smallest value
    /// (which would be -max - 1).
    inline static T neutral () {
        const T largest = std::numeric_limits<T>::max();
        return -largest;
    }
};
66
+
67
+
68
+ template <typename T_, typename TI_>
69
+ struct CMax {
70
+ typedef T_ T;
71
+ typedef TI_ TI;
72
+ typedef CMin<T_, TI_> Crev;
73
+ inline static bool cmp (T a, T b) {
74
+ return a > b;
75
+ }
76
+ inline static T neutral () {
77
+ return std::numeric_limits<T>::max();
78
+ }
79
+ };
80
+
81
+
82
+ /*******************************************************************
83
+ * Basic heap ops: push and pop
84
+ *******************************************************************/
85
+
86
/** Removes the top element of the heap stored in bh_val[0..k-1] and
 *  bh_ids[0..k-1]. The last element is sifted down from the root to fill
 *  the hole; afterwards the heap occupies the first k-1 slots and slot k-1
 *  should be considered undefined.
 */
template <class C> inline
void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
{
    /* 1-based views: the children of node n are nodes 2n and 2n+1 */
    typename C::T * const vals = bh_val - 1;
    typename C::TI * const idx = bh_ids - 1;

    const typename C::T last = vals[k];
    size_t hole = 1;
    for (;;) {
        const size_t left = hole << 1;
        const size_t right = left + 1;
        if (left > k)
            break;
        /* right == k + 1 means that only the left child exists */
        const bool go_left =
            right == k + 1 || C::cmp (vals[left], vals[right]);
        const size_t child = go_left ? left : right;
        if (C::cmp (last, vals[child]))
            break;
        vals[hole] = vals[child];
        idx[hole] = idx[child];
        hole = child;
    }
    vals[hole] = vals[k];
    idx[hole] = idx[k];
}
119
+
120
+
121
+
122
/** Inserts the element (val, ids) into the heap stored in bh_val[0..k-2]
 *  and bh_ids[0..k-2]. The new element starts in slot k-1 and is sifted up;
 *  on output the heap occupies slots 0..k-1.
 */
template <class C> inline
void heap_push (size_t k,
                typename C::T * bh_val, typename C::TI * bh_ids,
                typename C::T val, typename C::TI ids)
{
    /* 1-based views: the father of node n is node n / 2 */
    typename C::T * const vals = bh_val - 1;
    typename C::TI * const idx = bh_ids - 1;

    size_t slot = k;
    /* move fathers down for as long as the new value must sit above them */
    while (slot > 1 && C::cmp (val, vals[slot >> 1])) {
        const size_t father = slot >> 1;
        vals[slot] = vals[father];
        idx[slot] = idx[father];
        slot = father;
    }
    vals[slot] = val;
    idx[slot] = ids;
}
144
+
145
+
146
+
147
+ /* Partial instantiation for heaps with TI = int64_t */
148
+
149
+ template <typename T> inline
150
+ void minheap_pop (size_t k, T * bh_val, int64_t * bh_ids)
151
+ {
152
+ heap_pop<CMin<T, int64_t> > (k, bh_val, bh_ids);
153
+ }
154
+
155
+
156
+ template <typename T> inline
157
+ void minheap_push (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids)
158
+ {
159
+ heap_push<CMin<T, int64_t> > (k, bh_val, bh_ids, val, ids);
160
+ }
161
+
162
+
163
+ template <typename T> inline
164
+ void maxheap_pop (size_t k, T * bh_val, int64_t * bh_ids)
165
+ {
166
+ heap_pop<CMax<T, int64_t> > (k, bh_val, bh_ids);
167
+ }
168
+
169
+
170
+ template <typename T> inline
171
+ void maxheap_push (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids)
172
+ {
173
+ heap_push<CMax<T, int64_t> > (k, bh_val, bh_ids, val, ids);
174
+ }
175
+
176
+
177
+
178
+ /*******************************************************************
179
+ * Heap initialization
180
+ *******************************************************************/
181
+
182
+ /* Initialization phase for the heap (with unconditionnal pushes).
183
+ * Store k0 elements in a heap containing up to k values. Note that
184
+ * (bh_val, bh_ids) can be the same as (x, ids) */
185
+ template <class C> inline
186
+ void heap_heapify (
187
+ size_t k,
188
+ typename C::T * bh_val,
189
+ typename C::TI * bh_ids,
190
+ const typename C::T * x = nullptr,
191
+ const typename C::TI * ids = nullptr,
192
+ size_t k0 = 0)
193
+ {
194
+ if (k0 > 0) assert (x);
195
+
196
+ if (ids) {
197
+ for (size_t i = 0; i < k0; i++)
198
+ heap_push<C> (i+1, bh_val, bh_ids, x[i], ids[i]);
199
+ } else {
200
+ for (size_t i = 0; i < k0; i++)
201
+ heap_push<C> (i+1, bh_val, bh_ids, x[i], i);
202
+ }
203
+
204
+ for (size_t i = k0; i < k; i++) {
205
+ bh_val[i] = C::neutral();
206
+ bh_ids[i] = -1;
207
+ }
208
+
209
+ }
210
+
211
+ template <typename T> inline
212
+ void minheap_heapify (
213
+ size_t k, T * bh_val,
214
+ int64_t * bh_ids,
215
+ const T * x = nullptr,
216
+ const int64_t * ids = nullptr,
217
+ size_t k0 = 0)
218
+ {
219
+ heap_heapify< CMin<T, int64_t> > (k, bh_val, bh_ids, x, ids, k0);
220
+ }
221
+
222
+
223
+ template <typename T> inline
224
+ void maxheap_heapify (
225
+ size_t k,
226
+ T * bh_val,
227
+ int64_t * bh_ids,
228
+ const T * x = nullptr,
229
+ const int64_t * ids = nullptr,
230
+ size_t k0 = 0)
231
+ {
232
+ heap_heapify< CMax<T, int64_t> > (k, bh_val, bh_ids, x, ids, k0);
233
+ }
234
+
235
+
236
+
237
+ /*******************************************************************
238
+ * Add n elements to the heap
239
+ *******************************************************************/
240
+
241
+
242
+ /* Add some elements to the heap */
243
+ template <class C> inline
244
+ void heap_addn (size_t k,
245
+ typename C::T * bh_val, typename C::TI * bh_ids,
246
+ const typename C::T * x,
247
+ const typename C::TI * ids,
248
+ size_t n)
249
+ {
250
+ size_t i;
251
+ if (ids)
252
+ for (i = 0; i < n; i++) {
253
+ if (C::cmp (bh_val[0], x[i])) {
254
+ heap_pop<C> (k, bh_val, bh_ids);
255
+ heap_push<C> (k, bh_val, bh_ids, x[i], ids[i]);
256
+ }
257
+ }
258
+ else
259
+ for (i = 0; i < n; i++) {
260
+ if (C::cmp (bh_val[0], x[i])) {
261
+ heap_pop<C> (k, bh_val, bh_ids);
262
+ heap_push<C> (k, bh_val, bh_ids, x[i], i);
263
+ }
264
+ }
265
+ }
266
+
267
+
268
+ /* Partial instantiation for heaps with TI = int64_t */
269
+
270
+ template <typename T> inline
271
+ void minheap_addn (size_t k, T * bh_val, int64_t * bh_ids,
272
+ const T * x, const int64_t * ids, size_t n)
273
+ {
274
+ heap_addn<CMin<T, int64_t> > (k, bh_val, bh_ids, x, ids, n);
275
+ }
276
+
277
+ template <typename T> inline
278
+ void maxheap_addn (size_t k, T * bh_val, int64_t * bh_ids,
279
+ const T * x, const int64_t * ids, size_t n)
280
+ {
281
+ heap_addn<CMax<T, int64_t> > (k, bh_val, bh_ids, x, ids, n);
282
+ }
283
+
284
+
285
+
286
+
287
+
288
+
289
+ /*******************************************************************
290
+ * Heap finalization (reorder elements)
291
+ *******************************************************************/
292
+
293
+
294
+ /* This function maps a binary heap into an sorted structure.
295
+ It returns the number */
296
+ template <typename C> inline
297
+ size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids)
298
+ {
299
+ size_t i, ii;
300
+
301
+ for (i = 0, ii = 0; i < k; i++) {
302
+ /* top element should be put at the end of the list */
303
+ typename C::T val = bh_val[0];
304
+ typename C::TI id = bh_ids[0];
305
+
306
+ /* boundary case: we will over-ride this value if not a true element */
307
+ heap_pop<C> (k-i, bh_val, bh_ids);
308
+ bh_val[k-ii-1] = val;
309
+ bh_ids[k-ii-1] = id;
310
+ if (id != -1) ii++;
311
+ }
312
+ /* Count the number of elements which are effectively returned */
313
+ size_t nel = ii;
314
+
315
+ memmove (bh_val, bh_val+k-ii, ii * sizeof(*bh_val));
316
+ memmove (bh_ids, bh_ids+k-ii, ii * sizeof(*bh_ids));
317
+
318
+ for (; ii < k; ii++) {
319
+ bh_val[ii] = C::neutral();
320
+ bh_ids[ii] = -1;
321
+ }
322
+ return nel;
323
+ }
324
+
325
+ template <typename T> inline
326
+ size_t minheap_reorder (size_t k, T * bh_val, int64_t * bh_ids)
327
+ {
328
+ return heap_reorder< CMin<T, int64_t> > (k, bh_val, bh_ids);
329
+ }
330
+
331
+ template <typename T> inline
332
+ size_t maxheap_reorder (size_t k, T * bh_val, int64_t * bh_ids)
333
+ {
334
+ return heap_reorder< CMax<T, int64_t> > (k, bh_val, bh_ids);
335
+ }
336
+
337
+
338
+
339
+
340
+
341
+ /*******************************************************************
342
+ * Operations on heap arrays
343
+ *******************************************************************/
344
+
345
/** A set of nh [min|max]-heaps of k slots each, described by the traits C.
 *  The structure only points at the data: the values and ids of all heaps
 *  live in two compact arrays of size nh * k, heap i using the slots
 *  i * k .. (i + 1) * k - 1.
 */
template <typename C>
struct HeapArray {
    using TI = typename C::TI;
    using T = typename C::T;

    size_t nh;    ///< number of heaps
    size_t k;     ///< allocated size per heap
    TI * ids;     ///< identifiers (size nh * k)
    T * val;      ///< values (distances or similarities), size nh * k

    /// Pointer to the first value of heap number `key`
    T * get_val (size_t key) {
        const size_t offset = key * k;
        return val + offset;
    }

    /// Pointer to the first identifier of heap number `key`
    TI * get_ids (size_t key) {
        const size_t offset = key * k;
        return ids + offset;
    }

    /// prepare all the heaps before adding
    void heapify ();

    /** add nj elements to heaps i0:i0+ni, with sequential ids
     *
     * @param nj    nb of elements to add to each heap
     * @param vin   elements to add, size ni * nj
     * @param j0    add this to the ids that are added
     * @param i0    first heap to update
     * @param ni    nb of heaps to update (-1 = use nh)
     */
    void addn (size_t nj, const T *vin, TI j0 = 0,
               size_t i0 = 0, int64_t ni = -1);

    /** same as addn, with the ids of the elements given explicitly
     *
     * @param id_in     ids of the elements to add, size ni * nj
     * @param id_stride stride for id_in
     */
    void addn_with_ids (
        size_t nj, const T *vin, const TI *id_in = nullptr,
        int64_t id_stride = 0, size_t i0 = 0, int64_t ni = -1);

    /// reorder all the heaps
    void reorder ();

    /** this is not really a heap function. It just finds the extremum of
     * each of the nh lines of val
     * @param vals_out  extreme value of each line (size nh, or NULL)
     * @param idx_out   index of extreme value (size nh or NULL)
     */
    void per_line_extrema (T *vals_out, TI *idx_out) const;

};
399
+
400
+
401
+ /* Define useful heaps */
402
+ typedef HeapArray<CMin<float, int64_t> > float_minheap_array_t;
403
+ typedef HeapArray<CMin<int, int64_t> > int_minheap_array_t;
404
+
405
+ typedef HeapArray<CMax<float, int64_t> > float_maxheap_array_t;
406
+ typedef HeapArray<CMax<int, int64_t> > int_maxheap_array_t;
407
+
408
+ // The heap templates are instantiated explicitly in Heap.cpp
409
+
410
+
411
+
412
+
413
+
414
+
415
+
416
+
417
+
418
+
419
+
420
+
421
+
422
+
423
+
424
+
425
+
426
+
427
+
428
+ /*********************************************************************
429
+ * Indirect heaps: instead of having
430
+ *
431
+ * node i = (bh_ids[i], bh_val[i]),
432
+ *
433
+ * in indirect heaps,
434
+ *
435
+ * node i = (bh_ids[i], bh_val[bh_ids[i]]),
436
+ *
437
+ *********************************************************************/
438
+
439
+
440
/** Pops the top element of an indirect heap of k ids stored in
 *  bh_ids[0..k-1]. The heap is ordered by bh_val[id], so bh_val is indexed
 *  by id and is never modified. The last id is sifted down from the root;
 *  afterwards the heap occupies the first k-1 slots of bh_ids.
 *
 *  @param k       number of ids in the heap before the pop
 *  @param bh_val  values, looked up through the ids
 *  @param bh_ids  heap of ids, updated in place
 */
template <class C>
inline
void indirect_heap_pop (
    size_t k,
    const typename C::T * bh_val,
    typename C::TI * bh_ids)
{
    bh_ids--; /* Use 1-based indexing for easier node->child translation */
    typename C::T val = bh_val[bh_ids[k]];
    size_t i = 1;
    while (1) {
        size_t i1 = i << 1;
        size_t i2 = i1 + 1;
        if (i1 > k)
            break;
        typename C::TI id1 = bh_ids[i1];
        /* i2 == k + 1 means that node i has no right child: slot i2 lies
           outside the k ids of the heap and must not be read, so the id of
           the right child is only fetched once that test has failed. */
        if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[bh_ids[i2]])) {
            if (C::cmp(val, bh_val[id1]))
                break;
            bh_ids[i] = id1;
            i = i1;
        } else {
            typename C::TI id2 = bh_ids[i2];
            if (C::cmp(val, bh_val[id2]))
                break;
            bh_ids[i] = id2;
            i = i2;
        }
    }
    bh_ids[i] = bh_ids[k];
}
470
+
471
+
472
+
473
/** Inserts `id` into an indirect heap of k-1 ids stored in bh_ids[0..k-2].
 *  The heap is ordered by bh_val[id], so bh_val is indexed by id and is
 *  never modified. The new id starts in slot k-1 and is sifted up; on
 *  output the heap occupies slots 0..k-1.
 */
template <class C>
inline
void indirect_heap_push (size_t k,
                         const typename C::T * bh_val, typename C::TI * bh_ids,
                         typename C::TI id)
{
    /* 1-based view: the father of node n is node n / 2 */
    typename C::TI * const heap = bh_ids - 1;
    const typename C::T new_val = bh_val[id];

    size_t slot = k;
    /* move fathers down for as long as the new value must sit above them */
    while (slot > 1 && C::cmp (new_val, bh_val[heap[slot >> 1]])) {
        const size_t father = slot >> 1;
        heap[slot] = heap[father];
        slot = father;
    }
    heap[slot] = id;
}
491
+
492
+
493
+ } // namespace faiss
494
+
495
+ #endif /* FAISS_Heap_h */