faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <faiss/Index.h>
13
+ #include <faiss/IndexBinary.h>
14
+
15
+ namespace faiss {
16
+
17
+ /** Build an index with the sequence of processing steps described in
18
+ * the string. */
19
+ Index *index_factory (int d, const char *description,
20
+ MetricType metric = METRIC_L2);
21
+
22
+ IndexBinary *index_binary_factory (int d, const char *description);
23
+
24
+
25
+ }
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ // I/O code for indexes
11
+
12
+ #ifndef FAISS_INDEX_IO_H
13
+ #define FAISS_INDEX_IO_H
14
+
15
+
16
+ #include <cstdio>
17
+
18
+ /** I/O functions can read/write to a filename, a file handle or to an
19
+ * object that abstracts the medium.
20
+ *
21
+ * The read functions return objects that should be deallocated with
22
+ * delete. All references within these objects are owned by the
23
+ * object.
24
+ */
25
+
26
+ namespace faiss {
27
+
28
+ struct Index;
29
+ struct IndexBinary;
30
+ struct VectorTransform;
31
+ struct ProductQuantizer;
32
+ struct IOReader;
33
+ struct IOWriter;
34
+ struct InvertedLists;
35
+
36
+ void write_index (const Index *idx, const char *fname);
37
+ void write_index (const Index *idx, FILE *f);
38
+ void write_index (const Index *idx, IOWriter *writer);
39
+
40
+ void write_index_binary (const IndexBinary *idx, const char *fname);
41
+ void write_index_binary (const IndexBinary *idx, FILE *f);
42
+ void write_index_binary (const IndexBinary *idx, IOWriter *writer);
43
+
44
+ // The read_index flags are implemented only for a subset of index types.
45
+ const int IO_FLAG_MMAP = 1; // try to memmap if possible
46
+ const int IO_FLAG_READ_ONLY = 2;
47
+ // strip directory component from ondisk filename, and assume it's in
48
+ // the same directory as the index file
49
+ const int IO_FLAG_ONDISK_SAME_DIR = 4;
50
+
51
+ Index *read_index (const char *fname, int io_flags = 0);
52
+ Index *read_index (FILE * f, int io_flags = 0);
53
+ Index *read_index (IOReader *reader, int io_flags = 0);
54
+
55
+ IndexBinary *read_index_binary (const char *fname, int io_flags = 0);
56
+ IndexBinary *read_index_binary (FILE * f, int io_flags = 0);
57
+ IndexBinary *read_index_binary (IOReader *reader, int io_flags = 0);
58
+
59
+ void write_VectorTransform (const VectorTransform *vt, const char *fname);
60
+ VectorTransform *read_VectorTransform (const char *fname);
61
+
62
+ ProductQuantizer * read_ProductQuantizer (const char*fname);
63
+ ProductQuantizer * read_ProductQuantizer (IOReader *reader);
64
+
65
+ void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname);
66
+ void write_ProductQuantizer (const ProductQuantizer*pq, IOWriter *f);
67
+
68
+ void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
69
+ InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
70
+
71
+
72
+ } // namespace faiss
73
+
74
+
75
+ #endif
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #undef FINTEGER
12
+ #define FINTEGER long
13
+
14
+
15
+ extern "C" {
16
+
17
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
18
+
19
+ int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
20
+ n, FINTEGER *k, const float *alpha, const float *a,
21
+ FINTEGER *lda, const float *b, FINTEGER *
22
+ ldb, float *beta, float *c, FINTEGER *ldc);
23
+
24
+ /* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
25
+
26
+ int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda,
27
+ float *tau, float *work, FINTEGER *lwork, FINTEGER *info);
28
+
29
+ }
30
+
31
/**
 * Allocate an array of `size` floats with new[] and fill every element
 * with a sample drawn from drand48().
 *
 * The caller owns the returned array and must release it with delete[].
 */
float *new_random_vec(int size)
{
    float *values = new float[size];
    float *const stop = values + size;
    for (float *cursor = values; cursor != stop; ++cursor) {
        *cursor = drand48();
    }
    return values;
}
38
+
39
+
40
+ int main() {
41
+
42
+ FINTEGER m = 10, n = 20, k = 30;
43
+ float *a = new_random_vec(m * k), *b = new_random_vec(n * k), *c = new float[n * m];
44
+ float one = 1.0, zero = 0.0;
45
+
46
+ printf("BLAS test\n");
47
+
48
+ sgemm_("Not transposed", "Not transposed",
49
+ &m, &n, &k, &one, a, &m, b, &k, &zero, c, &m);
50
+
51
+ printf("errors=\n");
52
+
53
+ for (int i = 0; i < m; i++) {
54
+ for (int j = 0; j < n; j++) {
55
+ float accu = 0;
56
+ for (int l = 0; l < k; l++)
57
+ accu += a[i + l * m] * b[l + j * k];
58
+ printf ("%6.3f ", accu - c[i + j * m]);
59
+ }
60
+ printf("\n");
61
+ }
62
+
63
+ long info = 0x64bL << 32;
64
+ long mi = 0x64bL << 32 | m;
65
+ float *tau = new float[m];
66
+ FINTEGER lwork = -1;
67
+
68
+ float work1;
69
+
70
+ printf("Intentional Lapack error (appears only for 64-bit INTEGER):\n");
71
+ sgeqrf_ (&mi, &n, c, &m, tau, &work1, &lwork, (FINTEGER*)&info);
72
+
73
+ // sgeqrf_ (&m, &n, c, &zeroi, tau, &work1, &lwork, (FINTEGER*)&info);
74
+ printf("info=%016lx\n", info);
75
+
76
+ if(info >> 32 == 0x64b) {
77
+ printf("Lapack uses 32-bit integers\n");
78
+ } else {
79
+ printf("Lapack uses 64-bit integers\n");
80
+ }
81
+
82
+
83
+ return 0;
84
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #include <gtest/gtest.h>
12
+
13
+ #include <faiss/IndexBinaryFlat.h>
14
+ #include <faiss/utils/hamming.h>
15
+
16
+ TEST(BinaryFlat, accuracy) {
17
+ // dimension of the vectors to index
18
+ int d = 64;
19
+
20
+ // size of the database we plan to index
21
+ size_t nb = 1000;
22
+
23
+ // make the index object and train it
24
+ faiss::IndexBinaryFlat index(d);
25
+
26
+ srand(35);
27
+
28
+ std::vector<uint8_t> database(nb * (d / 8));
29
+ for (size_t i = 0; i < nb * (d / 8); i++) {
30
+ database[i] = rand() % 0x100;
31
+ }
32
+
33
+ { // populating the database
34
+ index.add(nb, database.data());
35
+ }
36
+
37
+ size_t nq = 200;
38
+
39
+ { // searching the database
40
+
41
+ std::vector<uint8_t> queries(nq * (d / 8));
42
+ for (size_t i = 0; i < nq * (d / 8); i++) {
43
+ queries[i] = rand() % 0x100;
44
+ }
45
+
46
+ int k = 5;
47
+ std::vector<faiss::IndexBinary::idx_t> nns(k * nq);
48
+ std::vector<int> dis(k * nq);
49
+
50
+ index.search(nq, queries.data(), k, dis.data(), nns.data());
51
+
52
+ for (size_t i = 0; i < nq; ++i) {
53
+ faiss::HammingComputer8 hc(queries.data() + i * (d / 8), d / 8);
54
+ hamdis_t dist_min = hc.hamming(database.data());
55
+ for (size_t j = 1; j < nb; ++j) {
56
+ hamdis_t dist = hc.hamming(database.data() + j * (d / 8));
57
+ if (dist < dist_min) {
58
+ dist_min = dist;
59
+ }
60
+ }
61
+ EXPECT_EQ(dist_min, dis[k * i]);
62
+ }
63
+ }
64
+ }
@@ -0,0 +1,183 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #include <memory>
12
+ #include <vector>
13
+
14
+ #include <gtest/gtest.h>
15
+
16
+ #include <faiss/IndexIVF.h>
17
+ #include <faiss/index_factory.h>
18
+ #include <faiss/AutoTune.h>
19
+ #include <faiss/index_io.h>
20
+ #include <faiss/IVFlib.h>
21
+
22
+ using namespace faiss;
23
+
24
+ namespace {
25
+
26
+ typedef Index::idx_t idx_t;
27
+
28
+
29
+ // dimension of the vectors to index
30
+ int d = 32;
31
+
32
+ // nb of training vectors
33
+ size_t nt = 5000;
34
+
35
+ // size of the database points per window step
36
+ size_t nb = 1000;
37
+
38
+ // nb of queries
39
+ size_t nq = 200;
40
+
41
+
42
+ std::vector<float> make_data(size_t n)
43
+ {
44
+ std::vector <float> database (n * d);
45
+ for (size_t i = 0; i < n * d; i++) {
46
+ database[i] = drand48();
47
+ }
48
+ return database;
49
+ }
50
+
51
+ std::unique_ptr<Index> make_trained_index(const char *index_type)
52
+ {
53
+ auto index = std::unique_ptr<Index>(index_factory(d, index_type));
54
+ auto xt = make_data(nt * d);
55
+ index->train(nt, xt.data());
56
+ ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
57
+ return index;
58
+ }
59
+
60
+ std::vector<idx_t> search_index(Index *index, const float *xq) {
61
+ int k = 10;
62
+ std::vector<idx_t> I(k * nq);
63
+ std::vector<float> D(k * nq);
64
+ index->search (nq, xq, k, D.data(), I.data());
65
+ return I;
66
+ }
67
+
68
+
69
+
70
+
71
+
72
+ /*************************************************************
73
+ * Test functions for a given index type
74
+ *************************************************************/
75
+
76
+ struct EncapsulateInvertedLists: InvertedLists {
77
+
78
+ const InvertedLists *il;
79
+
80
+ EncapsulateInvertedLists(const InvertedLists *il):
81
+ InvertedLists(il->nlist, il->code_size),
82
+ il(il)
83
+ {}
84
+
85
+ static void * memdup (const void *m, size_t size) {
86
+ if (size == 0) return nullptr;
87
+ return memcpy (malloc(size), m, size);
88
+ }
89
+
90
+ size_t list_size(size_t list_no) const override {
91
+ return il->list_size (list_no);
92
+ }
93
+
94
+ const uint8_t * get_codes (size_t list_no) const override {
95
+ return (uint8_t*)memdup (il->get_codes(list_no),
96
+ list_size(list_no) * code_size);
97
+ }
98
+
99
+ const idx_t * get_ids (size_t list_no) const override {
100
+ return (idx_t*)memdup (il->get_ids(list_no),
101
+ list_size(list_no) * sizeof(idx_t));
102
+ }
103
+
104
+ void release_codes (size_t, const uint8_t *codes) const override {
105
+ free ((void*)codes);
106
+ }
107
+
108
+ void release_ids (size_t, const idx_t *ids) const override {
109
+ free ((void*)ids);
110
+ }
111
+
112
+ const uint8_t * get_single_code (size_t list_no, size_t offset)
113
+ const override {
114
+ return (uint8_t*)memdup (il->get_single_code(list_no, offset),
115
+ code_size);
116
+ }
117
+
118
+ size_t add_entries(size_t, size_t, const idx_t*, const uint8_t*) override {
119
+ assert(!"not implemented");
120
+ return 0;
121
+ }
122
+
123
+ void update_entries(size_t, size_t, size_t, const idx_t*, const uint8_t*)
124
+ override {
125
+ assert(!"not implemented");
126
+ }
127
+
128
+ void resize(size_t, size_t) override {
129
+ assert(!"not implemented");
130
+ }
131
+
132
+ ~EncapsulateInvertedLists() override {}
133
+ };
134
+
135
+
136
+
137
+ int test_dealloc_invlists (const char *index_key) {
138
+
139
+ std::unique_ptr<Index> index = make_trained_index(index_key);
140
+ IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
141
+
142
+ auto xb = make_data (nb * d);
143
+ index->add(nb, xb.data());
144
+
145
+ auto xq = make_data (nq * d);
146
+
147
+ auto ref_res = search_index (index.get(), xq.data());
148
+
149
+ EncapsulateInvertedLists eil(index_ivf->invlists);
150
+
151
+ index_ivf->own_invlists = false;
152
+ index_ivf->replace_invlists (&eil, false);
153
+
154
+ // TEST: this could crash or leak mem
155
+ auto new_res = search_index (index.get(), xq.data());
156
+
157
+ // delete explicitly
158
+ delete eil.il;
159
+
160
+ // just to make sure
161
+ EXPECT_EQ (ref_res, new_res);
162
+ return 0;
163
+ }
164
+
165
+ } // anonymous namespace
166
+
167
+
168
+
169
+ /*************************************************************
170
+ * Test entry points
171
+ *************************************************************/
172
+
173
+ TEST(TestIvlistDealloc, IVFFlat) {
174
+ test_dealloc_invlists ("IVF32,Flat");
175
+ }
176
+
177
+ TEST(TestIvlistDealloc, IVFSQ) {
178
+ test_dealloc_invlists ("IVF32,SQ8");
179
+ }
180
+
181
+ TEST(TestIvlistDealloc, IVFPQ) {
182
+ test_dealloc_invlists ("IVF32,PQ4np");
183
+ }
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+
11
+ #include <gtest/gtest.h>
12
+
13
+ #include <faiss/IndexIVFPQ.h>
14
+ #include <faiss/IndexFlat.h>
15
+ #include <faiss/utils/utils.h>
16
+ #include <faiss/utils/distances.h>
17
+
18
+
19
+ namespace {
20
+
21
+ // dimension of the vectors to index
22
+ int d = 64;
23
+
24
+ // size of the database we plan to index
25
+ size_t nb = 8000;
26
+
27
+
28
+ double eval_codec_error (long ncentroids, long m, const std::vector<float> &v)
29
+ {
30
+ faiss::IndexFlatL2 coarse_quantizer (d);
31
+ faiss::IndexIVFPQ index (&coarse_quantizer, d,
32
+ ncentroids, m, 8);
33
+ index.pq.cp.niter = 10; // speed up train
34
+ index.train (nb, v.data());
35
+
36
+ // encode and decode to compute reconstruction error
37
+
38
+ std::vector<faiss::Index::idx_t> keys (nb);
39
+ std::vector<uint8_t> codes (nb * m);
40
+ index.encode_multiple (nb, keys.data(), v.data(), codes.data(), true);
41
+
42
+ std::vector<float> v2 (nb * d);
43
+ index.decode_multiple (nb, keys.data(), codes.data(), v2.data());
44
+
45
+ return faiss::fvec_L2sqr (v.data(), v2.data(), nb * d);
46
+ }
47
+
48
+ } // namespace
49
+
50
+
51
+ TEST(IVFPQ, codec) {
52
+
53
+ std::vector <float> database (nb * d);
54
+ for (size_t i = 0; i < nb * d; i++) {
55
+ database[i] = drand48();
56
+ }
57
+
58
+ double err0 = eval_codec_error(16, 8, database);
59
+
60
+ // should be more accurate as there are more coarse centroids
61
+ double err1 = eval_codec_error(128, 8, database);
62
+ EXPECT_GT(err0, err1);
63
+
64
+ // should be more accurate as there are more PQ codes
65
+ double err2 = eval_codec_error(16, 16, database);
66
+ EXPECT_GT(err0, err2);
67
+ }