faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,42 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // Copyright 2004-present Facebook. All Rights Reserved
9
- // -*- c++ -*-
10
- // I/O code for indexes
11
-
12
- #include "index_io_c.h"
13
- #include "index_io.h"
14
- #include "macros_impl.h"
15
-
16
- using faiss::Index;
17
-
18
- int faiss_write_index(const FaissIndex *idx, FILE *f) {
19
- try {
20
- faiss::write_index(reinterpret_cast<const Index*>(idx), f);
21
- } CATCH_AND_HANDLE
22
- }
23
-
24
- int faiss_write_index_fname(const FaissIndex *idx, const char *fname) {
25
- try {
26
- faiss::write_index(reinterpret_cast<const Index*>(idx), fname);
27
- } CATCH_AND_HANDLE
28
- }
29
-
30
- int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out) {
31
- try {
32
- auto out = faiss::read_index(f, io_flags);
33
- *p_out = reinterpret_cast<FaissIndex*>(out);
34
- } CATCH_AND_HANDLE
35
- }
36
-
37
- int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out) {
38
- try {
39
- auto out = faiss::read_index(fname, io_flags);
40
- *p_out = reinterpret_cast<FaissIndex*>(out);
41
- } CATCH_AND_HANDLE
42
- }
@@ -1,50 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // Copyright 2004-present Facebook. All Rights Reserved
9
- // -*- c++ -*-
10
- // I/O code for indexes
11
-
12
-
13
- #ifndef FAISS_INDEX_IO_C_H
14
- #define FAISS_INDEX_IO_C_H
15
-
16
- #include <stdio.h>
17
- #include "faiss_c.h"
18
- #include "Index_c.h"
19
-
20
- #ifdef __cplusplus
21
- extern "C" {
22
- #endif
23
-
24
- /** Write index to a file.
25
- * This is equivalent to `faiss::write_index` when a file descriptor is provided.
26
- */
27
- int faiss_write_index(const FaissIndex *idx, FILE *f);
28
-
29
- /** Write index to a file.
30
- * This is equivalent to `faiss::write_index` when a file path is provided.
31
- */
32
- int faiss_write_index_fname(const FaissIndex *idx, const char *fname);
33
-
34
- #define FAISS_IO_FLAG_MMAP 1
35
- #define FAISS_IO_FLAG_READ_ONLY 2
36
-
37
- /** Read index from a file.
38
- * This is equivalent to `faiss::read_index` when a file descriptor is given.
39
- */
40
- int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out);
41
-
42
- /** Read index from a file.
43
- * This is equivalent to `faiss::read_index` when a file path is given.
44
- */
45
- int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out);
46
-
47
- #ifdef __cplusplus
48
- }
49
- #endif
50
- #endif
@@ -1,110 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // Copyright 2004-present Facebook. All Rights Reserved.
9
- // -*- c++ -*-
10
-
11
- /// Utility macros for the C wrapper implementation.
12
-
13
- #ifndef MACROS_IMPL_H
14
- #define MACROS_IMPL_H
15
-
16
- #include "faiss_c.h"
17
- #include "FaissException.h"
18
- #include "error_impl.h"
19
- #include <stdexcept>
20
- #include <iostream>
21
-
22
- #ifdef NDEBUG
23
- #define CATCH_AND_HANDLE \
24
- catch (faiss::FaissException& e) { \
25
- faiss_last_exception = \
26
- std::make_exception_ptr(e); \
27
- return -2; \
28
- } catch (std::exception& e) { \
29
- faiss_last_exception = \
30
- std::make_exception_ptr(e); \
31
- return -4; \
32
- } catch (...) { \
33
- faiss_last_exception = \
34
- std::make_exception_ptr( \
35
- std::runtime_error("Unknown error")); \
36
- return -1; \
37
- } return 0;
38
- #else
39
- #define CATCH_AND_HANDLE \
40
- catch (faiss::FaissException& e) { \
41
- std::cerr << e.what() << '\n'; \
42
- faiss_last_exception = \
43
- std::make_exception_ptr(e); \
44
- return -2; \
45
- } catch (std::exception& e) { \
46
- std::cerr << e.what() << '\n'; \
47
- faiss_last_exception = \
48
- std::make_exception_ptr(e); \
49
- return -4; \
50
- } catch (...) { \
51
- std::cerr << "Unrecognized exception!\n"; \
52
- faiss_last_exception = \
53
- std::make_exception_ptr( \
54
- std::runtime_error("Unknown error")); \
55
- return -1; \
56
- } return 0;
57
- #endif
58
-
59
- #define DEFINE_GETTER(clazz, ty, name) \
60
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
61
- return static_cast< ty >( \
62
- reinterpret_cast< const faiss::clazz *>(obj)-> name \
63
- ); \
64
- }
65
-
66
- #define DEFINE_GETTER_SUBCLASS(clazz, parent, ty, name) \
67
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
68
- return static_cast< ty >( \
69
- reinterpret_cast<const faiss::parent::clazz *>(obj)-> name \
70
- ); \
71
- }
72
-
73
- #define DEFINE_GETTER_PERMISSIVE(clazz, ty, name) \
74
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
75
- return ( ty ) ( \
76
- reinterpret_cast<const faiss::clazz *>(obj)-> name \
77
- ); \
78
- }
79
-
80
- #define DEFINE_GETTER_SUBCLASS_PERMISSIVE(clazz, parent, ty, name) \
81
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
82
- return ( ty ) ( \
83
- reinterpret_cast<const faiss::parent::clazz *>(obj)-> name \
84
- ); \
85
- }
86
-
87
- #define DEFINE_SETTER(clazz, ty, name) \
88
- void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty val) { \
89
- reinterpret_cast< faiss::clazz *>(obj)-> name = val; \
90
- }
91
-
92
- #define DEFINE_SETTER_STATIC(clazz, ty_to, ty_from, name) \
93
- void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty_from val) { \
94
- reinterpret_cast< faiss::clazz *>(obj)-> name = \
95
- static_cast< ty_to >(val); \
96
- }
97
-
98
- #define DEFINE_DESTRUCTOR(clazz) \
99
- void faiss_ ## clazz ## _free (Faiss ## clazz *obj) { \
100
- delete reinterpret_cast<faiss::clazz *>(obj); \
101
- }
102
-
103
- #define DEFINE_INDEX_DOWNCAST(clazz) \
104
- Faiss ## clazz * faiss_ ## clazz ## _cast (FaissIndex* index) { \
105
- return reinterpret_cast<Faiss ## clazz *>( \
106
- dynamic_cast< faiss::clazz *>( \
107
- reinterpret_cast<faiss::Index*>(index))); \
108
- }
109
-
110
- #endif
@@ -1,154 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
-
10
- #include <cmath>
11
- #include <cstdio>
12
- #include <cstdlib>
13
- #include <random>
14
-
15
- #include <sys/time.h>
16
-
17
-
18
- #include <faiss/IndexPQ.h>
19
- #include <faiss/IndexIVFFlat.h>
20
- #include <faiss/IndexFlat.h>
21
- #include <faiss/index_io.h>
22
-
23
- double elapsed ()
24
- {
25
- struct timeval tv;
26
- gettimeofday (&tv, nullptr);
27
- return tv.tv_sec + tv.tv_usec * 1e-6;
28
- }
29
-
30
-
31
- int main ()
32
- {
33
- double t0 = elapsed();
34
-
35
- // dimension of the vectors to index
36
- int d = 128;
37
-
38
- // size of the database we plan to index
39
- size_t nb = 1000 * 1000;
40
-
41
- // make a set of nt training vectors in the unit cube
42
- // (could be the database)
43
- size_t nt = 100 * 1000;
44
-
45
- //---------------------------------------------------------------
46
- // Define the core quantizer
47
- // We choose a multiple inverted index for faster training with less data
48
- // and because it usually offers best accuracy/speed trade-offs
49
- //
50
- // We assume here that the lifespan of this coarse quantizer will cover the
51
- // lifespan of the inverted-file quantizer IndexIVFFlat below
52
- // With dynamic allocation, one may give the responsibility to free the
53
- // quantizer to the inverted-file index (with attribute do_delete_quantizer)
54
- //
55
- // Note: a regular clustering algorithm would be defined as:
56
- // faiss::IndexFlatL2 coarse_quantizer (d);
57
- //
58
- // Use nhash=2 subquantizers to define the product coarse quantizer
59
- // Number of bits: we will have 2^nbits_subq centroids per subquantizer
60
- // meaning (2^nbits_subq)^nhash distinct inverted lists
61
- size_t nhash = 2;
62
- size_t nbits_subq = int (log2 (nb+1) / 2); // good choice in general
63
- size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
64
-
65
- faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
66
-
67
- printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
68
- nhash, nbits_subq, ncentroids, nb);
69
-
70
- // the coarse quantizer should not be dealloced before the index
71
- // 4 = nb of bytes per code (d must be a multiple of this)
72
- // 8 = nb of bits per sub-code (almost always 8)
73
- faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
74
- faiss::IndexIVFFlat index (&coarse_quantizer, d, ncentroids, metric);
75
- index.quantizer_trains_alone = true;
76
-
77
- // define the number of probes. 2048 is for high-dim, overkill in practice
78
- // Use 4-1024 depending on the speed/accuracy trade-off that you want
79
- index.nprobe = 2048;
80
-
81
- std::mt19937 rng;
82
- std::uniform_real_distribution<> distrib;
83
-
84
- { // training
85
- printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
86
- elapsed() - t0, nt, d);
87
-
88
- std::vector <float> trainvecs (nt * d);
89
- for (size_t i = 0; i < nt * d; i++) {
90
- trainvecs[i] = distrib(rng);
91
- }
92
-
93
- printf ("[%.3f s] Training the index\n", elapsed() - t0);
94
- index.verbose = true;
95
- index.train (nt, trainvecs.data());
96
- }
97
-
98
- size_t nq;
99
- std::vector<float> queries;
100
-
101
- { // populating the database
102
- printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
103
- elapsed() - t0, nb);
104
-
105
- std::vector <float> database (nb * d);
106
- for (size_t i = 0; i < nb * d; i++) {
107
- database[i] = distrib(rng);
108
- }
109
-
110
- printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
111
-
112
- index.add (nb, database.data());
113
-
114
- // remember a few elements from the database as queries
115
- int i0 = 1234;
116
- int i1 = 1244;
117
-
118
- nq = i1 - i0;
119
- queries.resize (nq * d);
120
- for (int i = i0; i < i1; i++) {
121
- for (int j = 0; j < d; j++) {
122
- queries [(i - i0) * d + j] = database [i * d + j];
123
- }
124
- }
125
- }
126
-
127
- { // searching the database
128
- int k = 5;
129
- printf ("[%.3f s] Searching the %d nearest neighbors "
130
- "of %ld vectors in the index\n",
131
- elapsed() - t0, k, nq);
132
-
133
- std::vector<faiss::Index::idx_t> nns (k * nq);
134
- std::vector<float> dis (k * nq);
135
-
136
- index.search (nq, queries.data(), k, dis.data(), nns.data());
137
-
138
- printf ("[%.3f s] Query results (vector ids, then distances):\n",
139
- elapsed() - t0);
140
-
141
- for (int i = 0; i < nq; i++) {
142
- printf ("query %2d: ", i);
143
- for (int j = 0; j < k; j++) {
144
- printf ("%7ld ", nns[j + i * k]);
145
- }
146
- printf ("\n dis: ");
147
- for (int j = 0; j < k; j++) {
148
- printf ("%7g ", dis[j + i * k]);
149
- }
150
- printf ("\n");
151
- }
152
- }
153
- return 0;
154
- }
@@ -1,203 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
-
10
- #include <cmath>
11
- #include <cstdio>
12
- #include <cstdlib>
13
- #include <random>
14
-
15
- #include <sys/time.h>
16
-
17
-
18
- #include <faiss/IndexPQ.h>
19
- #include <faiss/IndexIVFPQ.h>
20
- #include <faiss/IndexFlat.h>
21
- #include <faiss/index_io.h>
22
-
23
- double elapsed ()
24
- {
25
- struct timeval tv;
26
- gettimeofday (&tv, nullptr);
27
- return tv.tv_sec + tv.tv_usec * 1e-6;
28
- }
29
-
30
-
31
- int main ()
32
- {
33
- double t0 = elapsed();
34
-
35
- // dimension of the vectors to index
36
- int d = 64;
37
-
38
- // size of the database we plan to index
39
- size_t nb = 1000 * 1000;
40
- size_t add_bs = 10000; // # size of the blocks to add
41
-
42
- // make a set of nt training vectors in the unit cube
43
- // (could be the database)
44
- size_t nt = 100 * 1000;
45
-
46
- //---------------------------------------------------------------
47
- // Define the core quantizer
48
- // We choose a multiple inverted index for faster training with less data
49
- // and because it usually offers best accuracy/speed trade-offs
50
- //
51
- // We assume here that the lifespan of this coarse quantizer will cover the
52
- // lifespan of the inverted-file index IndexIVFPQ below
53
- // With dynamic allocation, one may give the responsibility to free the
54
- // quantizer to the inverted-file index (with attribute do_delete_quantizer)
55
- //
56
- // Note: a regular clustering algorithm would be defined as:
57
- // faiss::IndexFlatL2 coarse_quantizer (d);
58
- //
59
- // Use nhash=2 subquantizers to define the product coarse quantizer
60
- // Number of bits: we will have 2^nbits_subq centroids per subquantizer
61
- // meaning (2^nbits_subq)^nhash distinct inverted lists
62
- //
63
- // The parameter bytes_per_code is determined by the memory
64
- // constraint, the dataset will use nb * (bytes_per_code + 8)
65
- // bytes.
66
- //
67
- // The parameter nbits_subq is determined by the size of the dataset to index.
68
- //
69
- size_t nhash = 2;
70
- size_t nbits_subq = 9;
71
- size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
72
- int bytes_per_code = 16;
73
-
74
- faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
75
-
76
- printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
77
- nhash, nbits_subq, ncentroids, nb);
78
-
79
- // the coarse quantizer should not be dealloced before the index
80
- // 4 = nb of bytes per code (d must be a multiple of this)
81
- // 8 = nb of bits per sub-code (almost always 8)
82
- faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
83
- faiss::IndexIVFPQ index (&coarse_quantizer, d, ncentroids, bytes_per_code, 8);
84
- index.quantizer_trains_alone = true;
85
-
86
- // define the number of probes. 2048 is for high-dim, overkill in practice
87
- // Use 4-1024 depending on the trade-off speed accuracy that you want
88
- index.nprobe = 2048;
89
-
90
-
91
- std::mt19937 rng;
92
- std::uniform_real_distribution<> distrib;
93
-
94
- { // training.
95
-
96
- // The distribution of the training vectors should be the same
97
- // as the database vectors. It could be a sub-sample of the
98
- // database vectors, if sampling is not biased. Here we just
99
- // randomly generate the vectors.
100
-
101
- printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
102
- elapsed() - t0, nt, d);
103
-
104
- std::vector <float> trainvecs (nt * d);
105
- for (size_t i = 0; i < nt; i++) {
106
- for (size_t j = 0; j < d; j++) {
107
- trainvecs[i * d + j] = distrib(rng);
108
- }
109
- }
110
-
111
- printf ("[%.3f s] Training the index\n", elapsed() - t0);
112
- index.verbose = true;
113
- index.train (nt, trainvecs.data());
114
- }
115
-
116
- // the index can be re-loaded later with
117
- // faiss::Index * idx = faiss::read_index("/tmp/trained_index.faissindex");
118
- faiss::write_index(&index, "/tmp/trained_index.faissindex");
119
-
120
- size_t nq;
121
- std::vector<float> queries;
122
-
123
- { // populating the database
124
- printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
125
- elapsed() - t0, nb);
126
-
127
- std::vector <float> database (nb * d);
128
- std::vector <long> ids (nb);
129
- for (size_t i = 0; i < nb; i++) {
130
- for (size_t j = 0; j < d; j++) {
131
- database[i * d + j] = distrib(rng);
132
- }
133
- ids[i] = 8760000000L + i;
134
- }
135
-
136
- printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
137
-
138
- for (size_t begin = 0; begin < nb; begin += add_bs) {
139
- size_t end = std::min (begin + add_bs, nb);
140
- index.add_with_ids (end - begin,
141
- database.data() + d * begin,
142
- ids.data() + begin);
143
- }
144
-
145
- // remember a few elements from the database as queries
146
- int i0 = 1234;
147
- int i1 = 1244;
148
-
149
- nq = i1 - i0;
150
- queries.resize (nq * d);
151
- for (int i = i0; i < i1; i++) {
152
- for (int j = 0; j < d; j++) {
153
- queries [(i - i0) * d + j] = database [i * d + j];
154
- }
155
- }
156
- }
157
-
158
- // A few notes on the internal format of the index:
159
- //
160
- // - the posting lists for PQ codes are index.codes, which is a
161
- // std::vector < std::vector<uint8_t> >
162
- // if n is the length of posting list #i, codes[i] has length bytes_per_code * n
163
- //
164
- // - the corresponding ids are stored in index.ids
165
- //
166
- // - given a vector float *x, finding which k centroids are
167
- // closest to it (ie to find the nearest neighbors) can be done with
168
- //
169
- // long *centroid_ids = new long[k];
170
- // float *distances = new float[k];
171
- // index.quantizer->search (1, x, k, distances, centroid_ids);
172
- //
173
-
174
- faiss::write_index(&index, "/tmp/populated_index.faissindex");
175
-
176
- { // searching the database
177
- int k = 5;
178
- printf ("[%.3f s] Searching the %d nearest neighbors "
179
- "of %ld vectors in the index\n",
180
- elapsed() - t0, k, nq);
181
-
182
- std::vector<faiss::Index::idx_t> nns (k * nq);
183
- std::vector<float> dis (k * nq);
184
-
185
- index.search (nq, queries.data(), k, dis.data(), nns.data());
186
-
187
- printf ("[%.3f s] Query results (vector ids, then distances):\n",
188
- elapsed() - t0);
189
-
190
- for (int i = 0; i < nq; i++) {
191
- printf ("query %2d: ", i);
192
- for (int j = 0; j < k; j++) {
193
- printf ("%7ld ", nns[j + i * k]);
194
- }
195
- printf ("\n dis: ");
196
- for (int j = 0; j < k; j++) {
197
- printf ("%7g ", dis[j + i * k]);
198
- }
199
- printf ("\n");
200
- }
201
- }
202
- return 0;
203
- }