faiss 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -1,42 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // Copyright 2004-present Facebook. All Rights Reserved
9
- // -*- c++ -*-
10
- // I/O code for indexes
11
-
12
- #include "index_io_c.h"
13
- #include "index_io.h"
14
- #include "macros_impl.h"
15
-
16
- using faiss::Index;
17
-
18
- int faiss_write_index(const FaissIndex *idx, FILE *f) {
19
- try {
20
- faiss::write_index(reinterpret_cast<const Index*>(idx), f);
21
- } CATCH_AND_HANDLE
22
- }
23
-
24
- int faiss_write_index_fname(const FaissIndex *idx, const char *fname) {
25
- try {
26
- faiss::write_index(reinterpret_cast<const Index*>(idx), fname);
27
- } CATCH_AND_HANDLE
28
- }
29
-
30
- int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out) {
31
- try {
32
- auto out = faiss::read_index(f, io_flags);
33
- *p_out = reinterpret_cast<FaissIndex*>(out);
34
- } CATCH_AND_HANDLE
35
- }
36
-
37
- int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out) {
38
- try {
39
- auto out = faiss::read_index(fname, io_flags);
40
- *p_out = reinterpret_cast<FaissIndex*>(out);
41
- } CATCH_AND_HANDLE
42
- }
@@ -1,50 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // Copyright 2004-present Facebook. All Rights Reserved
9
- // -*- c++ -*-
10
- // I/O code for indexes
11
-
12
-
13
- #ifndef FAISS_INDEX_IO_C_H
14
- #define FAISS_INDEX_IO_C_H
15
-
16
- #include <stdio.h>
17
- #include "faiss_c.h"
18
- #include "Index_c.h"
19
-
20
- #ifdef __cplusplus
21
- extern "C" {
22
- #endif
23
-
24
- /** Write index to a file.
25
- * This is equivalent to `faiss::write_index` when a file descriptor is provided.
26
- */
27
- int faiss_write_index(const FaissIndex *idx, FILE *f);
28
-
29
- /** Write index to a file.
30
- * This is equivalent to `faiss::write_index` when a file path is provided.
31
- */
32
- int faiss_write_index_fname(const FaissIndex *idx, const char *fname);
33
-
34
- #define FAISS_IO_FLAG_MMAP 1
35
- #define FAISS_IO_FLAG_READ_ONLY 2
36
-
37
- /** Read index from a file.
38
- * This is equivalent to `faiss::read_index` when a file descriptor is given.
39
- */
40
- int faiss_read_index(FILE *f, int io_flags, FaissIndex **p_out);
41
-
42
- /** Read index from a file.
43
- * This is equivalent to `faiss::read_index` when a file path is given.
44
- */
45
- int faiss_read_index_fname(const char *fname, int io_flags, FaissIndex **p_out);
46
-
47
- #ifdef __cplusplus
48
- }
49
- #endif
50
- #endif
@@ -1,110 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
- // Copyright 2004-present Facebook. All Rights Reserved.
9
- // -*- c++ -*-
10
-
11
- /// Utility macros for the C wrapper implementation.
12
-
13
- #ifndef MACROS_IMPL_H
14
- #define MACROS_IMPL_H
15
-
16
- #include "faiss_c.h"
17
- #include "FaissException.h"
18
- #include "error_impl.h"
19
- #include <stdexcept>
20
- #include <iostream>
21
-
22
- #ifdef NDEBUG
23
- #define CATCH_AND_HANDLE \
24
- catch (faiss::FaissException& e) { \
25
- faiss_last_exception = \
26
- std::make_exception_ptr(e); \
27
- return -2; \
28
- } catch (std::exception& e) { \
29
- faiss_last_exception = \
30
- std::make_exception_ptr(e); \
31
- return -4; \
32
- } catch (...) { \
33
- faiss_last_exception = \
34
- std::make_exception_ptr( \
35
- std::runtime_error("Unknown error")); \
36
- return -1; \
37
- } return 0;
38
- #else
39
- #define CATCH_AND_HANDLE \
40
- catch (faiss::FaissException& e) { \
41
- std::cerr << e.what() << '\n'; \
42
- faiss_last_exception = \
43
- std::make_exception_ptr(e); \
44
- return -2; \
45
- } catch (std::exception& e) { \
46
- std::cerr << e.what() << '\n'; \
47
- faiss_last_exception = \
48
- std::make_exception_ptr(e); \
49
- return -4; \
50
- } catch (...) { \
51
- std::cerr << "Unrecognized exception!\n"; \
52
- faiss_last_exception = \
53
- std::make_exception_ptr( \
54
- std::runtime_error("Unknown error")); \
55
- return -1; \
56
- } return 0;
57
- #endif
58
-
59
- #define DEFINE_GETTER(clazz, ty, name) \
60
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
61
- return static_cast< ty >( \
62
- reinterpret_cast< const faiss::clazz *>(obj)-> name \
63
- ); \
64
- }
65
-
66
- #define DEFINE_GETTER_SUBCLASS(clazz, parent, ty, name) \
67
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
68
- return static_cast< ty >( \
69
- reinterpret_cast<const faiss::parent::clazz *>(obj)-> name \
70
- ); \
71
- }
72
-
73
- #define DEFINE_GETTER_PERMISSIVE(clazz, ty, name) \
74
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
75
- return ( ty ) ( \
76
- reinterpret_cast<const faiss::clazz *>(obj)-> name \
77
- ); \
78
- }
79
-
80
- #define DEFINE_GETTER_SUBCLASS_PERMISSIVE(clazz, parent, ty, name) \
81
- ty faiss_ ## clazz ## _ ## name (const Faiss ## clazz *obj) { \
82
- return ( ty ) ( \
83
- reinterpret_cast<const faiss::parent::clazz *>(obj)-> name \
84
- ); \
85
- }
86
-
87
- #define DEFINE_SETTER(clazz, ty, name) \
88
- void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty val) { \
89
- reinterpret_cast< faiss::clazz *>(obj)-> name = val; \
90
- }
91
-
92
- #define DEFINE_SETTER_STATIC(clazz, ty_to, ty_from, name) \
93
- void faiss_ ## clazz ## _set_ ## name (Faiss ## clazz *obj, ty_from val) { \
94
- reinterpret_cast< faiss::clazz *>(obj)-> name = \
95
- static_cast< ty_to >(val); \
96
- }
97
-
98
- #define DEFINE_DESTRUCTOR(clazz) \
99
- void faiss_ ## clazz ## _free (Faiss ## clazz *obj) { \
100
- delete reinterpret_cast<faiss::clazz *>(obj); \
101
- }
102
-
103
- #define DEFINE_INDEX_DOWNCAST(clazz) \
104
- Faiss ## clazz * faiss_ ## clazz ## _cast (FaissIndex* index) { \
105
- return reinterpret_cast<Faiss ## clazz *>( \
106
- dynamic_cast< faiss::clazz *>( \
107
- reinterpret_cast<faiss::Index*>(index))); \
108
- }
109
-
110
- #endif
@@ -1,154 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
-
10
- #include <cmath>
11
- #include <cstdio>
12
- #include <cstdlib>
13
- #include <random>
14
-
15
- #include <sys/time.h>
16
-
17
-
18
- #include <faiss/IndexPQ.h>
19
- #include <faiss/IndexIVFFlat.h>
20
- #include <faiss/IndexFlat.h>
21
- #include <faiss/index_io.h>
22
-
23
- double elapsed ()
24
- {
25
- struct timeval tv;
26
- gettimeofday (&tv, nullptr);
27
- return tv.tv_sec + tv.tv_usec * 1e-6;
28
- }
29
-
30
-
31
- int main ()
32
- {
33
- double t0 = elapsed();
34
-
35
- // dimension of the vectors to index
36
- int d = 128;
37
-
38
- // size of the database we plan to index
39
- size_t nb = 1000 * 1000;
40
-
41
- // make a set of nt training vectors in the unit cube
42
- // (could be the database)
43
- size_t nt = 100 * 1000;
44
-
45
- //---------------------------------------------------------------
46
- // Define the core quantizer
47
- // We choose a multiple inverted index for faster training with less data
48
- // and because it usually offers best accuracy/speed trade-offs
49
- //
50
- // We here assume that the lifespan of this coarse quantizer will cover the
51
- // lifespan of the inverted-file quantizer IndexIVFFlat below
52
- // With dynamic allocation, one may give the responsibility to free the
53
- // quantizer to the inverted-file index (with attribute do_delete_quantizer)
54
- //
55
- // Note: a regular clustering algorithm would be defined as:
56
- // faiss::IndexFlatL2 coarse_quantizer (d);
57
- //
58
- // Use nhash=2 subquantizers used to define the product coarse quantizer
59
- // Number of bits: we will have 2^nbits_coarse centroids per subquantizer
60
- // meaning (2^12)^nhash distinct inverted lists
61
- size_t nhash = 2;
62
- size_t nbits_subq = int (log2 (nb+1) / 2); // good choice in general
63
- size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
64
-
65
- faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
66
-
67
- printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
68
- nhash, nbits_subq, ncentroids, nb);
69
-
70
- // the coarse quantizer should not be dealloced before the index
71
- // 4 = nb of bytes per code (d must be a multiple of this)
72
- // 8 = nb of bits per sub-code (almost always 8)
73
- faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
74
- faiss::IndexIVFFlat index (&coarse_quantizer, d, ncentroids, metric);
75
- index.quantizer_trains_alone = true;
76
-
77
- // define the number of probes. 2048 is for high-dim, overkill in practice
78
- // Use 4-1024 depending on the trade-off speed accuracy that you want
79
- index.nprobe = 2048;
80
-
81
- std::mt19937 rng;
82
- std::uniform_real_distribution<> distrib;
83
-
84
- { // training
85
- printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
86
- elapsed() - t0, nt, d);
87
-
88
- std::vector <float> trainvecs (nt * d);
89
- for (size_t i = 0; i < nt * d; i++) {
90
- trainvecs[i] = distrib(rng);
91
- }
92
-
93
- printf ("[%.3f s] Training the index\n", elapsed() - t0);
94
- index.verbose = true;
95
- index.train (nt, trainvecs.data());
96
- }
97
-
98
- size_t nq;
99
- std::vector<float> queries;
100
-
101
- { // populating the database
102
- printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
103
- elapsed() - t0, nb);
104
-
105
- std::vector <float> database (nb * d);
106
- for (size_t i = 0; i < nb * d; i++) {
107
- database[i] = distrib(rng);
108
- }
109
-
110
- printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
111
-
112
- index.add (nb, database.data());
113
-
114
- // remember a few elements from the database as queries
115
- int i0 = 1234;
116
- int i1 = 1244;
117
-
118
- nq = i1 - i0;
119
- queries.resize (nq * d);
120
- for (int i = i0; i < i1; i++) {
121
- for (int j = 0; j < d; j++) {
122
- queries [(i - i0) * d + j] = database [i * d + j];
123
- }
124
- }
125
- }
126
-
127
- { // searching the database
128
- int k = 5;
129
- printf ("[%.3f s] Searching the %d nearest neighbors "
130
- "of %ld vectors in the index\n",
131
- elapsed() - t0, k, nq);
132
-
133
- std::vector<faiss::Index::idx_t> nns (k * nq);
134
- std::vector<float> dis (k * nq);
135
-
136
- index.search (nq, queries.data(), k, dis.data(), nns.data());
137
-
138
- printf ("[%.3f s] Query results (vector ids, then distances):\n",
139
- elapsed() - t0);
140
-
141
- for (int i = 0; i < nq; i++) {
142
- printf ("query %2d: ", i);
143
- for (int j = 0; j < k; j++) {
144
- printf ("%7ld ", nns[j + i * k]);
145
- }
146
- printf ("\n dis: ");
147
- for (int j = 0; j < k; j++) {
148
- printf ("%7g ", dis[j + i * k]);
149
- }
150
- printf ("\n");
151
- }
152
- }
153
- return 0;
154
- }
@@ -1,203 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
-
10
- #include <cmath>
11
- #include <cstdio>
12
- #include <cstdlib>
13
- #include <random>
14
-
15
- #include <sys/time.h>
16
-
17
-
18
- #include <faiss/IndexPQ.h>
19
- #include <faiss/IndexIVFPQ.h>
20
- #include <faiss/IndexFlat.h>
21
- #include <faiss/index_io.h>
22
-
23
- double elapsed ()
24
- {
25
- struct timeval tv;
26
- gettimeofday (&tv, nullptr);
27
- return tv.tv_sec + tv.tv_usec * 1e-6;
28
- }
29
-
30
-
31
- int main ()
32
- {
33
- double t0 = elapsed();
34
-
35
- // dimension of the vectors to index
36
- int d = 64;
37
-
38
- // size of the database we plan to index
39
- size_t nb = 1000 * 1000;
40
- size_t add_bs = 10000; // # size of the blocks to add
41
-
42
- // make a set of nt training vectors in the unit cube
43
- // (could be the database)
44
- size_t nt = 100 * 1000;
45
-
46
- //---------------------------------------------------------------
47
- // Define the core quantizer
48
- // We choose a multiple inverted index for faster training with less data
49
- // and because it usually offers best accuracy/speed trade-offs
50
- //
51
- // We here assume that the lifespan of this coarse quantizer will cover the
52
- // lifespan of the inverted-file quantizer IndexIVFFlat below
53
- // With dynamic allocation, one may give the responsibility to free the
54
- // quantizer to the inverted-file index (with attribute do_delete_quantizer)
55
- //
56
- // Note: a regular clustering algorithm would be defined as:
57
- // faiss::IndexFlatL2 coarse_quantizer (d);
58
- //
59
- // Use nhash=2 subquantizers used to define the product coarse quantizer
60
- // Number of bits: we will have 2^nbits_coarse centroids per subquantizer
61
- // meaning (2^12)^nhash distinct inverted lists
62
- //
63
- // The parameter bytes_per_code is determined by the memory
64
- // constraint, the dataset will use nb * (bytes_per_code + 8)
65
- // bytes.
66
- //
67
- // The parameter nbits_subq is determined by the size of the dataset to index.
68
- //
69
- size_t nhash = 2;
70
- size_t nbits_subq = 9;
71
- size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
72
- int bytes_per_code = 16;
73
-
74
- faiss::MultiIndexQuantizer coarse_quantizer (d, nhash, nbits_subq);
75
-
76
- printf ("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
77
- nhash, nbits_subq, ncentroids, nb);
78
-
79
- // the coarse quantizer should not be dealloced before the index
80
- // 4 = nb of bytes per code (d must be a multiple of this)
81
- // 8 = nb of bits per sub-code (almost always 8)
82
- faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
83
- faiss::IndexIVFPQ index (&coarse_quantizer, d, ncentroids, bytes_per_code, 8);
84
- index.quantizer_trains_alone = true;
85
-
86
- // define the number of probes. 2048 is for high-dim, overkill in practice
87
- // Use 4-1024 depending on the trade-off speed accuracy that you want
88
- index.nprobe = 2048;
89
-
90
-
91
- std::mt19937 rng;
92
- std::uniform_real_distribution<> distrib;
93
-
94
- { // training.
95
-
96
- // The distribution of the training vectors should be the same
97
- // as the database vectors. It could be a sub-sample of the
98
- // database vectors, if sampling is not biased. Here we just
99
- // randomly generate the vectors.
100
-
101
- printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
102
- elapsed() - t0, nt, d);
103
-
104
- std::vector <float> trainvecs (nt * d);
105
- for (size_t i = 0; i < nt; i++) {
106
- for (size_t j = 0; j < d; j++) {
107
- trainvecs[i * d + j] = distrib(rng);
108
- }
109
- }
110
-
111
- printf ("[%.3f s] Training the index\n", elapsed() - t0);
112
- index.verbose = true;
113
- index.train (nt, trainvecs.data());
114
- }
115
-
116
- // the index can be re-loaded later with
117
- // faiss::Index * idx = faiss::read_index("/tmp/trained_index.faissindex");
118
- faiss::write_index(&index, "/tmp/trained_index.faissindex");
119
-
120
- size_t nq;
121
- std::vector<float> queries;
122
-
123
- { // populating the database
124
- printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
125
- elapsed() - t0, nb);
126
-
127
- std::vector <float> database (nb * d);
128
- std::vector <long> ids (nb);
129
- for (size_t i = 0; i < nb; i++) {
130
- for (size_t j = 0; j < d; j++) {
131
- database[i * d + j] = distrib(rng);
132
- }
133
- ids[i] = 8760000000L + i;
134
- }
135
-
136
- printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
137
-
138
- for (size_t begin = 0; begin < nb; begin += add_bs) {
139
- size_t end = std::min (begin + add_bs, nb);
140
- index.add_with_ids (end - begin,
141
- database.data() + d * begin,
142
- ids.data() + begin);
143
- }
144
-
145
- // remember a few elements from the database as queries
146
- int i0 = 1234;
147
- int i1 = 1244;
148
-
149
- nq = i1 - i0;
150
- queries.resize (nq * d);
151
- for (int i = i0; i < i1; i++) {
152
- for (int j = 0; j < d; j++) {
153
- queries [(i - i0) * d + j] = database [i * d + j];
154
- }
155
- }
156
- }
157
-
158
- // A few notes on the internal format of the index:
159
- //
160
- // - the posting lists for PQ codes are index.codes, which is a
161
- // std::vector < std::vector<uint8_t> >
162
- // if n is the length of posting list #i, codes[i] has length bytes_per_code * n
163
- //
164
- // - the corresponding ids are stored in index.ids
165
- //
166
- // - given a vector float *x, finding which k centroids are
167
- // closest to it (ie to find the nearest neighbors) can be done with
168
- //
169
- // long *centroid_ids = new long[k];
170
- // float *distances = new float[k];
171
- // index.quantizer->search (1, x, k, distances, centroid_ids);
172
- //
173
-
174
- faiss::write_index(&index, "/tmp/populated_index.faissindex");
175
-
176
- { // searching the database
177
- int k = 5;
178
- printf ("[%.3f s] Searching the %d nearest neighbors "
179
- "of %ld vectors in the index\n",
180
- elapsed() - t0, k, nq);
181
-
182
- std::vector<faiss::Index::idx_t> nns (k * nq);
183
- std::vector<float> dis (k * nq);
184
-
185
- index.search (nq, queries.data(), k, dis.data(), nns.data());
186
-
187
- printf ("[%.3f s] Query results (vector ids, then distances):\n",
188
- elapsed() - t0);
189
-
190
- for (int i = 0; i < nq; i++) {
191
- printf ("query %2d: ", i);
192
- for (int j = 0; j < k; j++) {
193
- printf ("%7ld ", nns[j + i * k]);
194
- }
195
- printf ("\n dis: ");
196
- for (int j = 0; j < k; j++) {
197
- printf ("%7g ", dis[j + i * k]);
198
- }
199
- printf ("\n");
200
- }
201
- }
202
- return 0;
203
- }