faiss 0.1.5 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +6 -2
- data/ext/faiss/index.cpp +114 -43
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss.rb +0 -5
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +24 -10
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
|
@@ -10,71 +10,81 @@
|
|
|
10
10
|
#include <faiss/impl/FaissException.h>
|
|
11
11
|
#include <sstream>
|
|
12
12
|
|
|
13
|
-
#ifdef
|
|
13
|
+
#ifdef __GNUG__
|
|
14
14
|
#include <cxxabi.h>
|
|
15
15
|
#endif
|
|
16
16
|
|
|
17
17
|
namespace faiss {
|
|
18
18
|
|
|
19
|
-
FaissException::FaissException(const std::string& m)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
19
|
+
FaissException::FaissException(const std::string& m) : msg(m) {}
|
|
20
|
+
|
|
21
|
+
FaissException::FaissException(
|
|
22
|
+
const std::string& m,
|
|
23
|
+
const char* funcName,
|
|
24
|
+
const char* file,
|
|
25
|
+
int line) {
|
|
26
|
+
int size = snprintf(
|
|
27
|
+
nullptr,
|
|
28
|
+
0,
|
|
29
|
+
"Error in %s at %s:%d: %s",
|
|
30
|
+
funcName,
|
|
31
|
+
file,
|
|
32
|
+
line,
|
|
33
|
+
m.c_str());
|
|
34
|
+
msg.resize(size + 1);
|
|
35
|
+
snprintf(
|
|
36
|
+
&msg[0],
|
|
37
|
+
msg.size(),
|
|
38
|
+
"Error in %s at %s:%d: %s",
|
|
39
|
+
funcName,
|
|
40
|
+
file,
|
|
41
|
+
line,
|
|
42
|
+
m.c_str());
|
|
32
43
|
}
|
|
33
44
|
|
|
34
|
-
const char*
|
|
35
|
-
|
|
36
|
-
return msg.c_str();
|
|
45
|
+
const char* FaissException::what() const noexcept {
|
|
46
|
+
return msg.c_str();
|
|
37
47
|
}
|
|
38
48
|
|
|
39
49
|
void handleExceptions(
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
50
|
+
std::vector<std::pair<int, std::exception_ptr>>& exceptions) {
|
|
51
|
+
if (exceptions.size() == 1) {
|
|
52
|
+
// throw the single received exception directly
|
|
53
|
+
std::rethrow_exception(exceptions.front().second);
|
|
54
|
+
|
|
55
|
+
} else if (exceptions.size() > 1) {
|
|
56
|
+
// multiple exceptions; aggregate them and return a single exception
|
|
57
|
+
std::stringstream ss;
|
|
58
|
+
|
|
59
|
+
for (auto& p : exceptions) {
|
|
60
|
+
try {
|
|
61
|
+
std::rethrow_exception(p.second);
|
|
62
|
+
} catch (std::exception& ex) {
|
|
63
|
+
if (ex.what()) {
|
|
64
|
+
// exception message available
|
|
65
|
+
ss << "Exception thrown from index " << p.first << ": "
|
|
66
|
+
<< ex.what() << "\n";
|
|
67
|
+
} else {
|
|
68
|
+
// No message available
|
|
69
|
+
ss << "Unknown exception thrown from index " << p.first
|
|
70
|
+
<< "\n";
|
|
71
|
+
}
|
|
72
|
+
} catch (...) {
|
|
73
|
+
ss << "Unknown exception thrown from index " << p.first << "\n";
|
|
74
|
+
}
|
|
60
75
|
}
|
|
61
|
-
} catch (...) {
|
|
62
|
-
ss << "Unknown exception thrown from index " << p.first << "\n";
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
76
|
|
|
66
|
-
|
|
67
|
-
|
|
77
|
+
throw FaissException(ss.str());
|
|
78
|
+
}
|
|
68
79
|
}
|
|
69
80
|
|
|
70
|
-
|
|
71
81
|
// From
|
|
72
82
|
// https://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
|
|
73
83
|
|
|
74
84
|
std::string demangle_cpp_symbol(const char* name) {
|
|
75
85
|
#ifdef __GNUG__
|
|
76
86
|
int status = -1;
|
|
77
|
-
const char
|
|
87
|
+
const char* res = abi::__cxa_demangle(name, nullptr, nullptr, &status);
|
|
78
88
|
std::string sres;
|
|
79
89
|
if (status == 0) {
|
|
80
90
|
sres = res;
|
|
@@ -87,6 +97,4 @@ std::string demangle_cpp_symbol(const char* name) {
|
|
|
87
97
|
#endif
|
|
88
98
|
}
|
|
89
99
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
} // namespace
|
|
100
|
+
} // namespace faiss
|
|
@@ -12,56 +12,69 @@
|
|
|
12
12
|
|
|
13
13
|
#include <exception>
|
|
14
14
|
#include <string>
|
|
15
|
-
#include <vector>
|
|
16
15
|
#include <utility>
|
|
16
|
+
#include <vector>
|
|
17
17
|
|
|
18
18
|
namespace faiss {
|
|
19
19
|
|
|
20
20
|
/// Base class for Faiss exceptions
|
|
21
21
|
class FaissException : public std::exception {
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
public:
|
|
23
|
+
explicit FaissException(const std::string& msg);
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
25
|
+
FaissException(
|
|
26
|
+
const std::string& msg,
|
|
27
|
+
const char* funcName,
|
|
28
|
+
const char* file,
|
|
29
|
+
int line);
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
31
|
+
/// from std::exception
|
|
32
|
+
const char* what() const noexcept override;
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
std::string msg;
|
|
34
35
|
};
|
|
35
36
|
|
|
36
37
|
/// Handle multiple exceptions from worker threads, throwing an appropriate
|
|
37
38
|
/// exception that aggregates the information
|
|
38
39
|
/// The pair int is the thread that generated the exception
|
|
39
|
-
void
|
|
40
|
-
|
|
40
|
+
void handleExceptions(
|
|
41
|
+
std::vector<std::pair<int, std::exception_ptr>>& exceptions);
|
|
41
42
|
|
|
42
43
|
/** bare-bones unique_ptr
|
|
43
44
|
* this one deletes with delete [] */
|
|
44
|
-
template<class T>
|
|
45
|
+
template <class T>
|
|
45
46
|
struct ScopeDeleter {
|
|
46
|
-
const T
|
|
47
|
-
explicit ScopeDeleter
|
|
48
|
-
void release
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
47
|
+
const T* ptr;
|
|
48
|
+
explicit ScopeDeleter(const T* ptr = nullptr) : ptr(ptr) {}
|
|
49
|
+
void release() {
|
|
50
|
+
ptr = nullptr;
|
|
51
|
+
}
|
|
52
|
+
void set(const T* ptr_in) {
|
|
53
|
+
ptr = ptr_in;
|
|
54
|
+
}
|
|
55
|
+
void swap(ScopeDeleter<T>& other) {
|
|
56
|
+
std::swap(ptr, other.ptr);
|
|
57
|
+
}
|
|
58
|
+
~ScopeDeleter() {
|
|
59
|
+
delete[] ptr;
|
|
53
60
|
}
|
|
54
61
|
};
|
|
55
62
|
|
|
56
63
|
/** same but deletes with the simple delete (least common case) */
|
|
57
|
-
template<class T>
|
|
64
|
+
template <class T>
|
|
58
65
|
struct ScopeDeleter1 {
|
|
59
|
-
const T
|
|
60
|
-
explicit ScopeDeleter1
|
|
61
|
-
void release
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
66
|
+
const T* ptr;
|
|
67
|
+
explicit ScopeDeleter1(const T* ptr = nullptr) : ptr(ptr) {}
|
|
68
|
+
void release() {
|
|
69
|
+
ptr = nullptr;
|
|
70
|
+
}
|
|
71
|
+
void set(const T* ptr_in) {
|
|
72
|
+
ptr = ptr_in;
|
|
73
|
+
}
|
|
74
|
+
void swap(ScopeDeleter1<T>& other) {
|
|
75
|
+
std::swap(ptr, other.ptr);
|
|
76
|
+
}
|
|
77
|
+
~ScopeDeleter1() {
|
|
65
78
|
delete ptr;
|
|
66
79
|
}
|
|
67
80
|
};
|
|
@@ -69,7 +82,6 @@ struct ScopeDeleter1 {
|
|
|
69
82
|
/// make typeids more readable
|
|
70
83
|
std::string demangle_cpp_symbol(const char* name);
|
|
71
84
|
|
|
72
|
-
|
|
73
|
-
}
|
|
85
|
+
} // namespace faiss
|
|
74
86
|
|
|
75
87
|
#endif
|
|
@@ -15,275 +15,254 @@
|
|
|
15
15
|
|
|
16
16
|
namespace faiss {
|
|
17
17
|
|
|
18
|
-
|
|
19
18
|
/**************************************************************
|
|
20
19
|
* HNSW structure implementation
|
|
21
20
|
**************************************************************/
|
|
22
21
|
|
|
23
|
-
int HNSW::nb_neighbors(int layer_no) const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
cum_nneighbor_per_level[layer_no];
|
|
22
|
+
int HNSW::nb_neighbors(int layer_no) const {
|
|
23
|
+
return cum_nneighbor_per_level[layer_no + 1] -
|
|
24
|
+
cum_nneighbor_per_level[layer_no];
|
|
27
25
|
}
|
|
28
26
|
|
|
29
|
-
void HNSW::set_nb_neighbors(int level_no, int n)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
}
|
|
27
|
+
void HNSW::set_nb_neighbors(int level_no, int n) {
|
|
28
|
+
FAISS_THROW_IF_NOT(levels.size() == 0);
|
|
29
|
+
int cur_n = nb_neighbors(level_no);
|
|
30
|
+
for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
|
|
31
|
+
cum_nneighbor_per_level[i] += n - cur_n;
|
|
32
|
+
}
|
|
36
33
|
}
|
|
37
34
|
|
|
38
|
-
int HNSW::cum_nb_neighbors(int layer_no) const
|
|
39
|
-
|
|
40
|
-
return cum_nneighbor_per_level[layer_no];
|
|
35
|
+
int HNSW::cum_nb_neighbors(int layer_no) const {
|
|
36
|
+
return cum_nneighbor_per_level[layer_no];
|
|
41
37
|
}
|
|
42
38
|
|
|
43
|
-
void HNSW::neighbor_range(idx_t no, int layer_no,
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
*end = o + cum_nb_neighbors(layer_no + 1);
|
|
39
|
+
void HNSW::neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end)
|
|
40
|
+
const {
|
|
41
|
+
size_t o = offsets[no];
|
|
42
|
+
*begin = o + cum_nb_neighbors(layer_no);
|
|
43
|
+
*end = o + cum_nb_neighbors(layer_no + 1);
|
|
49
44
|
}
|
|
50
45
|
|
|
51
|
-
|
|
52
|
-
|
|
53
46
|
HNSW::HNSW(int M) : rng(12345) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
47
|
+
set_default_probas(M, 1.0 / log(M));
|
|
48
|
+
max_level = -1;
|
|
49
|
+
entry_point = -1;
|
|
50
|
+
efSearch = 16;
|
|
51
|
+
efConstruction = 40;
|
|
52
|
+
upper_beam = 1;
|
|
53
|
+
offsets.push_back(0);
|
|
61
54
|
}
|
|
62
55
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
56
|
+
int HNSW::random_level() {
|
|
57
|
+
double f = rng.rand_float();
|
|
58
|
+
// could be a bit faster with bissection
|
|
59
|
+
for (int level = 0; level < assign_probas.size(); level++) {
|
|
60
|
+
if (f < assign_probas[level]) {
|
|
61
|
+
return level;
|
|
62
|
+
}
|
|
63
|
+
f -= assign_probas[level];
|
|
71
64
|
}
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
// happens with exponentially low probability
|
|
75
|
-
return assign_probas.size() - 1;
|
|
65
|
+
// happens with exponentially low probability
|
|
66
|
+
return assign_probas.size() - 1;
|
|
76
67
|
}
|
|
77
68
|
|
|
78
|
-
void HNSW::set_default_probas(int M, float levelMult)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
69
|
+
void HNSW::set_default_probas(int M, float levelMult) {
|
|
70
|
+
int nn = 0;
|
|
71
|
+
cum_nneighbor_per_level.push_back(0);
|
|
72
|
+
for (int level = 0;; level++) {
|
|
73
|
+
float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
|
|
74
|
+
if (proba < 1e-9)
|
|
75
|
+
break;
|
|
76
|
+
assign_probas.push_back(proba);
|
|
77
|
+
nn += level == 0 ? M * 2 : M;
|
|
78
|
+
cum_nneighbor_per_level.push_back(nn);
|
|
79
|
+
}
|
|
89
80
|
}
|
|
90
81
|
|
|
91
|
-
void HNSW::clear_neighbor_tables(int level)
|
|
92
|
-
{
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
82
|
+
void HNSW::clear_neighbor_tables(int level) {
|
|
83
|
+
for (int i = 0; i < levels.size(); i++) {
|
|
84
|
+
size_t begin, end;
|
|
85
|
+
neighbor_range(i, level, &begin, &end);
|
|
86
|
+
for (size_t j = begin; j < end; j++) {
|
|
87
|
+
neighbors[j] = -1;
|
|
88
|
+
}
|
|
98
89
|
}
|
|
99
|
-
}
|
|
100
90
|
}
|
|
101
91
|
|
|
102
|
-
|
|
103
92
|
void HNSW::reset() {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
93
|
+
max_level = -1;
|
|
94
|
+
entry_point = -1;
|
|
95
|
+
offsets.clear();
|
|
96
|
+
offsets.push_back(0);
|
|
97
|
+
levels.clear();
|
|
98
|
+
neighbors.clear();
|
|
110
99
|
}
|
|
111
100
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
level, nb_neighbors(level));
|
|
119
|
-
size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
|
|
101
|
+
void HNSW::print_neighbor_stats(int level) const {
|
|
102
|
+
FAISS_THROW_IF_NOT(level < cum_nneighbor_per_level.size());
|
|
103
|
+
printf("stats on level %d, max %d neighbors per vertex:\n",
|
|
104
|
+
level,
|
|
105
|
+
nb_neighbors(level));
|
|
106
|
+
size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
|
|
120
107
|
#pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \
|
|
121
108
|
reduction(+: tot_reciprocal) reduction(+: n_node)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
109
|
+
for (int i = 0; i < levels.size(); i++) {
|
|
110
|
+
if (levels[i] > level) {
|
|
111
|
+
n_node++;
|
|
112
|
+
size_t begin, end;
|
|
113
|
+
neighbor_range(i, level, &begin, &end);
|
|
114
|
+
std::unordered_set<int> neighset;
|
|
115
|
+
for (size_t j = begin; j < end; j++) {
|
|
116
|
+
if (neighbors[j] < 0)
|
|
117
|
+
break;
|
|
118
|
+
neighset.insert(neighbors[j]);
|
|
119
|
+
}
|
|
120
|
+
int n_neigh = neighset.size();
|
|
121
|
+
int n_common = 0;
|
|
122
|
+
int n_reciprocal = 0;
|
|
123
|
+
for (size_t j = begin; j < end; j++) {
|
|
124
|
+
storage_idx_t i2 = neighbors[j];
|
|
125
|
+
if (i2 < 0)
|
|
126
|
+
break;
|
|
127
|
+
FAISS_ASSERT(i2 != i);
|
|
128
|
+
size_t begin2, end2;
|
|
129
|
+
neighbor_range(i2, level, &begin2, &end2);
|
|
130
|
+
for (size_t j2 = begin2; j2 < end2; j2++) {
|
|
131
|
+
storage_idx_t i3 = neighbors[j2];
|
|
132
|
+
if (i3 < 0)
|
|
133
|
+
break;
|
|
134
|
+
if (i3 == i) {
|
|
135
|
+
n_reciprocal++;
|
|
136
|
+
continue;
|
|
137
|
+
}
|
|
138
|
+
if (neighset.count(i3)) {
|
|
139
|
+
neighset.erase(i3);
|
|
140
|
+
n_common++;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
tot_neigh += n_neigh;
|
|
145
|
+
tot_common += n_common;
|
|
146
|
+
tot_reciprocal += n_reciprocal;
|
|
152
147
|
}
|
|
153
|
-
}
|
|
154
|
-
tot_neigh += n_neigh;
|
|
155
|
-
tot_common += n_common;
|
|
156
|
-
tot_reciprocal += n_reciprocal;
|
|
157
148
|
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
149
|
+
float normalizer = n_node;
|
|
150
|
+
printf(" nb of nodes at that level %zd\n", n_node);
|
|
151
|
+
printf(" neighbors per node: %.2f (%zd)\n",
|
|
152
|
+
tot_neigh / normalizer,
|
|
153
|
+
tot_neigh);
|
|
154
|
+
printf(" nb of reciprocal neighbors: %.2f\n",
|
|
155
|
+
tot_reciprocal / normalizer);
|
|
156
|
+
printf(" nb of neighbors that are also neighbor-of-neighbors: %.2f (%zd)\n",
|
|
157
|
+
tot_common / normalizer,
|
|
158
|
+
tot_common);
|
|
169
159
|
}
|
|
170
160
|
|
|
161
|
+
void HNSW::fill_with_random_links(size_t n) {
|
|
162
|
+
int max_level = prepare_level_tab(n);
|
|
163
|
+
RandomGenerator rng2(456);
|
|
171
164
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
if (levels[i] > level) {
|
|
181
|
-
elts.push_back(i);
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
printf ("linking %zd elements in level %d\n",
|
|
185
|
-
elts.size(), level);
|
|
186
|
-
|
|
187
|
-
if (elts.size() == 1) continue;
|
|
165
|
+
for (int level = max_level - 1; level >= 0; --level) {
|
|
166
|
+
std::vector<int> elts;
|
|
167
|
+
for (int i = 0; i < n; i++) {
|
|
168
|
+
if (levels[i] > level) {
|
|
169
|
+
elts.push_back(i);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
printf("linking %zd elements in level %d\n", elts.size(), level);
|
|
188
173
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
size_t begin, end;
|
|
192
|
-
neighbor_range(i, 0, &begin, &end);
|
|
193
|
-
for (size_t j = begin; j < end; j++) {
|
|
194
|
-
int other = 0;
|
|
195
|
-
do {
|
|
196
|
-
other = elts[rng2.rand_int(elts.size())];
|
|
197
|
-
} while(other == i);
|
|
174
|
+
if (elts.size() == 1)
|
|
175
|
+
continue;
|
|
198
176
|
|
|
199
|
-
|
|
200
|
-
|
|
177
|
+
for (int ii = 0; ii < elts.size(); ii++) {
|
|
178
|
+
int i = elts[ii];
|
|
179
|
+
size_t begin, end;
|
|
180
|
+
neighbor_range(i, 0, &begin, &end);
|
|
181
|
+
for (size_t j = begin; j < end; j++) {
|
|
182
|
+
int other = 0;
|
|
183
|
+
do {
|
|
184
|
+
other = elts[rng2.rand_int(elts.size())];
|
|
185
|
+
} while (other == i);
|
|
186
|
+
|
|
187
|
+
neighbors[j] = other;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
201
190
|
}
|
|
202
|
-
}
|
|
203
191
|
}
|
|
204
192
|
|
|
193
|
+
int HNSW::prepare_level_tab(size_t n, bool preset_levels) {
|
|
194
|
+
size_t n0 = offsets.size() - 1;
|
|
205
195
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
196
|
+
if (preset_levels) {
|
|
197
|
+
FAISS_ASSERT(n0 + n == levels.size());
|
|
198
|
+
} else {
|
|
199
|
+
FAISS_ASSERT(n0 == levels.size());
|
|
200
|
+
for (int i = 0; i < n; i++) {
|
|
201
|
+
int pt_level = random_level();
|
|
202
|
+
levels.push_back(pt_level + 1);
|
|
203
|
+
}
|
|
204
|
+
}
|
|
209
205
|
|
|
210
|
-
|
|
211
|
-
FAISS_ASSERT (n0 + n == levels.size());
|
|
212
|
-
} else {
|
|
213
|
-
FAISS_ASSERT (n0 == levels.size());
|
|
206
|
+
int max_level = 0;
|
|
214
207
|
for (int i = 0; i < n; i++) {
|
|
215
|
-
|
|
216
|
-
|
|
208
|
+
int pt_level = levels[i + n0] - 1;
|
|
209
|
+
if (pt_level > max_level)
|
|
210
|
+
max_level = pt_level;
|
|
211
|
+
offsets.push_back(offsets.back() + cum_nb_neighbors(pt_level + 1));
|
|
212
|
+
neighbors.resize(offsets.back(), -1);
|
|
217
213
|
}
|
|
218
|
-
}
|
|
219
214
|
|
|
220
|
-
|
|
221
|
-
for (int i = 0; i < n; i++) {
|
|
222
|
-
int pt_level = levels[i + n0] - 1;
|
|
223
|
-
if (pt_level > max_level) max_level = pt_level;
|
|
224
|
-
offsets.push_back(offsets.back() +
|
|
225
|
-
cum_nb_neighbors(pt_level + 1));
|
|
226
|
-
neighbors.resize(offsets.back(), -1);
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
return max_level;
|
|
215
|
+
return max_level;
|
|
230
216
|
}
|
|
231
217
|
|
|
232
|
-
|
|
233
218
|
/** Enumerate vertices from farthest to nearest from query, keep a
|
|
234
219
|
* neighbor only if there is no previous neighbor that is closer to
|
|
235
220
|
* that vertex than the query.
|
|
236
221
|
*/
|
|
237
222
|
void HNSW::shrink_neighbor_list(
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
{
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
223
|
+
DistanceComputer& qdis,
|
|
224
|
+
std::priority_queue<NodeDistFarther>& input,
|
|
225
|
+
std::vector<NodeDistFarther>& output,
|
|
226
|
+
int max_size) {
|
|
227
|
+
while (input.size() > 0) {
|
|
228
|
+
NodeDistFarther v1 = input.top();
|
|
229
|
+
input.pop();
|
|
230
|
+
float dist_v1_q = v1.d;
|
|
231
|
+
|
|
232
|
+
bool good = true;
|
|
233
|
+
for (NodeDistFarther v2 : output) {
|
|
234
|
+
float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id);
|
|
235
|
+
|
|
236
|
+
if (dist_v1_v2 < dist_v1_q) {
|
|
237
|
+
good = false;
|
|
238
|
+
break;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
if (good) {
|
|
243
|
+
output.push_back(v1);
|
|
244
|
+
if (output.size() >= max_size) {
|
|
245
|
+
return;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
263
248
|
}
|
|
264
|
-
}
|
|
265
249
|
}
|
|
266
250
|
|
|
267
|
-
|
|
268
251
|
namespace {
|
|
269
252
|
|
|
270
|
-
|
|
271
253
|
using storage_idx_t = HNSW::storage_idx_t;
|
|
272
254
|
using NodeDistCloser = HNSW::NodeDistCloser;
|
|
273
255
|
using NodeDistFarther = HNSW::NodeDistFarther;
|
|
274
256
|
|
|
275
|
-
|
|
276
257
|
/**************************************************************
|
|
277
258
|
* Addition subroutines
|
|
278
259
|
**************************************************************/
|
|
279
260
|
|
|
280
|
-
|
|
281
261
|
/// remove neighbors from the list to make it smaller than max_size
|
|
282
262
|
void shrink_neighbor_list(
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
{
|
|
263
|
+
DistanceComputer& qdis,
|
|
264
|
+
std::priority_queue<NodeDistCloser>& resultSet1,
|
|
265
|
+
int max_size) {
|
|
287
266
|
if (resultSet1.size() < max_size) {
|
|
288
267
|
return;
|
|
289
268
|
}
|
|
@@ -300,516 +279,521 @@ void shrink_neighbor_list(
|
|
|
300
279
|
for (NodeDistFarther curen2 : returnlist) {
|
|
301
280
|
resultSet1.emplace(curen2.d, curen2.id);
|
|
302
281
|
}
|
|
303
|
-
|
|
304
282
|
}
|
|
305
283
|
|
|
306
|
-
|
|
307
284
|
/// add a link between two elements, possibly shrinking the list
|
|
308
285
|
/// of links to make room for it.
|
|
309
|
-
void add_link(
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
resultSet.
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
286
|
+
void add_link(
|
|
287
|
+
HNSW& hnsw,
|
|
288
|
+
DistanceComputer& qdis,
|
|
289
|
+
storage_idx_t src,
|
|
290
|
+
storage_idx_t dest,
|
|
291
|
+
int level) {
|
|
292
|
+
size_t begin, end;
|
|
293
|
+
hnsw.neighbor_range(src, level, &begin, &end);
|
|
294
|
+
if (hnsw.neighbors[end - 1] == -1) {
|
|
295
|
+
// there is enough room, find a slot to add it
|
|
296
|
+
size_t i = end;
|
|
297
|
+
while (i > begin) {
|
|
298
|
+
if (hnsw.neighbors[i - 1] != -1)
|
|
299
|
+
break;
|
|
300
|
+
i--;
|
|
301
|
+
}
|
|
302
|
+
hnsw.neighbors[i] = dest;
|
|
303
|
+
return;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// otherwise we let them fight out which to keep
|
|
307
|
+
|
|
308
|
+
// copy to resultSet...
|
|
309
|
+
std::priority_queue<NodeDistCloser> resultSet;
|
|
310
|
+
resultSet.emplace(qdis.symmetric_dis(src, dest), dest);
|
|
311
|
+
for (size_t i = begin; i < end; i++) { // HERE WAS THE BUG
|
|
312
|
+
storage_idx_t neigh = hnsw.neighbors[i];
|
|
313
|
+
resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
shrink_neighbor_list(qdis, resultSet, end - begin);
|
|
317
|
+
|
|
318
|
+
// ...and back
|
|
319
|
+
size_t i = begin;
|
|
320
|
+
while (resultSet.size()) {
|
|
321
|
+
hnsw.neighbors[i++] = resultSet.top().id;
|
|
322
|
+
resultSet.pop();
|
|
323
|
+
}
|
|
324
|
+
// they may have shrunk more than just by 1 element
|
|
325
|
+
while (i < end) {
|
|
326
|
+
hnsw.neighbors[i++] = -1;
|
|
327
|
+
}
|
|
349
328
|
}
|
|
350
329
|
|
|
351
330
|
/// search neighbors on a single level, starting from an entry point
|
|
352
331
|
void search_neighbors_to_add(
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
332
|
+
HNSW& hnsw,
|
|
333
|
+
DistanceComputer& qdis,
|
|
334
|
+
std::priority_queue<NodeDistCloser>& results,
|
|
335
|
+
int entry_point,
|
|
336
|
+
float d_entry_point,
|
|
337
|
+
int level,
|
|
338
|
+
VisitedTable& vt) {
|
|
339
|
+
// top is nearest candidate
|
|
340
|
+
std::priority_queue<NodeDistFarther> candidates;
|
|
341
|
+
|
|
342
|
+
NodeDistFarther ev(d_entry_point, entry_point);
|
|
343
|
+
candidates.push(ev);
|
|
344
|
+
results.emplace(d_entry_point, entry_point);
|
|
345
|
+
vt.set(entry_point);
|
|
346
|
+
|
|
347
|
+
while (!candidates.empty()) {
|
|
348
|
+
// get nearest
|
|
349
|
+
const NodeDistFarther& currEv = candidates.top();
|
|
350
|
+
|
|
351
|
+
if (currEv.d > results.top().d) {
|
|
352
|
+
break;
|
|
353
|
+
}
|
|
354
|
+
int currNode = currEv.id;
|
|
355
|
+
candidates.pop();
|
|
356
|
+
|
|
357
|
+
// loop over neighbors
|
|
358
|
+
size_t begin, end;
|
|
359
|
+
hnsw.neighbor_range(currNode, level, &begin, &end);
|
|
360
|
+
for (size_t i = begin; i < end; i++) {
|
|
361
|
+
storage_idx_t nodeId = hnsw.neighbors[i];
|
|
362
|
+
if (nodeId < 0)
|
|
363
|
+
break;
|
|
364
|
+
if (vt.get(nodeId))
|
|
365
|
+
continue;
|
|
366
|
+
vt.set(nodeId);
|
|
367
|
+
|
|
368
|
+
float dis = qdis(nodeId);
|
|
369
|
+
NodeDistFarther evE1(dis, nodeId);
|
|
370
|
+
|
|
371
|
+
if (results.size() < hnsw.efConstruction || results.top().d > dis) {
|
|
372
|
+
results.emplace(dis, nodeId);
|
|
373
|
+
candidates.emplace(dis, nodeId);
|
|
374
|
+
if (results.size() > hnsw.efConstruction) {
|
|
375
|
+
results.pop();
|
|
376
|
+
}
|
|
377
|
+
}
|
|
398
378
|
}
|
|
399
|
-
}
|
|
400
379
|
}
|
|
401
|
-
|
|
402
|
-
vt.advance();
|
|
380
|
+
vt.advance();
|
|
403
381
|
}
|
|
404
382
|
|
|
405
|
-
|
|
406
383
|
/**************************************************************
|
|
407
384
|
* Searching subroutines
|
|
408
385
|
**************************************************************/
|
|
409
386
|
|
|
410
387
|
/// greedily update a nearest vector at a given level
|
|
411
|
-
void greedy_update_nearest(
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
{
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
388
|
+
void greedy_update_nearest(
|
|
389
|
+
const HNSW& hnsw,
|
|
390
|
+
DistanceComputer& qdis,
|
|
391
|
+
int level,
|
|
392
|
+
storage_idx_t& nearest,
|
|
393
|
+
float& d_nearest) {
|
|
394
|
+
for (;;) {
|
|
395
|
+
storage_idx_t prev_nearest = nearest;
|
|
396
|
+
|
|
397
|
+
size_t begin, end;
|
|
398
|
+
hnsw.neighbor_range(nearest, level, &begin, &end);
|
|
399
|
+
for (size_t i = begin; i < end; i++) {
|
|
400
|
+
storage_idx_t v = hnsw.neighbors[i];
|
|
401
|
+
if (v < 0)
|
|
402
|
+
break;
|
|
403
|
+
float dis = qdis(v);
|
|
404
|
+
if (dis < d_nearest) {
|
|
405
|
+
nearest = v;
|
|
406
|
+
d_nearest = dis;
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
if (nearest == prev_nearest) {
|
|
410
|
+
return;
|
|
411
|
+
}
|
|
412
|
+
}
|
|
435
413
|
}
|
|
436
414
|
|
|
437
|
-
|
|
438
|
-
} // namespace
|
|
439
|
-
|
|
415
|
+
} // namespace
|
|
440
416
|
|
|
441
417
|
/// Finds neighbors and builds links with them, starting from an entry
|
|
442
418
|
/// point. The own neighbor list is assumed to be locked.
|
|
443
|
-
void HNSW::add_links_starting_from(
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
{
|
|
451
|
-
|
|
419
|
+
void HNSW::add_links_starting_from(
|
|
420
|
+
DistanceComputer& ptdis,
|
|
421
|
+
storage_idx_t pt_id,
|
|
422
|
+
storage_idx_t nearest,
|
|
423
|
+
float d_nearest,
|
|
424
|
+
int level,
|
|
425
|
+
omp_lock_t* locks,
|
|
426
|
+
VisitedTable& vt) {
|
|
427
|
+
std::priority_queue<NodeDistCloser> link_targets;
|
|
452
428
|
|
|
453
|
-
|
|
454
|
-
|
|
429
|
+
search_neighbors_to_add(
|
|
430
|
+
*this, ptdis, link_targets, nearest, d_nearest, level, vt);
|
|
455
431
|
|
|
456
|
-
|
|
457
|
-
|
|
432
|
+
// but we can afford only this many neighbors
|
|
433
|
+
int M = nb_neighbors(level);
|
|
458
434
|
|
|
459
|
-
|
|
435
|
+
::faiss::shrink_neighbor_list(ptdis, link_targets, M);
|
|
460
436
|
|
|
461
|
-
|
|
462
|
-
|
|
437
|
+
while (!link_targets.empty()) {
|
|
438
|
+
int other_id = link_targets.top().id;
|
|
463
439
|
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
440
|
+
omp_set_lock(&locks[other_id]);
|
|
441
|
+
add_link(*this, ptdis, other_id, pt_id, level);
|
|
442
|
+
omp_unset_lock(&locks[other_id]);
|
|
467
443
|
|
|
468
|
-
|
|
444
|
+
add_link(*this, ptdis, pt_id, other_id, level);
|
|
469
445
|
|
|
470
|
-
|
|
471
|
-
|
|
446
|
+
link_targets.pop();
|
|
447
|
+
}
|
|
472
448
|
}
|
|
473
449
|
|
|
474
|
-
|
|
475
450
|
/**************************************************************
|
|
476
451
|
* Building, parallel
|
|
477
452
|
**************************************************************/
|
|
478
453
|
|
|
479
|
-
void HNSW::add_with_locks(
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
454
|
+
void HNSW::add_with_locks(
|
|
455
|
+
DistanceComputer& ptdis,
|
|
456
|
+
int pt_level,
|
|
457
|
+
int pt_id,
|
|
458
|
+
std::vector<omp_lock_t>& locks,
|
|
459
|
+
VisitedTable& vt) {
|
|
460
|
+
// greedy search on upper levels
|
|
484
461
|
|
|
485
|
-
|
|
462
|
+
storage_idx_t nearest;
|
|
486
463
|
#pragma omp critical
|
|
487
|
-
|
|
488
|
-
|
|
464
|
+
{
|
|
465
|
+
nearest = entry_point;
|
|
489
466
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
467
|
+
if (nearest == -1) {
|
|
468
|
+
max_level = pt_level;
|
|
469
|
+
entry_point = pt_id;
|
|
470
|
+
}
|
|
493
471
|
}
|
|
494
|
-
}
|
|
495
472
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
473
|
+
if (nearest < 0) {
|
|
474
|
+
return;
|
|
475
|
+
}
|
|
499
476
|
|
|
500
|
-
|
|
477
|
+
omp_set_lock(&locks[pt_id]);
|
|
501
478
|
|
|
502
|
-
|
|
503
|
-
|
|
479
|
+
int level = max_level; // level at which we start adding neighbors
|
|
480
|
+
float d_nearest = ptdis(nearest);
|
|
504
481
|
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
482
|
+
for (; level > pt_level; level--) {
|
|
483
|
+
greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
|
|
484
|
+
}
|
|
508
485
|
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
486
|
+
for (; level >= 0; level--) {
|
|
487
|
+
add_links_starting_from(
|
|
488
|
+
ptdis, pt_id, nearest, d_nearest, level, locks.data(), vt);
|
|
489
|
+
}
|
|
513
490
|
|
|
514
|
-
|
|
491
|
+
omp_unset_lock(&locks[pt_id]);
|
|
515
492
|
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
493
|
+
if (pt_level > max_level) {
|
|
494
|
+
max_level = pt_level;
|
|
495
|
+
entry_point = pt_id;
|
|
496
|
+
}
|
|
520
497
|
}
|
|
521
498
|
|
|
522
|
-
|
|
523
499
|
/** Do a BFS on the candidates list */
|
|
524
500
|
|
|
525
501
|
int HNSW::search_from_candidates(
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
bool do_dis_check = check_relative_distance;
|
|
548
|
-
int nstep = 0;
|
|
549
|
-
|
|
550
|
-
while (candidates.size() > 0) {
|
|
551
|
-
float d0 = 0;
|
|
552
|
-
int v0 = candidates.pop_min(&d0);
|
|
553
|
-
|
|
554
|
-
if (do_dis_check) {
|
|
555
|
-
// tricky stopping condition: there are more that ef
|
|
556
|
-
// distances that are processed already that are smaller
|
|
557
|
-
// than d0
|
|
558
|
-
|
|
559
|
-
int n_dis_below = candidates.count_below(d0);
|
|
560
|
-
if(n_dis_below >= efSearch) {
|
|
561
|
-
break;
|
|
562
|
-
}
|
|
563
|
-
}
|
|
564
|
-
|
|
565
|
-
size_t begin, end;
|
|
566
|
-
neighbor_range(v0, level, &begin, &end);
|
|
567
|
-
|
|
568
|
-
for (size_t j = begin; j < end; j++) {
|
|
569
|
-
int v1 = neighbors[j];
|
|
570
|
-
if (v1 < 0) break;
|
|
571
|
-
if (vt.get(v1)) {
|
|
572
|
-
continue;
|
|
573
|
-
}
|
|
574
|
-
vt.set(v1);
|
|
575
|
-
ndis++;
|
|
576
|
-
float d = qdis(v1);
|
|
577
|
-
if (nres < k) {
|
|
578
|
-
faiss::maxheap_push(++nres, D, I, d, v1);
|
|
579
|
-
} else if (d < D[0]) {
|
|
580
|
-
faiss::maxheap_replace_top(nres, D, I, d, v1);
|
|
581
|
-
}
|
|
582
|
-
candidates.push(v1, d);
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
nstep++;
|
|
586
|
-
if (!do_dis_check && nstep > efSearch) {
|
|
587
|
-
break;
|
|
588
|
-
}
|
|
589
|
-
}
|
|
590
|
-
|
|
591
|
-
if (level == 0) {
|
|
592
|
-
stats.n1 ++;
|
|
593
|
-
if (candidates.size() == 0) {
|
|
594
|
-
stats.n2 ++;
|
|
502
|
+
DistanceComputer& qdis,
|
|
503
|
+
int k,
|
|
504
|
+
idx_t* I,
|
|
505
|
+
float* D,
|
|
506
|
+
MinimaxHeap& candidates,
|
|
507
|
+
VisitedTable& vt,
|
|
508
|
+
HNSWStats& stats,
|
|
509
|
+
int level,
|
|
510
|
+
int nres_in) const {
|
|
511
|
+
int nres = nres_in;
|
|
512
|
+
int ndis = 0;
|
|
513
|
+
for (int i = 0; i < candidates.size(); i++) {
|
|
514
|
+
idx_t v1 = candidates.ids[i];
|
|
515
|
+
float d = candidates.dis[i];
|
|
516
|
+
FAISS_ASSERT(v1 >= 0);
|
|
517
|
+
if (nres < k) {
|
|
518
|
+
faiss::maxheap_push(++nres, D, I, d, v1);
|
|
519
|
+
} else if (d < D[0]) {
|
|
520
|
+
faiss::maxheap_replace_top(nres, D, I, d, v1);
|
|
521
|
+
}
|
|
522
|
+
vt.set(v1);
|
|
595
523
|
}
|
|
596
|
-
stats.n3 += ndis;
|
|
597
|
-
}
|
|
598
|
-
|
|
599
|
-
return nres;
|
|
600
|
-
}
|
|
601
|
-
|
|
602
524
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
**************************************************************/
|
|
525
|
+
bool do_dis_check = check_relative_distance;
|
|
526
|
+
int nstep = 0;
|
|
606
527
|
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
int ef,
|
|
611
|
-
VisitedTable *vt,
|
|
612
|
-
HNSWStats& stats) const
|
|
613
|
-
{
|
|
614
|
-
int ndis = 0;
|
|
615
|
-
std::priority_queue<Node> top_candidates;
|
|
616
|
-
std::priority_queue<Node, std::vector<Node>, std::greater<Node>> candidates;
|
|
528
|
+
while (candidates.size() > 0) {
|
|
529
|
+
float d0 = 0;
|
|
530
|
+
int v0 = candidates.pop_min(&d0);
|
|
617
531
|
|
|
618
|
-
|
|
619
|
-
|
|
532
|
+
if (do_dis_check) {
|
|
533
|
+
// tricky stopping condition: there are more that ef
|
|
534
|
+
// distances that are processed already that are smaller
|
|
535
|
+
// than d0
|
|
620
536
|
|
|
621
|
-
|
|
537
|
+
int n_dis_below = candidates.count_below(d0);
|
|
538
|
+
if (n_dis_below >= efSearch) {
|
|
539
|
+
break;
|
|
540
|
+
}
|
|
541
|
+
}
|
|
622
542
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
543
|
+
size_t begin, end;
|
|
544
|
+
neighbor_range(v0, level, &begin, &end);
|
|
545
|
+
|
|
546
|
+
for (size_t j = begin; j < end; j++) {
|
|
547
|
+
int v1 = neighbors[j];
|
|
548
|
+
if (v1 < 0)
|
|
549
|
+
break;
|
|
550
|
+
if (vt.get(v1)) {
|
|
551
|
+
continue;
|
|
552
|
+
}
|
|
553
|
+
vt.set(v1);
|
|
554
|
+
ndis++;
|
|
555
|
+
float d = qdis(v1);
|
|
556
|
+
if (nres < k) {
|
|
557
|
+
faiss::maxheap_push(++nres, D, I, d, v1);
|
|
558
|
+
} else if (d < D[0]) {
|
|
559
|
+
faiss::maxheap_replace_top(nres, D, I, d, v1);
|
|
560
|
+
}
|
|
561
|
+
candidates.push(v1, d);
|
|
562
|
+
}
|
|
627
563
|
|
|
628
|
-
|
|
629
|
-
|
|
564
|
+
nstep++;
|
|
565
|
+
if (!do_dis_check && nstep > efSearch) {
|
|
566
|
+
break;
|
|
567
|
+
}
|
|
630
568
|
}
|
|
631
569
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
for (size_t j = begin; j < end; ++j) {
|
|
638
|
-
int v1 = neighbors[j];
|
|
639
|
-
|
|
640
|
-
if (v1 < 0) {
|
|
641
|
-
break;
|
|
642
|
-
}
|
|
643
|
-
if (vt->get(v1)) {
|
|
644
|
-
continue;
|
|
645
|
-
}
|
|
646
|
-
|
|
647
|
-
vt->set(v1);
|
|
648
|
-
|
|
649
|
-
float d1 = qdis(v1);
|
|
650
|
-
++ndis;
|
|
651
|
-
|
|
652
|
-
if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
|
|
653
|
-
candidates.emplace(d1, v1);
|
|
654
|
-
top_candidates.emplace(d1, v1);
|
|
655
|
-
|
|
656
|
-
if (top_candidates.size() > ef) {
|
|
657
|
-
top_candidates.pop();
|
|
570
|
+
if (level == 0) {
|
|
571
|
+
stats.n1++;
|
|
572
|
+
if (candidates.size() == 0) {
|
|
573
|
+
stats.n2++;
|
|
658
574
|
}
|
|
659
|
-
|
|
575
|
+
stats.n3 += ndis;
|
|
660
576
|
}
|
|
661
|
-
}
|
|
662
|
-
|
|
663
|
-
++stats.n1;
|
|
664
|
-
if (candidates.size() == 0) {
|
|
665
|
-
++stats.n2;
|
|
666
|
-
}
|
|
667
|
-
stats.n3 += ndis;
|
|
668
577
|
|
|
669
|
-
|
|
578
|
+
return nres;
|
|
670
579
|
}
|
|
671
580
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
{
|
|
676
|
-
HNSWStats stats;
|
|
677
|
-
|
|
678
|
-
if (upper_beam == 1) {
|
|
679
|
-
|
|
680
|
-
// greedy search on upper levels
|
|
681
|
-
storage_idx_t nearest = entry_point;
|
|
682
|
-
float d_nearest = qdis(nearest);
|
|
581
|
+
/**************************************************************
|
|
582
|
+
* Searching
|
|
583
|
+
**************************************************************/
|
|
683
584
|
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
585
|
+
std::priority_queue<HNSW::Node> HNSW::search_from_candidate_unbounded(
|
|
586
|
+
const Node& node,
|
|
587
|
+
DistanceComputer& qdis,
|
|
588
|
+
int ef,
|
|
589
|
+
VisitedTable* vt,
|
|
590
|
+
HNSWStats& stats) const {
|
|
591
|
+
int ndis = 0;
|
|
592
|
+
std::priority_queue<Node> top_candidates;
|
|
593
|
+
std::priority_queue<Node, std::vector<Node>, std::greater<Node>> candidates;
|
|
594
|
+
|
|
595
|
+
top_candidates.push(node);
|
|
596
|
+
candidates.push(node);
|
|
597
|
+
|
|
598
|
+
vt->set(node.second);
|
|
599
|
+
|
|
600
|
+
while (!candidates.empty()) {
|
|
601
|
+
float d0;
|
|
602
|
+
storage_idx_t v0;
|
|
603
|
+
std::tie(d0, v0) = candidates.top();
|
|
604
|
+
|
|
605
|
+
if (d0 > top_candidates.top().first) {
|
|
606
|
+
break;
|
|
607
|
+
}
|
|
687
608
|
|
|
688
|
-
|
|
689
|
-
if (search_bounded_queue) {
|
|
690
|
-
MinimaxHeap candidates(ef);
|
|
609
|
+
candidates.pop();
|
|
691
610
|
|
|
692
|
-
|
|
611
|
+
size_t begin, end;
|
|
612
|
+
neighbor_range(v0, 0, &begin, &end);
|
|
693
613
|
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
std::priority_queue<Node> top_candidates =
|
|
697
|
-
search_from_candidate_unbounded(Node(d_nearest, nearest),
|
|
698
|
-
qdis, ef, &vt, stats);
|
|
614
|
+
for (size_t j = begin; j < end; ++j) {
|
|
615
|
+
int v1 = neighbors[j];
|
|
699
616
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
617
|
+
if (v1 < 0) {
|
|
618
|
+
break;
|
|
619
|
+
}
|
|
620
|
+
if (vt->get(v1)) {
|
|
621
|
+
continue;
|
|
622
|
+
}
|
|
703
623
|
|
|
704
|
-
|
|
705
|
-
while (!top_candidates.empty()) {
|
|
706
|
-
float d;
|
|
707
|
-
storage_idx_t label;
|
|
708
|
-
std::tie(d, label) = top_candidates.top();
|
|
709
|
-
faiss::maxheap_push(++nres, D, I, d, label);
|
|
710
|
-
top_candidates.pop();
|
|
711
|
-
}
|
|
712
|
-
}
|
|
624
|
+
vt->set(v1);
|
|
713
625
|
|
|
714
|
-
|
|
626
|
+
float d1 = qdis(v1);
|
|
627
|
+
++ndis;
|
|
715
628
|
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
629
|
+
if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
|
|
630
|
+
candidates.emplace(d1, v1);
|
|
631
|
+
top_candidates.emplace(d1, v1);
|
|
719
632
|
|
|
720
|
-
|
|
721
|
-
|
|
633
|
+
if (top_candidates.size() > ef) {
|
|
634
|
+
top_candidates.pop();
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
}
|
|
722
639
|
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
640
|
+
++stats.n1;
|
|
641
|
+
if (candidates.size() == 0) {
|
|
642
|
+
++stats.n2;
|
|
643
|
+
}
|
|
644
|
+
stats.n3 += ndis;
|
|
726
645
|
|
|
727
|
-
|
|
646
|
+
return top_candidates;
|
|
647
|
+
}
|
|
728
648
|
|
|
729
|
-
|
|
649
|
+
HNSWStats HNSW::search(
|
|
650
|
+
DistanceComputer& qdis,
|
|
651
|
+
int k,
|
|
652
|
+
idx_t* I,
|
|
653
|
+
float* D,
|
|
654
|
+
VisitedTable& vt) const {
|
|
655
|
+
HNSWStats stats;
|
|
656
|
+
|
|
657
|
+
if (upper_beam == 1) {
|
|
658
|
+
// greedy search on upper levels
|
|
659
|
+
storage_idx_t nearest = entry_point;
|
|
660
|
+
float d_nearest = qdis(nearest);
|
|
661
|
+
|
|
662
|
+
for (int level = max_level; level >= 1; level--) {
|
|
663
|
+
greedy_update_nearest(*this, qdis, level, nearest, d_nearest);
|
|
664
|
+
}
|
|
730
665
|
|
|
731
|
-
|
|
666
|
+
int ef = std::max(efSearch, k);
|
|
667
|
+
if (search_bounded_queue) {
|
|
668
|
+
MinimaxHeap candidates(ef);
|
|
669
|
+
|
|
670
|
+
candidates.push(nearest, d_nearest);
|
|
671
|
+
|
|
672
|
+
search_from_candidates(qdis, k, I, D, candidates, vt, stats, 0);
|
|
673
|
+
} else {
|
|
674
|
+
std::priority_queue<Node> top_candidates =
|
|
675
|
+
search_from_candidate_unbounded(
|
|
676
|
+
Node(d_nearest, nearest), qdis, ef, &vt, stats);
|
|
677
|
+
|
|
678
|
+
while (top_candidates.size() > k) {
|
|
679
|
+
top_candidates.pop();
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
int nres = 0;
|
|
683
|
+
while (!top_candidates.empty()) {
|
|
684
|
+
float d;
|
|
685
|
+
storage_idx_t label;
|
|
686
|
+
std::tie(d, label) = top_candidates.top();
|
|
687
|
+
faiss::maxheap_push(++nres, D, I, d, label);
|
|
688
|
+
top_candidates.pop();
|
|
689
|
+
}
|
|
690
|
+
}
|
|
732
691
|
|
|
733
|
-
|
|
734
|
-
candidates.push(I_to_next[i], D_to_next[i]);
|
|
735
|
-
}
|
|
692
|
+
vt.advance();
|
|
736
693
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
694
|
+
} else {
|
|
695
|
+
int candidates_size = upper_beam;
|
|
696
|
+
MinimaxHeap candidates(candidates_size);
|
|
697
|
+
|
|
698
|
+
std::vector<idx_t> I_to_next(candidates_size);
|
|
699
|
+
std::vector<float> D_to_next(candidates_size);
|
|
700
|
+
|
|
701
|
+
int nres = 1;
|
|
702
|
+
I_to_next[0] = entry_point;
|
|
703
|
+
D_to_next[0] = qdis(entry_point);
|
|
704
|
+
|
|
705
|
+
for (int level = max_level; level >= 0; level--) {
|
|
706
|
+
// copy I, D -> candidates
|
|
707
|
+
|
|
708
|
+
candidates.clear();
|
|
709
|
+
|
|
710
|
+
for (int i = 0; i < nres; i++) {
|
|
711
|
+
candidates.push(I_to_next[i], D_to_next[i]);
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
if (level == 0) {
|
|
715
|
+
nres = search_from_candidates(
|
|
716
|
+
qdis, k, I, D, candidates, vt, stats, 0);
|
|
717
|
+
} else {
|
|
718
|
+
nres = search_from_candidates(
|
|
719
|
+
qdis,
|
|
720
|
+
candidates_size,
|
|
721
|
+
I_to_next.data(),
|
|
722
|
+
D_to_next.data(),
|
|
723
|
+
candidates,
|
|
724
|
+
vt,
|
|
725
|
+
stats,
|
|
726
|
+
level);
|
|
727
|
+
}
|
|
728
|
+
vt.advance();
|
|
729
|
+
}
|
|
747
730
|
}
|
|
748
|
-
}
|
|
749
731
|
|
|
750
|
-
|
|
732
|
+
return stats;
|
|
751
733
|
}
|
|
752
734
|
|
|
753
|
-
|
|
754
735
|
void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
736
|
+
if (k == n) {
|
|
737
|
+
if (v >= dis[0])
|
|
738
|
+
return;
|
|
739
|
+
faiss::heap_pop<HC>(k--, dis.data(), ids.data());
|
|
740
|
+
--nvalid;
|
|
741
|
+
}
|
|
742
|
+
faiss::heap_push<HC>(++k, dis.data(), ids.data(), v, i);
|
|
743
|
+
++nvalid;
|
|
762
744
|
}
|
|
763
745
|
|
|
764
746
|
float HNSW::MinimaxHeap::max() const {
|
|
765
|
-
|
|
747
|
+
return dis[0];
|
|
766
748
|
}
|
|
767
749
|
|
|
768
750
|
int HNSW::MinimaxHeap::size() const {
|
|
769
|
-
|
|
751
|
+
return nvalid;
|
|
770
752
|
}
|
|
771
753
|
|
|
772
754
|
void HNSW::MinimaxHeap::clear() {
|
|
773
|
-
|
|
755
|
+
nvalid = k = 0;
|
|
774
756
|
}
|
|
775
757
|
|
|
776
|
-
int HNSW::MinimaxHeap::pop_min(float
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
if (i == -1) return -1;
|
|
785
|
-
int imin = i;
|
|
786
|
-
float vmin = dis[i];
|
|
787
|
-
i--;
|
|
788
|
-
while(i >= 0) {
|
|
789
|
-
if (ids[i] != -1 && dis[i] < vmin) {
|
|
790
|
-
vmin = dis[i];
|
|
791
|
-
imin = i;
|
|
758
|
+
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
759
|
+
assert(k > 0);
|
|
760
|
+
// returns min. This is an O(n) operation
|
|
761
|
+
int i = k - 1;
|
|
762
|
+
while (i >= 0) {
|
|
763
|
+
if (ids[i] != -1)
|
|
764
|
+
break;
|
|
765
|
+
i--;
|
|
792
766
|
}
|
|
767
|
+
if (i == -1)
|
|
768
|
+
return -1;
|
|
769
|
+
int imin = i;
|
|
770
|
+
float vmin = dis[i];
|
|
793
771
|
i--;
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
772
|
+
while (i >= 0) {
|
|
773
|
+
if (ids[i] != -1 && dis[i] < vmin) {
|
|
774
|
+
vmin = dis[i];
|
|
775
|
+
imin = i;
|
|
776
|
+
}
|
|
777
|
+
i--;
|
|
778
|
+
}
|
|
779
|
+
if (vmin_out)
|
|
780
|
+
*vmin_out = vmin;
|
|
781
|
+
int ret = ids[imin];
|
|
782
|
+
ids[imin] = -1;
|
|
783
|
+
--nvalid;
|
|
799
784
|
|
|
800
|
-
|
|
785
|
+
return ret;
|
|
801
786
|
}
|
|
802
787
|
|
|
803
788
|
int HNSW::MinimaxHeap::count_below(float thresh) {
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
789
|
+
int n_below = 0;
|
|
790
|
+
for (int i = 0; i < k; i++) {
|
|
791
|
+
if (dis[i] < thresh) {
|
|
792
|
+
n_below++;
|
|
793
|
+
}
|
|
808
794
|
}
|
|
809
|
-
}
|
|
810
795
|
|
|
811
|
-
|
|
796
|
+
return n_below;
|
|
812
797
|
}
|
|
813
798
|
|
|
814
|
-
|
|
815
|
-
} // namespace faiss
|
|
799
|
+
} // namespace faiss
|