faiss 0.1.7 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +7 -7
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +8 -2
- data/ext/faiss/index.cpp +102 -69
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +0 -5
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +26 -12
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
|
@@ -5,55 +5,51 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
#include <faiss/gpu/utils/Timer.h>
|
|
10
8
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
9
|
+
#include <faiss/gpu/utils/Timer.h>
|
|
11
10
|
#include <faiss/impl/FaissAssert.h>
|
|
12
11
|
#include <chrono>
|
|
13
12
|
|
|
14
|
-
namespace faiss {
|
|
13
|
+
namespace faiss {
|
|
14
|
+
namespace gpu {
|
|
15
15
|
|
|
16
16
|
KernelTimer::KernelTimer(cudaStream_t stream)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
CUDA_VERIFY(cudaEventCreate(&stopEvent_));
|
|
23
|
-
|
|
24
|
-
CUDA_VERIFY(cudaEventRecord(startEvent_, stream_));
|
|
17
|
+
: startEvent_(0), stopEvent_(0), stream_(stream), valid_(true) {
|
|
18
|
+
CUDA_VERIFY(cudaEventCreate(&startEvent_));
|
|
19
|
+
CUDA_VERIFY(cudaEventCreate(&stopEvent_));
|
|
20
|
+
|
|
21
|
+
CUDA_VERIFY(cudaEventRecord(startEvent_, stream_));
|
|
25
22
|
}
|
|
26
23
|
|
|
27
24
|
KernelTimer::~KernelTimer() {
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
CUDA_VERIFY(cudaEventDestroy(startEvent_));
|
|
26
|
+
CUDA_VERIFY(cudaEventDestroy(stopEvent_));
|
|
30
27
|
}
|
|
31
28
|
|
|
32
|
-
float
|
|
33
|
-
|
|
34
|
-
FAISS_ASSERT(valid_);
|
|
29
|
+
float KernelTimer::elapsedMilliseconds() {
|
|
30
|
+
FAISS_ASSERT(valid_);
|
|
35
31
|
|
|
36
|
-
|
|
37
|
-
|
|
32
|
+
CUDA_VERIFY(cudaEventRecord(stopEvent_, stream_));
|
|
33
|
+
CUDA_VERIFY(cudaEventSynchronize(stopEvent_));
|
|
38
34
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
35
|
+
auto time = 0.0f;
|
|
36
|
+
CUDA_VERIFY(cudaEventElapsedTime(&time, startEvent_, stopEvent_));
|
|
37
|
+
valid_ = false;
|
|
42
38
|
|
|
43
|
-
|
|
39
|
+
return time;
|
|
44
40
|
}
|
|
45
41
|
|
|
46
42
|
CpuTimer::CpuTimer() {
|
|
47
|
-
|
|
43
|
+
start_ = std::chrono::steady_clock::now();
|
|
48
44
|
}
|
|
49
45
|
|
|
50
|
-
float
|
|
51
|
-
|
|
52
|
-
auto end = std::chrono::steady_clock::now();
|
|
46
|
+
float CpuTimer::elapsedMilliseconds() {
|
|
47
|
+
auto end = std::chrono::steady_clock::now();
|
|
53
48
|
|
|
54
|
-
|
|
49
|
+
std::chrono::duration<float, std::milli> duration = end - start_;
|
|
55
50
|
|
|
56
|
-
|
|
51
|
+
return duration.count();
|
|
57
52
|
}
|
|
58
53
|
|
|
59
|
-
}
|
|
54
|
+
} // namespace gpu
|
|
55
|
+
} // namespace faiss
|
|
@@ -5,48 +5,49 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
11
10
|
#include <cuda_runtime.h>
|
|
12
11
|
#include <chrono>
|
|
13
12
|
|
|
14
|
-
namespace faiss {
|
|
13
|
+
namespace faiss {
|
|
14
|
+
namespace gpu {
|
|
15
15
|
|
|
16
16
|
/// Utility class for timing execution of a kernel
|
|
17
17
|
class KernelTimer {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
18
|
+
public:
|
|
19
|
+
/// Constructor starts the timer and adds an event into the current
|
|
20
|
+
/// device stream
|
|
21
|
+
KernelTimer(cudaStream_t stream = 0);
|
|
22
|
+
|
|
23
|
+
/// Destructor releases event resources
|
|
24
|
+
~KernelTimer();
|
|
25
|
+
|
|
26
|
+
/// Adds a stop event then synchronizes on the stop event to get the
|
|
27
|
+
/// actual GPU-side kernel timings for any kernels launched in the
|
|
28
|
+
/// current stream. Returns the number of milliseconds elapsed.
|
|
29
|
+
/// Can only be called once.
|
|
30
|
+
float elapsedMilliseconds();
|
|
31
|
+
|
|
32
|
+
private:
|
|
33
|
+
cudaEvent_t startEvent_;
|
|
34
|
+
cudaEvent_t stopEvent_;
|
|
35
|
+
cudaStream_t stream_;
|
|
36
|
+
bool valid_;
|
|
37
37
|
};
|
|
38
38
|
|
|
39
39
|
/// CPU wallclock elapsed timer
|
|
40
40
|
class CpuTimer {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
public:
|
|
42
|
+
/// Creates and starts a new timer
|
|
43
|
+
CpuTimer();
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
/// Returns elapsed time in milliseconds
|
|
46
|
+
float elapsedMilliseconds();
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
private:
|
|
49
|
+
std::chrono::time_point<std::chrono::steady_clock> start_;
|
|
50
50
|
};
|
|
51
51
|
|
|
52
|
-
}
|
|
52
|
+
} // namespace gpu
|
|
53
|
+
} // namespace faiss
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/AdditiveQuantizer.h>
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
|
|
13
|
+
#include <cstddef>
|
|
14
|
+
#include <cstdio>
|
|
15
|
+
#include <cstring>
|
|
16
|
+
#include <memory>
|
|
17
|
+
#include <random>
|
|
18
|
+
|
|
19
|
+
#include <algorithm>
|
|
20
|
+
|
|
21
|
+
#include <faiss/utils/Heap.h>
|
|
22
|
+
#include <faiss/utils/distances.h>
|
|
23
|
+
#include <faiss/utils/hamming.h> // BitstringWriter
|
|
24
|
+
#include <faiss/utils/utils.h>
|
|
25
|
+
|
|
26
|
+
extern "C" {
|
|
27
|
+
|
|
28
|
+
// general matrix multiplication
|
|
29
|
+
int sgemm_(
|
|
30
|
+
const char* transa,
|
|
31
|
+
const char* transb,
|
|
32
|
+
FINTEGER* m,
|
|
33
|
+
FINTEGER* n,
|
|
34
|
+
FINTEGER* k,
|
|
35
|
+
const float* alpha,
|
|
36
|
+
const float* a,
|
|
37
|
+
FINTEGER* lda,
|
|
38
|
+
const float* b,
|
|
39
|
+
FINTEGER* ldb,
|
|
40
|
+
float* beta,
|
|
41
|
+
float* c,
|
|
42
|
+
FINTEGER* ldc);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
namespace {
|
|
46
|
+
|
|
47
|
+
// c and a and b can overlap
|
|
48
|
+
void fvec_add(size_t d, const float* a, const float* b, float* c) {
|
|
49
|
+
for (size_t i = 0; i < d; i++) {
|
|
50
|
+
c[i] = a[i] + b[i];
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
void fvec_add(size_t d, const float* a, float b, float* c) {
|
|
55
|
+
for (size_t i = 0; i < d; i++) {
|
|
56
|
+
c[i] = a[i] + b;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
} // namespace
|
|
61
|
+
|
|
62
|
+
namespace faiss {
|
|
63
|
+
|
|
64
|
+
void AdditiveQuantizer::set_derived_values() {
|
|
65
|
+
tot_bits = 0;
|
|
66
|
+
is_byte_aligned = true;
|
|
67
|
+
codebook_offsets.resize(M + 1, 0);
|
|
68
|
+
for (int i = 0; i < M; i++) {
|
|
69
|
+
int nbit = nbits[i];
|
|
70
|
+
size_t k = 1 << nbit;
|
|
71
|
+
codebook_offsets[i + 1] = codebook_offsets[i] + k;
|
|
72
|
+
tot_bits += nbit;
|
|
73
|
+
if (nbit % 8 != 0) {
|
|
74
|
+
is_byte_aligned = false;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
total_codebook_size = codebook_offsets[M];
|
|
78
|
+
// convert bits to bytes
|
|
79
|
+
code_size = (tot_bits + 7) / 8;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
void AdditiveQuantizer::pack_codes(
|
|
83
|
+
size_t n,
|
|
84
|
+
const int32_t* codes,
|
|
85
|
+
uint8_t* packed_codes,
|
|
86
|
+
int64_t ld_codes) const {
|
|
87
|
+
if (ld_codes == -1) {
|
|
88
|
+
ld_codes = M;
|
|
89
|
+
}
|
|
90
|
+
#pragma omp parallel for if (n > 1000)
|
|
91
|
+
for (int64_t i = 0; i < n; i++) {
|
|
92
|
+
const int32_t* codes1 = codes + i * ld_codes;
|
|
93
|
+
BitstringWriter bsw(packed_codes + i * code_size, code_size);
|
|
94
|
+
for (int m = 0; m < M; m++) {
|
|
95
|
+
bsw.write(codes1[m], nbits[m]);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
void AdditiveQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
|
|
101
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
102
|
+
is_trained, "The additive quantizer is not trained yet.");
|
|
103
|
+
|
|
104
|
+
// standard additive quantizer decoding
|
|
105
|
+
#pragma omp parallel for if (n > 1000)
|
|
106
|
+
for (int64_t i = 0; i < n; i++) {
|
|
107
|
+
BitstringReader bsr(code + i * code_size, code_size);
|
|
108
|
+
float* xi = x + i * d;
|
|
109
|
+
for (int m = 0; m < M; m++) {
|
|
110
|
+
int idx = bsr.read(nbits[m]);
|
|
111
|
+
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
112
|
+
if (m == 0) {
|
|
113
|
+
memcpy(xi, c, sizeof(*x) * d);
|
|
114
|
+
} else {
|
|
115
|
+
fvec_add(d, xi, c, xi);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
AdditiveQuantizer::~AdditiveQuantizer() {}
|
|
122
|
+
|
|
123
|
+
/****************************************************************************
|
|
124
|
+
* Support for fast distance computations and search with additive quantizer
|
|
125
|
+
****************************************************************************/
|
|
126
|
+
|
|
127
|
+
void AdditiveQuantizer::compute_centroid_norms(float* norms) const {
|
|
128
|
+
size_t ntotal = (size_t)1 << tot_bits;
|
|
129
|
+
// TODO: make tree of partial sums
|
|
130
|
+
#pragma omp parallel
|
|
131
|
+
{
|
|
132
|
+
std::vector<float> tmp(d);
|
|
133
|
+
#pragma omp for
|
|
134
|
+
for (int64_t i = 0; i < ntotal; i++) {
|
|
135
|
+
decode_64bit(i, tmp.data());
|
|
136
|
+
norms[i] = fvec_norm_L2sqr(tmp.data(), d);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
void AdditiveQuantizer::decode_64bit(idx_t bits, float* xi) const {
|
|
142
|
+
for (int m = 0; m < M; m++) {
|
|
143
|
+
idx_t idx = bits & (((size_t)1 << nbits[m]) - 1);
|
|
144
|
+
bits >>= nbits[m];
|
|
145
|
+
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
146
|
+
if (m == 0) {
|
|
147
|
+
memcpy(xi, c, sizeof(*xi) * d);
|
|
148
|
+
} else {
|
|
149
|
+
fvec_add(d, xi, c, xi);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
void AdditiveQuantizer::compute_LUT(size_t n, const float* xq, float* LUT)
|
|
155
|
+
const {
|
|
156
|
+
// in all cases, this is one large matrix multiplication
|
|
157
|
+
|
|
158
|
+
FINTEGER ncenti = total_codebook_size;
|
|
159
|
+
FINTEGER di = d;
|
|
160
|
+
FINTEGER nqi = n;
|
|
161
|
+
float one = 1, zero = 0;
|
|
162
|
+
|
|
163
|
+
sgemm_("Transposed",
|
|
164
|
+
"Not transposed",
|
|
165
|
+
&ncenti,
|
|
166
|
+
&nqi,
|
|
167
|
+
&di,
|
|
168
|
+
&one,
|
|
169
|
+
codebooks.data(),
|
|
170
|
+
&di,
|
|
171
|
+
xq,
|
|
172
|
+
&di,
|
|
173
|
+
&zero,
|
|
174
|
+
LUT,
|
|
175
|
+
&ncenti);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
namespace {
|
|
179
|
+
|
|
180
|
+
void compute_inner_prod_with_LUT(
|
|
181
|
+
const AdditiveQuantizer& aq,
|
|
182
|
+
const float* LUT,
|
|
183
|
+
float* ips) {
|
|
184
|
+
size_t prev_size = 1;
|
|
185
|
+
for (int m = 0; m < aq.M; m++) {
|
|
186
|
+
const float* LUTm = LUT + aq.codebook_offsets[m];
|
|
187
|
+
int nb = aq.nbits[m];
|
|
188
|
+
size_t nc = (size_t)1 << nb;
|
|
189
|
+
|
|
190
|
+
if (m == 0) {
|
|
191
|
+
memcpy(ips, LUT, sizeof(*ips) * nc);
|
|
192
|
+
} else {
|
|
193
|
+
for (int64_t i = nc - 1; i >= 0; i--) {
|
|
194
|
+
float v = LUTm[i];
|
|
195
|
+
fvec_add(prev_size, ips, v, ips + i * prev_size);
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
prev_size *= nc;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
} // anonymous namespace
|
|
203
|
+
|
|
204
|
+
void AdditiveQuantizer::knn_exact_inner_product(
|
|
205
|
+
idx_t n,
|
|
206
|
+
const float* xq,
|
|
207
|
+
idx_t k,
|
|
208
|
+
float* distances,
|
|
209
|
+
idx_t* labels) const {
|
|
210
|
+
std::unique_ptr<float[]> LUT(new float[n * total_codebook_size]);
|
|
211
|
+
compute_LUT(n, xq, LUT.get());
|
|
212
|
+
size_t ntotal = (size_t)1 << tot_bits;
|
|
213
|
+
|
|
214
|
+
#pragma omp parallel if (n > 100)
|
|
215
|
+
{
|
|
216
|
+
std::vector<float> dis(ntotal);
|
|
217
|
+
#pragma omp for
|
|
218
|
+
for (idx_t i = 0; i < n; i++) {
|
|
219
|
+
const float* LUTi = LUT.get() + i * total_codebook_size;
|
|
220
|
+
compute_inner_prod_with_LUT(*this, LUTi, dis.data());
|
|
221
|
+
float* distances_i = distances + i * k;
|
|
222
|
+
idx_t* labels_i = labels + i * k;
|
|
223
|
+
minheap_heapify(k, distances_i, labels_i);
|
|
224
|
+
minheap_addn(k, distances_i, labels_i, dis.data(), nullptr, ntotal);
|
|
225
|
+
minheap_reorder(k, distances_i, labels_i);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
void AdditiveQuantizer::knn_exact_L2(
|
|
231
|
+
idx_t n,
|
|
232
|
+
const float* xq,
|
|
233
|
+
idx_t k,
|
|
234
|
+
float* distances,
|
|
235
|
+
idx_t* labels,
|
|
236
|
+
const float* norms) const {
|
|
237
|
+
std::unique_ptr<float[]> LUT(new float[n * total_codebook_size]);
|
|
238
|
+
compute_LUT(n, xq, LUT.get());
|
|
239
|
+
std::unique_ptr<float[]> q_norms(new float[n]);
|
|
240
|
+
fvec_norms_L2sqr(q_norms.get(), xq, d, n);
|
|
241
|
+
size_t ntotal = (size_t)1 << tot_bits;
|
|
242
|
+
|
|
243
|
+
#pragma omp parallel if (n > 100)
|
|
244
|
+
{
|
|
245
|
+
std::vector<float> dis(ntotal);
|
|
246
|
+
#pragma omp for
|
|
247
|
+
for (idx_t i = 0; i < n; i++) {
|
|
248
|
+
const float* LUTi = LUT.get() + i * total_codebook_size;
|
|
249
|
+
float* distances_i = distances + i * k;
|
|
250
|
+
idx_t* labels_i = labels + i * k;
|
|
251
|
+
|
|
252
|
+
compute_inner_prod_with_LUT(*this, LUTi, dis.data());
|
|
253
|
+
|
|
254
|
+
// update distances using
|
|
255
|
+
// ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * <x,y>
|
|
256
|
+
|
|
257
|
+
maxheap_heapify(k, distances_i, labels_i);
|
|
258
|
+
for (idx_t j = 0; j < ntotal; j++) {
|
|
259
|
+
float disj = q_norms[i] + norms[j] - 2 * dis[j];
|
|
260
|
+
if (disj < distances_i[0]) {
|
|
261
|
+
heap_replace_top<CMax<float, int64_t>>(
|
|
262
|
+
k, distances_i, labels_i, disj, j);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
maxheap_reorder(k, distances_i, labels_i);
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
} // namespace faiss
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
#include <faiss/Index.h>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
/** Abstract structure for additive quantizers
|
|
18
|
+
*
|
|
19
|
+
* Different from the product quantizer in which the decoded vector is the
|
|
20
|
+
* concatenation of M sub-vectors, additive quantizers sum M sub-vectors
|
|
21
|
+
* to get the decoded vector.
|
|
22
|
+
*/
|
|
23
|
+
struct AdditiveQuantizer {
|
|
24
|
+
size_t d; ///< size of the input vectors
|
|
25
|
+
size_t M; ///< number of codebooks
|
|
26
|
+
std::vector<size_t> nbits; ///< bits for each step
|
|
27
|
+
std::vector<float> codebooks; ///< codebooks
|
|
28
|
+
|
|
29
|
+
// derived values
|
|
30
|
+
std::vector<size_t> codebook_offsets;
|
|
31
|
+
size_t code_size; ///< code size in bytes
|
|
32
|
+
size_t tot_bits; ///< total number of bits
|
|
33
|
+
size_t total_codebook_size; ///< size of the codebook in vectors
|
|
34
|
+
bool is_byte_aligned;
|
|
35
|
+
|
|
36
|
+
bool verbose; ///< verbose during training?
|
|
37
|
+
bool is_trained; ///< is trained or not
|
|
38
|
+
|
|
39
|
+
/// compute derived values when d, M and nbits have been set
|
|
40
|
+
void set_derived_values();
|
|
41
|
+
|
|
42
|
+
/// Train the additive quantizer
|
|
43
|
+
virtual void train(size_t n, const float* x) = 0;
|
|
44
|
+
|
|
45
|
+
/** Encode a set of vectors
|
|
46
|
+
*
|
|
47
|
+
* @param x vectors to encode, size n * d
|
|
48
|
+
* @param codes output codes, size n * code_size
|
|
49
|
+
*/
|
|
50
|
+
virtual void compute_codes(const float* x, uint8_t* codes, size_t n)
|
|
51
|
+
const = 0;
|
|
52
|
+
|
|
53
|
+
/** Pack a series of codes into a bit-compact format
|
|
54
|
+
*
|
|
55
|
+
* @param codes codes to be packed: M int32_t entries per vector, with vector i starting at codes + i * ld_codes
|
|
56
|
+
* @param packed_codes output bit-compact codes, size n * code_size
|
|
57
|
+
* @param ld_codes leading dimension of codes (-1 means M)
|
|
58
|
+
*/
|
|
59
|
+
void pack_codes(
|
|
60
|
+
size_t n,
|
|
61
|
+
const int32_t* codes,
|
|
62
|
+
uint8_t* packed_codes,
|
|
63
|
+
int64_t ld_codes = -1) const;
|
|
64
|
+
|
|
65
|
+
/** Decode a set of vectors
|
|
66
|
+
*
|
|
67
|
+
* @param codes codes to decode, size n * code_size
|
|
68
|
+
* @param x output vectors, size n * d
|
|
69
|
+
*/
|
|
70
|
+
void decode(const uint8_t* codes, float* x, size_t n) const;
|
|
71
|
+
|
|
72
|
+
/****************************************************************************
|
|
73
|
+
* Support for exhaustive distance computations with the centroids.
|
|
74
|
+
* This is only practical when the number of elements to enumerate is not too large.
|
|
75
|
+
****************************************************************************/
|
|
76
|
+
using idx_t = Index::idx_t;
|
|
77
|
+
|
|
78
|
+
/// decoding function for a code in a 64-bit word
|
|
79
|
+
void decode_64bit(idx_t n, float* x) const;
|
|
80
|
+
|
|
81
|
+
/** Compute inner-product look-up tables. Used in the centroid search
|
|
82
|
+
* functions.
|
|
83
|
+
*
|
|
84
|
+
* @param xq query vectors, size (n, d)
|
|
85
|
+
* @param LUT look-up table, size (n, total_codebook_size)
|
|
86
|
+
*/
|
|
87
|
+
void compute_LUT(size_t n, const float* xq, float* LUT) const;
|
|
88
|
+
|
|
89
|
+
/// exact IP search
|
|
90
|
+
void knn_exact_inner_product(
|
|
91
|
+
idx_t n,
|
|
92
|
+
const float* xq,
|
|
93
|
+
idx_t k,
|
|
94
|
+
float* distances,
|
|
95
|
+
idx_t* labels) const;
|
|
96
|
+
|
|
97
|
+
/** For L2 search we need the L2 norms of the centroids
|
|
98
|
+
*
|
|
99
|
+
* @param norms output norms table, size 2^tot_bits (one entry per enumerable code)
|
|
100
|
+
*/
|
|
101
|
+
void compute_centroid_norms(float* norms) const;
|
|
102
|
+
|
|
103
|
+
/** Exact L2 search, with precomputed norms */
|
|
104
|
+
void knn_exact_L2(
|
|
105
|
+
idx_t n,
|
|
106
|
+
const float* xq,
|
|
107
|
+
idx_t k,
|
|
108
|
+
float* distances,
|
|
109
|
+
idx_t* labels,
|
|
110
|
+
const float* centroid_norms) const;
|
|
111
|
+
|
|
112
|
+
virtual ~AdditiveQuantizer();
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
}; // namespace faiss
|