faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <faiss/gpu/GpuResources.h>
|
|
12
|
+
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
|
13
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
14
|
+
#include <unordered_map>
|
|
15
|
+
#include <vector>
|
|
16
|
+
|
|
17
|
+
namespace faiss { namespace gpu {
|
|
18
|
+
|
|
19
|
+
/// Default implementation of GpuResources that allocates a cuBLAS
|
|
20
|
+
/// stream and 2 streams for use, as well as temporary memory
|
|
21
|
+
class StandardGpuResources : public GpuResources {
|
|
22
|
+
public:
|
|
23
|
+
StandardGpuResources();
|
|
24
|
+
|
|
25
|
+
~StandardGpuResources() override;
|
|
26
|
+
|
|
27
|
+
/// Disable allocation of temporary memory; all temporary memory
|
|
28
|
+
/// requests will call cudaMalloc / cudaFree at the point of use
|
|
29
|
+
void noTempMemory();
|
|
30
|
+
|
|
31
|
+
/// Specify that we wish to use a certain fixed size of memory on
|
|
32
|
+
/// all devices as temporary memory. This is the upper bound for the GPU
|
|
33
|
+
/// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
|
|
34
|
+
/// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
|
|
35
|
+
/// To avoid any temporary memory allocation, pass 0.
|
|
36
|
+
void setTempMemory(size_t size);
|
|
37
|
+
|
|
38
|
+
/// Set amount of pinned memory to allocate, for async GPU <-> CPU
|
|
39
|
+
/// transfers
|
|
40
|
+
void setPinnedMemory(size_t size);
|
|
41
|
+
|
|
42
|
+
/// Called to change the stream for work ordering
|
|
43
|
+
void setDefaultStream(int device, cudaStream_t stream);
|
|
44
|
+
|
|
45
|
+
/// Called to change the work ordering streams to the null stream
|
|
46
|
+
/// for all devices
|
|
47
|
+
void setDefaultNullStreamAllDevices();
|
|
48
|
+
|
|
49
|
+
/// Enable or disable the warning about not having enough temporary memory
|
|
50
|
+
/// when cudaMalloc gets called
|
|
51
|
+
void setCudaMallocWarning(bool b);
|
|
52
|
+
|
|
53
|
+
public:
|
|
54
|
+
/// Internal system calls
|
|
55
|
+
|
|
56
|
+
/// Initialize resources for this device
|
|
57
|
+
void initializeForDevice(int device) override;
|
|
58
|
+
|
|
59
|
+
cublasHandle_t getBlasHandle(int device) override;
|
|
60
|
+
|
|
61
|
+
cudaStream_t getDefaultStream(int device) override;
|
|
62
|
+
|
|
63
|
+
std::vector<cudaStream_t> getAlternateStreams(int device) override;
|
|
64
|
+
|
|
65
|
+
DeviceMemory& getMemoryManager(int device) override;
|
|
66
|
+
|
|
67
|
+
std::pair<void*, size_t> getPinnedMemory() override;
|
|
68
|
+
|
|
69
|
+
cudaStream_t getAsyncCopyStream(int device) override;
|
|
70
|
+
|
|
71
|
+
private:
|
|
72
|
+
/// Have GPU resources been initialized for this device yet?
|
|
73
|
+
bool isInitialized(int device) const;
|
|
74
|
+
|
|
75
|
+
/// Adjust the default temporary memory allocation based on the total GPU
|
|
76
|
+
/// memory size
|
|
77
|
+
static size_t getDefaultTempMemForGPU(int device, size_t requested);
|
|
78
|
+
|
|
79
|
+
private:
|
|
80
|
+
/// Our default stream that work is ordered on, one per each device
|
|
81
|
+
std::unordered_map<int, cudaStream_t> defaultStreams_;
|
|
82
|
+
|
|
83
|
+
/// This contains particular streams as set by the user for
|
|
84
|
+
/// ordering, if any
|
|
85
|
+
std::unordered_map<int, cudaStream_t> userDefaultStreams_;
|
|
86
|
+
|
|
87
|
+
/// Other streams we can use, per each device
|
|
88
|
+
std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
|
|
89
|
+
|
|
90
|
+
/// Async copy stream to use for GPU <-> CPU pinned memory copies
|
|
91
|
+
std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
|
|
92
|
+
|
|
93
|
+
/// cuBLAS handle for each device
|
|
94
|
+
std::unordered_map<int, cublasHandle_t> blasHandles_;
|
|
95
|
+
|
|
96
|
+
/// Temporary memory provider, per each device
|
|
97
|
+
std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
|
|
98
|
+
|
|
99
|
+
/// Pinned memory allocation for use with this GPU
|
|
100
|
+
void* pinnedMemAlloc_;
|
|
101
|
+
size_t pinnedMemAllocSize_;
|
|
102
|
+
|
|
103
|
+
/// Another option is to use a specified amount of memory on all
|
|
104
|
+
/// devices
|
|
105
|
+
size_t tempMemSize_;
|
|
106
|
+
|
|
107
|
+
/// Amount of pinned memory we should allocate
|
|
108
|
+
size_t pinnedMemSize_;
|
|
109
|
+
|
|
110
|
+
/// Whether or not a warning upon cudaMalloc is generated
|
|
111
|
+
bool cudaMallocWarning_;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
} } // namespace
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/gpu/impl/RemapIndices.h>
|
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
|
11
|
+
|
|
12
|
+
namespace faiss { namespace gpu {
|
|
13
|
+
|
|
14
|
+
// Utility function to translate (list id, offset) to a user index on
|
|
15
|
+
// the CPU. In a cpp in order to use OpenMP
|
|
16
|
+
void ivfOffsetToUserIndex(
|
|
17
|
+
long* indices,
|
|
18
|
+
int numLists,
|
|
19
|
+
int queries,
|
|
20
|
+
int k,
|
|
21
|
+
const std::vector<std::vector<long>>& listOffsetToUserIndex) {
|
|
22
|
+
FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
|
|
23
|
+
|
|
24
|
+
#pragma omp parallel for
|
|
25
|
+
for (int q = 0; q < queries; ++q) {
|
|
26
|
+
for (int r = 0; r < k; ++r) {
|
|
27
|
+
long offsetIndex = indices[q * k + r];
|
|
28
|
+
|
|
29
|
+
if (offsetIndex < 0) continue;
|
|
30
|
+
|
|
31
|
+
int listId = (int) (offsetIndex >> 32);
|
|
32
|
+
int listOffset = (int) (offsetIndex & 0xffffffff);
|
|
33
|
+
|
|
34
|
+
FAISS_ASSERT(listId < numLists);
|
|
35
|
+
auto& listIndices = listOffsetToUserIndex[listId];
|
|
36
|
+
|
|
37
|
+
FAISS_ASSERT(listOffset < listIndices.size());
|
|
38
|
+
indices[q * k + r] = listIndices[listOffset];
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
} } // namespace
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
namespace faiss { namespace gpu {
|
|
14
|
+
|
|
15
|
+
/// Rewrites `indices` in place on the CPU, turning each encoded
/// (list id, offset) entry into the matching user index taken from
/// `listOffsetToUserIndex`. Defined in a .cpp file so that it can use OpenMP.
///
/// @param indices  result buffer to translate in place
/// @param numLists number of inverted lists
/// @param queries  number of query rows in `indices`
/// @param k        number of results per query
/// @param listOffsetToUserIndex per-list table mapping an offset within the
///                 list to a user index
void ivfOffsetToUserIndex(long* indices,
                          int numLists,
                          int queries,
                          int k,
                          const std::vector<std::vector<long>>&
                              listOffsetToUserIndex);
|
|
23
|
+
|
|
24
|
+
} } // namespace
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/impl/FaissAssert.h>
|
|
10
|
+
|
|
11
|
+
namespace faiss { namespace gpu {
|
|
12
|
+
|
|
13
|
+
template <typename GpuIndex>
|
|
14
|
+
IndexWrapper<GpuIndex>::IndexWrapper(
|
|
15
|
+
int numGpus,
|
|
16
|
+
std::function<std::unique_ptr<GpuIndex>(GpuResources*, int)> init) {
|
|
17
|
+
FAISS_ASSERT(numGpus <= faiss::gpu::getNumDevices());
|
|
18
|
+
for (int i = 0; i < numGpus; ++i) {
|
|
19
|
+
auto res = std::unique_ptr<faiss::gpu::StandardGpuResources>(
|
|
20
|
+
new StandardGpuResources);
|
|
21
|
+
|
|
22
|
+
subIndex.emplace_back(init(res.get(), i));
|
|
23
|
+
resources.emplace_back(std::move(res));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
if (numGpus > 1) {
|
|
27
|
+
// create proxy
|
|
28
|
+
replicaIndex =
|
|
29
|
+
std::unique_ptr<faiss::IndexReplicas>(new faiss::IndexReplicas);
|
|
30
|
+
|
|
31
|
+
for (auto& index : subIndex) {
|
|
32
|
+
replicaIndex->addIndex(index.get());
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
template <typename GpuIndex>
|
|
38
|
+
faiss::Index*
|
|
39
|
+
IndexWrapper<GpuIndex>::getIndex() {
|
|
40
|
+
if ((bool) replicaIndex) {
|
|
41
|
+
return replicaIndex.get();
|
|
42
|
+
} else {
|
|
43
|
+
FAISS_ASSERT(!subIndex.empty());
|
|
44
|
+
return subIndex.front().get();
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
template <typename GpuIndex>
|
|
49
|
+
void
|
|
50
|
+
IndexWrapper<GpuIndex>::runOnIndices(std::function<void(GpuIndex*)> f) {
|
|
51
|
+
|
|
52
|
+
if ((bool) replicaIndex) {
|
|
53
|
+
replicaIndex->runOnIndex(
|
|
54
|
+
[f](int, faiss::Index* index) {
|
|
55
|
+
f(dynamic_cast<GpuIndex*>(index));
|
|
56
|
+
});
|
|
57
|
+
} else {
|
|
58
|
+
FAISS_ASSERT(!subIndex.empty());
|
|
59
|
+
f(subIndex.front().get());
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
template <typename GpuIndex>
|
|
64
|
+
void
|
|
65
|
+
IndexWrapper<GpuIndex>::setNumProbes(int nprobe) {
|
|
66
|
+
runOnIndices([nprobe](GpuIndex* index) {
|
|
67
|
+
index->setNumProbes(nprobe);
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
} }
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <faiss/IndexReplicas.h>
|
|
12
|
+
#include <faiss/gpu/StandardGpuResources.h>
|
|
13
|
+
#include <functional>
|
|
14
|
+
#include <memory>
|
|
15
|
+
#include <vector>
|
|
16
|
+
|
|
17
|
+
namespace faiss { namespace gpu {
|
|
18
|
+
|
|
19
|
+
// If we want to run multi-GPU, create a proxy to wrap the indices.
|
|
20
|
+
// If we don't want multi-GPU, don't involve the proxy, so it doesn't
|
|
21
|
+
// affect the timings.
|
|
22
|
+
template <typename GpuIndex>
|
|
23
|
+
struct IndexWrapper {
|
|
24
|
+
std::vector<std::unique_ptr<faiss::gpu::StandardGpuResources>> resources;
|
|
25
|
+
std::vector<std::unique_ptr<GpuIndex>> subIndex;
|
|
26
|
+
std::unique_ptr<faiss::IndexReplicas> replicaIndex;
|
|
27
|
+
|
|
28
|
+
IndexWrapper(
|
|
29
|
+
int numGpus,
|
|
30
|
+
std::function<std::unique_ptr<GpuIndex>(GpuResources*, int)> init);
|
|
31
|
+
faiss::Index* getIndex();
|
|
32
|
+
|
|
33
|
+
void runOnIndices(std::function<void(GpuIndex*)> f);
|
|
34
|
+
void setNumProbes(int nprobe);
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
} }
|
|
38
|
+
|
|
39
|
+
#include <faiss/gpu/perf/IndexWrapper-inl.h>
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/utils/random.h>
|
|
10
|
+
#include <faiss/Clustering.h>
|
|
11
|
+
#include <faiss/gpu/GpuIndexFlat.h>
|
|
12
|
+
#include <faiss/gpu/StandardGpuResources.h>
|
|
13
|
+
#include <faiss/gpu/perf/IndexWrapper.h>
|
|
14
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
15
|
+
#include <faiss/gpu/utils/Timer.h>
|
|
16
|
+
#include <gflags/gflags.h>
|
|
17
|
+
#include <memory>
|
|
18
|
+
#include <vector>
|
|
19
|
+
|
|
20
|
+
#include <cuda_profiler_api.h>
|
|
21
|
+
|
|
22
|
+
DEFINE_int32(num, 10000, "# of vecs");
|
|
23
|
+
DEFINE_int32(k, 100, "# of clusters");
|
|
24
|
+
DEFINE_int32(dim, 128, "# of dimensions");
|
|
25
|
+
DEFINE_int32(niter, 10, "# of iterations");
|
|
26
|
+
DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
|
|
27
|
+
DEFINE_bool(use_float16, false, "use float16 vectors and math");
|
|
28
|
+
DEFINE_bool(transposed, false, "transposed vector storage");
|
|
29
|
+
DEFINE_bool(verbose, false, "turn on clustering logging");
|
|
30
|
+
DEFINE_int64(seed, -1, "specify random seed");
|
|
31
|
+
DEFINE_int32(num_gpus, 1, "number of gpus to use");
|
|
32
|
+
DEFINE_int64(min_paging_size, -1, "minimum size to use CPU -> GPU paged copies");
|
|
33
|
+
DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");
|
|
34
|
+
DEFINE_int32(max_points, -1, "max points per centroid");
|
|
35
|
+
|
|
36
|
+
using namespace faiss::gpu;
|
|
37
|
+
|
|
38
|
+
int main(int argc, char** argv) {
|
|
39
|
+
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
|
40
|
+
|
|
41
|
+
cudaProfilerStop();
|
|
42
|
+
|
|
43
|
+
auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
|
|
44
|
+
printf("using seed %ld\n", seed);
|
|
45
|
+
|
|
46
|
+
std::vector<float> vecs((size_t) FLAGS_num * FLAGS_dim);
|
|
47
|
+
faiss::float_rand(vecs.data(), vecs.size(), seed);
|
|
48
|
+
|
|
49
|
+
printf("K-means metric %s dim %d centroids %d num train %d niter %d\n",
|
|
50
|
+
FLAGS_L2_metric ? "L2" : "IP",
|
|
51
|
+
FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
|
|
52
|
+
printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
|
|
53
|
+
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
|
|
54
|
+
printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
|
|
55
|
+
|
|
56
|
+
auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
|
|
57
|
+
std::unique_ptr<faiss::gpu::GpuIndexFlat> {
|
|
58
|
+
if (FLAGS_pinned_mem >= 0) {
|
|
59
|
+
((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
|
|
60
|
+
FLAGS_pinned_mem);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
GpuIndexFlatConfig config;
|
|
64
|
+
config.device = dev;
|
|
65
|
+
config.useFloat16 = FLAGS_use_float16;
|
|
66
|
+
config.storeTransposed = FLAGS_transposed;
|
|
67
|
+
|
|
68
|
+
auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
|
|
69
|
+
FLAGS_L2_metric ?
|
|
70
|
+
(faiss::gpu::GpuIndexFlat*)
|
|
71
|
+
new faiss::gpu::GpuIndexFlatL2(res, FLAGS_dim, config) :
|
|
72
|
+
(faiss::gpu::GpuIndexFlat*)
|
|
73
|
+
new faiss::gpu::GpuIndexFlatIP(res, FLAGS_dim, config));
|
|
74
|
+
|
|
75
|
+
if (FLAGS_min_paging_size >= 0) {
|
|
76
|
+
p->setMinPagingSize(FLAGS_min_paging_size);
|
|
77
|
+
}
|
|
78
|
+
return p;
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
IndexWrapper<faiss::gpu::GpuIndexFlat> gpuIndex(FLAGS_num_gpus, initFn);
|
|
82
|
+
|
|
83
|
+
CUDA_VERIFY(cudaProfilerStart());
|
|
84
|
+
faiss::gpu::synchronizeAllDevices();
|
|
85
|
+
|
|
86
|
+
float gpuTime = 0.0f;
|
|
87
|
+
|
|
88
|
+
faiss::ClusteringParameters cp;
|
|
89
|
+
cp.niter = FLAGS_niter;
|
|
90
|
+
cp.verbose = FLAGS_verbose;
|
|
91
|
+
|
|
92
|
+
if (FLAGS_max_points > 0) {
|
|
93
|
+
cp.max_points_per_centroid = FLAGS_max_points;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
|
|
97
|
+
|
|
98
|
+
// Time k-means
|
|
99
|
+
{
|
|
100
|
+
CpuTimer timer;
|
|
101
|
+
|
|
102
|
+
kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
|
|
103
|
+
|
|
104
|
+
// There is a device -> host copy above, so no need to time
|
|
105
|
+
// additional synchronization with the GPU
|
|
106
|
+
gpuTime = timer.elapsedMilliseconds();
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
CUDA_VERIFY(cudaProfilerStop());
|
|
110
|
+
printf("k-means time %.3f ms\n", gpuTime);
|
|
111
|
+
|
|
112
|
+
CUDA_VERIFY(cudaDeviceSynchronize());
|
|
113
|
+
|
|
114
|
+
return 0;
|
|
115
|
+
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
#include <cuda_profiler_api.h>
|
|
11
|
+
#include <faiss/IndexFlat.h>
|
|
12
|
+
#include <faiss/IndexIVFPQ.h>
|
|
13
|
+
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
|
14
|
+
#include <faiss/gpu/StandardGpuResources.h>
|
|
15
|
+
#include <faiss/gpu/test/TestUtils.h>
|
|
16
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
17
|
+
#include <faiss/gpu/utils/Timer.h>
|
|
18
|
+
#include <gflags/gflags.h>
|
|
19
|
+
#include <map>
|
|
20
|
+
#include <vector>
|
|
21
|
+
|
|
22
|
+
DEFINE_int32(batches, 10, "number of batches of vectors to add");
|
|
23
|
+
DEFINE_int32(batch_size, 10000, "number of vectors in each batch");
|
|
24
|
+
DEFINE_int32(dim, 256, "dimension of vectors");
|
|
25
|
+
DEFINE_int32(centroids, 4096, "num coarse centroids to use");
|
|
26
|
+
DEFINE_int32(bytes_per_vec, 32, "bytes per encoded vector");
|
|
27
|
+
DEFINE_int32(bits_per_code, 8, "bits per PQ code");
|
|
28
|
+
DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
|
|
29
|
+
DEFINE_bool(time_gpu, true, "time add to GPU");
|
|
30
|
+
DEFINE_bool(time_cpu, false, "time add to CPU");
|
|
31
|
+
DEFINE_bool(per_batch_time, false, "print per-batch times");
|
|
32
|
+
DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
|
|
33
|
+
|
|
34
|
+
int main(int argc, char** argv) {
|
|
35
|
+
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
|
36
|
+
|
|
37
|
+
cudaProfilerStop();
|
|
38
|
+
|
|
39
|
+
int dim = FLAGS_dim;
|
|
40
|
+
int numCentroids = FLAGS_centroids;
|
|
41
|
+
int bytesPerVec = FLAGS_bytes_per_vec;
|
|
42
|
+
int bitsPerCode = FLAGS_bits_per_code;
|
|
43
|
+
|
|
44
|
+
faiss::gpu::StandardGpuResources res;
|
|
45
|
+
|
|
46
|
+
// IndexIVFPQ will complain, but just give us enough to get through this
|
|
47
|
+
int numTrain = 4 * numCentroids;
|
|
48
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
|
|
49
|
+
|
|
50
|
+
faiss::IndexFlatL2 coarseQuantizer(dim);
|
|
51
|
+
faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
|
|
52
|
+
bytesPerVec, bitsPerCode);
|
|
53
|
+
if (FLAGS_time_cpu) {
|
|
54
|
+
cpuIndex.train(numTrain, trainVecs.data());
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
|
58
|
+
config.device = 0;
|
|
59
|
+
config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index;
|
|
60
|
+
|
|
61
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(
|
|
62
|
+
&res, dim, numCentroids, bytesPerVec, bitsPerCode,
|
|
63
|
+
faiss::METRIC_L2, config);
|
|
64
|
+
|
|
65
|
+
if (FLAGS_time_gpu) {
|
|
66
|
+
gpuIndex.train(numTrain, trainVecs.data());
|
|
67
|
+
if (FLAGS_reserve_memory) {
|
|
68
|
+
size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
|
|
69
|
+
gpuIndex.reserveMemory(numVecs);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
cudaDeviceSynchronize();
|
|
74
|
+
CUDA_VERIFY(cudaProfilerStart());
|
|
75
|
+
|
|
76
|
+
float totalGpuTime = 0.0f;
|
|
77
|
+
float totalCpuTime = 0.0f;
|
|
78
|
+
|
|
79
|
+
for (int i = 0; i < FLAGS_batches; ++i) {
|
|
80
|
+
if (!FLAGS_per_batch_time) {
|
|
81
|
+
if (i % 10 == 0) {
|
|
82
|
+
printf("Adding batch %d\n", i + 1);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
|
|
87
|
+
|
|
88
|
+
if (FLAGS_time_gpu) {
|
|
89
|
+
faiss::gpu::CpuTimer timer;
|
|
90
|
+
gpuIndex.add(FLAGS_batch_size, addVecs.data());
|
|
91
|
+
CUDA_VERIFY(cudaDeviceSynchronize());
|
|
92
|
+
auto time = timer.elapsedMilliseconds();
|
|
93
|
+
|
|
94
|
+
totalGpuTime += time;
|
|
95
|
+
|
|
96
|
+
if (FLAGS_per_batch_time) {
|
|
97
|
+
printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
|
|
98
|
+
i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
if (FLAGS_time_cpu) {
|
|
103
|
+
faiss::gpu::CpuTimer timer;
|
|
104
|
+
cpuIndex.add(FLAGS_batch_size, addVecs.data());
|
|
105
|
+
auto time = timer.elapsedMilliseconds();
|
|
106
|
+
|
|
107
|
+
totalCpuTime += time;
|
|
108
|
+
|
|
109
|
+
if (FLAGS_per_batch_time) {
|
|
110
|
+
printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
|
|
111
|
+
i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
CUDA_VERIFY(cudaProfilerStop());
|
|
117
|
+
|
|
118
|
+
int total = FLAGS_batch_size * FLAGS_batches;
|
|
119
|
+
|
|
120
|
+
if (FLAGS_time_gpu) {
|
|
121
|
+
printf("%d dim, %d centroids, %d x %d encoding\n"
|
|
122
|
+
"GPU time to add %d vectors (%d batches, %d per batch): "
|
|
123
|
+
"%.3f ms (%.3f us per)\n",
|
|
124
|
+
dim, numCentroids, bytesPerVec, bitsPerCode,
|
|
125
|
+
total, FLAGS_batches, FLAGS_batch_size,
|
|
126
|
+
totalGpuTime, totalGpuTime * 1000.0f / (float) total);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (FLAGS_time_cpu) {
|
|
130
|
+
printf("%d dim, %d centroids, %d x %d encoding\n"
|
|
131
|
+
"CPU time to add %d vectors (%d batches, %d per batch): "
|
|
132
|
+
"%.3f ms (%.3f us per)\n",
|
|
133
|
+
dim, numCentroids, bytesPerVec, bitsPerCode,
|
|
134
|
+
total, FLAGS_batches, FLAGS_batch_size,
|
|
135
|
+
totalCpuTime, totalCpuTime * 1000.0f / (float) total);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
return 0;
|
|
139
|
+
}
|