faiss 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -11,18 +11,20 @@
|
|
11
11
|
#include <faiss/gpu/GpuResources.h>
|
12
12
|
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
13
13
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
14
|
+
#include <functional>
|
15
|
+
#include <map>
|
14
16
|
#include <unordered_map>
|
15
17
|
#include <vector>
|
16
18
|
|
17
19
|
namespace faiss { namespace gpu {
|
18
20
|
|
19
|
-
///
|
20
|
-
///
|
21
|
-
class
|
21
|
+
/// Standard implementation of the GpuResources object that provides for a
|
22
|
+
/// temporary memory manager
|
23
|
+
class StandardGpuResourcesImpl : public GpuResources {
|
22
24
|
public:
|
23
|
-
|
25
|
+
StandardGpuResourcesImpl();
|
24
26
|
|
25
|
-
~
|
27
|
+
~StandardGpuResourcesImpl() override;
|
26
28
|
|
27
29
|
/// Disable allocation of temporary memory; all temporary memory
|
28
30
|
/// requests will call cudaMalloc / cudaFree at the point of use
|
@@ -46,9 +48,9 @@ class StandardGpuResources : public GpuResources {
|
|
46
48
|
/// for all devices
|
47
49
|
void setDefaultNullStreamAllDevices();
|
48
50
|
|
49
|
-
///
|
50
|
-
///
|
51
|
-
void
|
51
|
+
/// If enabled, will print every GPU memory allocation and deallocation to
|
52
|
+
/// standard output
|
53
|
+
void setLogMemoryAllocations(bool enable);
|
52
54
|
|
53
55
|
public:
|
54
56
|
/// Internal system calls
|
@@ -62,7 +64,17 @@ class StandardGpuResources : public GpuResources {
|
|
62
64
|
|
63
65
|
std::vector<cudaStream_t> getAlternateStreams(int device) override;
|
64
66
|
|
65
|
-
|
67
|
+
/// Allocate non-temporary GPU memory
|
68
|
+
void* allocMemory(const AllocRequest& req) override;
|
69
|
+
|
70
|
+
/// Returns a previous allocation
|
71
|
+
void deallocMemory(int device, void* in) override;
|
72
|
+
|
73
|
+
size_t getTempMemoryAvailable(int device) const override;
|
74
|
+
|
75
|
+
/// Export a description of memory used for Python
|
76
|
+
std::map<int, std::map<std::string, std::pair<int, size_t>>>
|
77
|
+
getMemoryInfo() const;
|
66
78
|
|
67
79
|
std::pair<void*, size_t> getPinnedMemory() override;
|
68
80
|
|
@@ -77,6 +89,13 @@ class StandardGpuResources : public GpuResources {
|
|
77
89
|
static size_t getDefaultTempMemForGPU(int device, size_t requested);
|
78
90
|
|
79
91
|
private:
|
92
|
+
/// Set of currently outstanding memory allocations per device
|
93
|
+
/// device -> (allocated ptr -> alloc request)
|
94
|
+
std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
|
95
|
+
|
96
|
+
/// Temporary memory provider, per each device
|
97
|
+
std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
|
98
|
+
|
80
99
|
/// Our default stream that work is ordered on, one per each device
|
81
100
|
std::unordered_map<int, cudaStream_t> defaultStreams_;
|
82
101
|
|
@@ -85,7 +104,7 @@ class StandardGpuResources : public GpuResources {
|
|
85
104
|
std::unordered_map<int, cudaStream_t> userDefaultStreams_;
|
86
105
|
|
87
106
|
/// Other streams we can use, per each device
|
88
|
-
std::unordered_map<int, std::vector<cudaStream_t
|
107
|
+
std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
|
89
108
|
|
90
109
|
/// Async copy stream to use for GPU <-> CPU pinned memory copies
|
91
110
|
std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
|
@@ -93,9 +112,6 @@ class StandardGpuResources : public GpuResources {
|
|
93
112
|
/// cuBLAS handle for each device
|
94
113
|
std::unordered_map<int, cublasHandle_t> blasHandles_;
|
95
114
|
|
96
|
-
/// Temporary memory provider, per each device
|
97
|
-
std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
|
98
|
-
|
99
115
|
/// Pinned memory allocation for use with this GPU
|
100
116
|
void* pinnedMemAlloc_;
|
101
117
|
size_t pinnedMemAllocSize_;
|
@@ -107,8 +123,60 @@ class StandardGpuResources : public GpuResources {
|
|
107
123
|
/// Amount of pinned memory we should allocate
|
108
124
|
size_t pinnedMemSize_;
|
109
125
|
|
110
|
-
/// Whether or not
|
111
|
-
bool
|
126
|
+
/// Whether or not we log every GPU memory allocation and deallocation
|
127
|
+
bool allocLogging_;
|
128
|
+
};
|
129
|
+
|
130
|
+
/// Default implementation of GpuResources that allocates a cuBLAS handle
|
131
|
+
/// stream and 2 streams for use, as well as temporary memory
|
132
|
+
class StandardGpuResources : public GpuResourcesProvider {
|
133
|
+
public:
|
134
|
+
StandardGpuResources();
|
135
|
+
~StandardGpuResources() override;
|
136
|
+
|
137
|
+
std::shared_ptr<GpuResources> getResources() override;
|
138
|
+
|
139
|
+
/// Disable allocation of temporary memory; all temporary memory
|
140
|
+
/// requests will call cudaMalloc / cudaFree at the point of use
|
141
|
+
void noTempMemory();
|
142
|
+
|
143
|
+
/// Specify that we wish to use a certain fixed size of memory on
|
144
|
+
/// all devices as temporary memory. This is the upper bound for the GPU
|
145
|
+
/// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
|
146
|
+
/// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
|
147
|
+
/// To avoid any temporary memory allocation, pass 0.
|
148
|
+
void setTempMemory(size_t size);
|
149
|
+
|
150
|
+
/// Set amount of pinned memory to allocate, for async GPU <-> CPU
|
151
|
+
/// transfers
|
152
|
+
void setPinnedMemory(size_t size);
|
153
|
+
|
154
|
+
/// Called to change the stream for work ordering
|
155
|
+
void setDefaultStream(int device, cudaStream_t stream);
|
156
|
+
|
157
|
+
/// Called to change the work ordering streams to the null stream
|
158
|
+
/// for all devices
|
159
|
+
void setDefaultNullStreamAllDevices();
|
160
|
+
|
161
|
+
/// Export a description of memory used for Python
|
162
|
+
std::map<int, std::map<std::string, std::pair<int, size_t>>>
|
163
|
+
getMemoryInfo() const;
|
164
|
+
|
165
|
+
/// Returns the current default stream
|
166
|
+
cudaStream_t getDefaultStream(int device);
|
167
|
+
|
168
|
+
/// Returns the current amount of temp memory available
|
169
|
+
size_t getTempMemoryAvailable(int device) const;
|
170
|
+
|
171
|
+
/// Synchronize our default stream with the CPU
|
172
|
+
void syncDefaultStreamCurrentDevice();
|
173
|
+
|
174
|
+
/// If enabled, will print every GPU memory allocation and deallocation to
|
175
|
+
/// standard output
|
176
|
+
void setLogMemoryAllocations(bool enable);
|
177
|
+
|
178
|
+
private:
|
179
|
+
std::shared_ptr<StandardGpuResourcesImpl> res_;
|
112
180
|
};
|
113
181
|
|
114
182
|
} } // namespace
|
File without changes
|
File without changes
|
@@ -13,7 +13,7 @@ namespace faiss { namespace gpu {
|
|
13
13
|
template <typename GpuIndex>
|
14
14
|
IndexWrapper<GpuIndex>::IndexWrapper(
|
15
15
|
int numGpus,
|
16
|
-
std::function<std::unique_ptr<GpuIndex>(
|
16
|
+
std::function<std::unique_ptr<GpuIndex>(GpuResourcesProvider*, int)> init) {
|
17
17
|
FAISS_ASSERT(numGpus <= faiss::gpu::getNumDevices());
|
18
18
|
for (int i = 0; i < numGpus; ++i) {
|
19
19
|
auto res = std::unique_ptr<faiss::gpu::StandardGpuResources>(
|
@@ -27,7 +27,7 @@ struct IndexWrapper {
|
|
27
27
|
|
28
28
|
IndexWrapper(
|
29
29
|
int numGpus,
|
30
|
-
std::function<std::unique_ptr<GpuIndex>(
|
30
|
+
std::function<std::unique_ptr<GpuIndex>(GpuResourcesProvider*, int)> init);
|
31
31
|
faiss::Index* getIndex();
|
32
32
|
|
33
33
|
void runOnIndices(std::function<void(GpuIndex*)> f);
|
@@ -53,7 +53,7 @@ int main(int argc, char** argv) {
|
|
53
53
|
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
|
54
54
|
printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
|
55
55
|
|
56
|
-
auto initFn = [](faiss::gpu::
|
56
|
+
auto initFn = [](faiss::gpu::GpuResourcesProvider* res, int dev) ->
|
57
57
|
std::unique_ptr<faiss::gpu::GpuIndexFlat> {
|
58
58
|
if (FLAGS_pinned_mem >= 0) {
|
59
59
|
((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
|
File without changes
|
File without changes
|
File without changes
|
@@ -277,7 +277,7 @@ TEST(TestGpuIndexFlat, CopyFrom) {
|
|
277
277
|
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
278
278
|
|
279
279
|
faiss::gpu::GpuIndexFlatConfig config;
|
280
|
-
config.device =
|
280
|
+
config.device = device;
|
281
281
|
config.useFloat16 = false;
|
282
282
|
config.storeTransposed = false;
|
283
283
|
|
File without changes
|
@@ -51,7 +51,7 @@ struct Options {
|
|
51
51
|
// support non-multiple of 8 subcodes for IVFPQ.
|
52
52
|
bitsPerCode = 8;
|
53
53
|
nprobe = std::min(faiss::gpu::randVal(40, 1000), numCentroids);
|
54
|
-
numQuery = faiss::gpu::randVal(
|
54
|
+
numQuery = faiss::gpu::randVal(4, 8);
|
55
55
|
|
56
56
|
// Due to the approximate nature of the query and of floating point
|
57
57
|
// differences between GPU and CPU, to stay within our error bounds, only
|
@@ -91,7 +91,7 @@ struct Options {
|
|
91
91
|
}
|
92
92
|
|
93
93
|
float getCompareEpsilon() const {
|
94
|
-
return 0.
|
94
|
+
return 0.035f;
|
95
95
|
}
|
96
96
|
|
97
97
|
float getPctMaxDiff1() const {
|
@@ -131,12 +131,12 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
|
|
131
131
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
132
132
|
cpuIndex.add(opt.numAdd, addVecs.data());
|
133
133
|
|
134
|
+
// Use the default temporary memory management to test the memory manager
|
134
135
|
faiss::gpu::StandardGpuResources res;
|
135
|
-
res.noTempMemory();
|
136
136
|
|
137
137
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
138
138
|
config.device = opt.device;
|
139
|
-
config.usePrecomputedTables =
|
139
|
+
config.usePrecomputedTables = (tries % 2 == 0);
|
140
140
|
config.indicesOptions = opt.indicesOpt;
|
141
141
|
config.useFloat16LookupTables = opt.useFloat16;
|
142
142
|
|
@@ -151,6 +151,93 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
|
|
151
151
|
}
|
152
152
|
}
|
153
153
|
|
154
|
+
void testMMCodeDistance(faiss::MetricType mt) {
|
155
|
+
// Explicitly test the code distance via batch matrix multiplication route
|
156
|
+
// (even for dimension sizes that would otherwise be handled by the
|
157
|
+
// specialized route) by enabling `useMMCodeDistance`
|
158
|
+
for (int tries = 0; tries < 2; ++tries) {
|
159
|
+
Options opt;
|
160
|
+
|
161
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
162
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
163
|
+
|
164
|
+
faiss::IndexFlat coarseQuantizer(opt.dim, mt);
|
165
|
+
faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
|
166
|
+
opt.codes, opt.bitsPerCode);
|
167
|
+
cpuIndex.nprobe = opt.nprobe;
|
168
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
169
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
170
|
+
|
171
|
+
// Use the default temporary memory management to test the memory manager
|
172
|
+
faiss::gpu::StandardGpuResources res;
|
173
|
+
|
174
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
175
|
+
config.device = opt.device;
|
176
|
+
config.usePrecomputedTables = false;
|
177
|
+
config.useMMCodeDistance = true;
|
178
|
+
config.indicesOptions = opt.indicesOpt;
|
179
|
+
|
180
|
+
// Make sure that the float16 version works as well
|
181
|
+
config.useFloat16LookupTables = (tries % 2 == 0);
|
182
|
+
config.flatConfig.useFloat16 = (tries % 2 == 1);
|
183
|
+
|
184
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
|
185
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
186
|
+
|
187
|
+
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
188
|
+
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
189
|
+
opt.getCompareEpsilon(),
|
190
|
+
opt.getPctMaxDiff1(),
|
191
|
+
opt.getPctMaxDiffN());
|
192
|
+
}
|
193
|
+
|
194
|
+
// These sizes are not specialized, they will fall back to the MM version
|
195
|
+
for (int dimPerSubQ : {7, 11}) {
|
196
|
+
Options opt;
|
197
|
+
|
198
|
+
opt.codes = 12;
|
199
|
+
opt.dim = dimPerSubQ * opt.codes;
|
200
|
+
|
201
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
202
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
203
|
+
|
204
|
+
faiss::IndexFlat coarseQuantizer(opt.dim, mt);
|
205
|
+
faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
|
206
|
+
opt.codes, opt.bitsPerCode);
|
207
|
+
cpuIndex.nprobe = opt.nprobe;
|
208
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
209
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
210
|
+
|
211
|
+
// Use the default temporary memory management to test the memory manager
|
212
|
+
faiss::gpu::StandardGpuResources res;
|
213
|
+
|
214
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
215
|
+
config.device = opt.device;
|
216
|
+
config.usePrecomputedTables = false;
|
217
|
+
config.indicesOptions = opt.indicesOpt;
|
218
|
+
|
219
|
+
// Make sure that the float16 version works as well
|
220
|
+
config.useFloat16LookupTables = (dimPerSubQ == 7);
|
221
|
+
|
222
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
|
223
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
224
|
+
|
225
|
+
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
226
|
+
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
227
|
+
opt.getCompareEpsilon(),
|
228
|
+
opt.getPctMaxDiff1(),
|
229
|
+
opt.getPctMaxDiffN());
|
230
|
+
}
|
231
|
+
}
|
232
|
+
|
233
|
+
TEST(TestGpuIndexIVFPQ, Query_L2_MMCodeDistance) {
|
234
|
+
testMMCodeDistance(faiss::MetricType::METRIC_L2);
|
235
|
+
}
|
236
|
+
|
237
|
+
TEST(TestGpuIndexIVFPQ, Query_IP_MMCodeDistance) {
|
238
|
+
testMMCodeDistance(faiss::MetricType::METRIC_INNER_PRODUCT);
|
239
|
+
}
|
240
|
+
|
154
241
|
TEST(TestGpuIndexIVFPQ, Query_IP) {
|
155
242
|
for (int tries = 0; tries < 2; ++tries) {
|
156
243
|
Options opt;
|
@@ -167,8 +254,8 @@ TEST(TestGpuIndexIVFPQ, Query_IP) {
|
|
167
254
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
168
255
|
cpuIndex.add(opt.numAdd, addVecs.data());
|
169
256
|
|
257
|
+
// Use the default temporary memory management to test the memory manager
|
170
258
|
faiss::gpu::StandardGpuResources res;
|
171
|
-
res.noTempMemory();
|
172
259
|
|
173
260
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
174
261
|
config.device = opt.device;
|
@@ -199,8 +286,8 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) {
|
|
199
286
|
cpuIndex.nprobe = opt.nprobe;
|
200
287
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
201
288
|
|
289
|
+
// Use the default temporary memory management to test the memory manager
|
202
290
|
faiss::gpu::StandardGpuResources res;
|
203
|
-
res.noTempMemory();
|
204
291
|
|
205
292
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
206
293
|
config.device = opt.device;
|
@@ -235,8 +322,8 @@ TEST(TestGpuIndexIVFPQ, Add_L2) {
|
|
235
322
|
cpuIndex.nprobe = opt.nprobe;
|
236
323
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
237
324
|
|
325
|
+
// Use the default temporary memory management to test the memory manager
|
238
326
|
faiss::gpu::StandardGpuResources res;
|
239
|
-
res.noTempMemory();
|
240
327
|
|
241
328
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
242
329
|
config.device = opt.device;
|
@@ -272,8 +359,8 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
|
|
272
359
|
cpuIndex.nprobe = opt.nprobe;
|
273
360
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
274
361
|
|
362
|
+
// Use the default temporary memory management to test the memory manager
|
275
363
|
faiss::gpu::StandardGpuResources res;
|
276
|
-
res.noTempMemory();
|
277
364
|
|
278
365
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
279
366
|
config.device = opt.device;
|
@@ -296,54 +383,56 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
|
|
296
383
|
}
|
297
384
|
|
298
385
|
TEST(TestGpuIndexIVFPQ, CopyTo) {
|
299
|
-
|
300
|
-
|
301
|
-
|
386
|
+
for (int tries = 0; tries < 2; ++tries) {
|
387
|
+
Options opt;
|
388
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
389
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
302
390
|
|
303
|
-
|
304
|
-
|
391
|
+
// Use the default temporary memory management to test the memory manager
|
392
|
+
faiss::gpu::StandardGpuResources res;
|
305
393
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
394
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
395
|
+
config.device = opt.device;
|
396
|
+
config.usePrecomputedTables = (tries % 2 == 0);
|
397
|
+
config.indicesOptions = opt.indicesOpt;
|
398
|
+
config.useFloat16LookupTables = opt.useFloat16;
|
311
399
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
400
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(&res,
|
401
|
+
opt.dim,
|
402
|
+
opt.numCentroids,
|
403
|
+
opt.codes,
|
404
|
+
opt.bitsPerCode,
|
405
|
+
faiss::METRIC_L2,
|
406
|
+
config);
|
407
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
408
|
+
gpuIndex.train(opt.numTrain, trainVecs.data());
|
409
|
+
gpuIndex.add(opt.numAdd, addVecs.data());
|
322
410
|
|
323
|
-
|
324
|
-
|
325
|
-
|
411
|
+
// Use garbage values to see if we overwrite them
|
412
|
+
faiss::IndexFlatL2 cpuQuantizer(1);
|
413
|
+
faiss::IndexIVFPQ cpuIndex(&cpuQuantizer, 1, 1, 1, 1);
|
326
414
|
|
327
|
-
|
415
|
+
gpuIndex.copyTo(&cpuIndex);
|
328
416
|
|
329
|
-
|
330
|
-
|
417
|
+
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
418
|
+
EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
|
331
419
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
420
|
+
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
421
|
+
EXPECT_EQ(cpuIndex.d, opt.dim);
|
422
|
+
EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
|
423
|
+
EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
|
424
|
+
EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
|
425
|
+
EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
|
426
|
+
EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
|
427
|
+
EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
|
340
428
|
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
429
|
+
// Query both objects; results should be equivalent
|
430
|
+
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
431
|
+
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
432
|
+
opt.getCompareEpsilon(),
|
433
|
+
opt.getPctMaxDiff1(),
|
434
|
+
opt.getPctMaxDiffN());
|
435
|
+
}
|
347
436
|
}
|
348
437
|
|
349
438
|
TEST(TestGpuIndexIVFPQ, CopyFrom) {
|
@@ -358,9 +447,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
|
|
358
447
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
359
448
|
cpuIndex.add(opt.numAdd, addVecs.data());
|
360
449
|
|
361
|
-
// Use
|
450
|
+
// Use the default temporary memory management to test the memory manager
|
362
451
|
faiss::gpu::StandardGpuResources res;
|
363
|
-
res.noTempMemory();
|
364
452
|
|
365
453
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
366
454
|
config.device = opt.device;
|
@@ -368,6 +456,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
|
|
368
456
|
config.indicesOptions = opt.indicesOpt;
|
369
457
|
config.useFloat16LookupTables = opt.useFloat16;
|
370
458
|
|
459
|
+
// Use garbage values to see if we overwrite them
|
371
460
|
faiss::gpu::GpuIndexIVFPQ
|
372
461
|
gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
|
373
462
|
gpuIndex.setNumProbes(1);
|
@@ -401,8 +490,8 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
|
|
401
490
|
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
402
491
|
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
403
492
|
|
493
|
+
// Use the default temporary memory management to test the memory manager
|
404
494
|
faiss::gpu::StandardGpuResources res;
|
405
|
-
res.noTempMemory();
|
406
495
|
|
407
496
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
408
497
|
config.device = opt.device;
|
@@ -447,8 +536,8 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
|
|
447
536
|
TEST(TestGpuIndexIVFPQ, AddNaN) {
|
448
537
|
Options opt;
|
449
538
|
|
539
|
+
// Use the default temporary memory management to test the memory manager
|
450
540
|
faiss::gpu::StandardGpuResources res;
|
451
|
-
res.noTempMemory();
|
452
541
|
|
453
542
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
454
543
|
config.device = opt.device;
|