faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -11,18 +11,20 @@
|
|
11
11
|
#include <faiss/gpu/GpuResources.h>
|
12
12
|
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
13
13
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
14
|
+
#include <functional>
|
15
|
+
#include <map>
|
14
16
|
#include <unordered_map>
|
15
17
|
#include <vector>
|
16
18
|
|
17
19
|
namespace faiss { namespace gpu {
|
18
20
|
|
19
|
-
///
|
20
|
-
///
|
21
|
-
class
|
21
|
+
/// Standard implementation of the GpuResources object that provides for a
|
22
|
+
/// temporary memory manager
|
23
|
+
class StandardGpuResourcesImpl : public GpuResources {
|
22
24
|
public:
|
23
|
-
|
25
|
+
StandardGpuResourcesImpl();
|
24
26
|
|
25
|
-
~
|
27
|
+
~StandardGpuResourcesImpl() override;
|
26
28
|
|
27
29
|
/// Disable allocation of temporary memory; all temporary memory
|
28
30
|
/// requests will call cudaMalloc / cudaFree at the point of use
|
@@ -46,9 +48,9 @@ class StandardGpuResources : public GpuResources {
|
|
46
48
|
/// for all devices
|
47
49
|
void setDefaultNullStreamAllDevices();
|
48
50
|
|
49
|
-
///
|
50
|
-
///
|
51
|
-
void
|
51
|
+
/// If enabled, will print every GPU memory allocation and deallocation to
|
52
|
+
/// standard output
|
53
|
+
void setLogMemoryAllocations(bool enable);
|
52
54
|
|
53
55
|
public:
|
54
56
|
/// Internal system calls
|
@@ -62,7 +64,17 @@ class StandardGpuResources : public GpuResources {
|
|
62
64
|
|
63
65
|
std::vector<cudaStream_t> getAlternateStreams(int device) override;
|
64
66
|
|
65
|
-
|
67
|
+
/// Allocate non-temporary GPU memory
|
68
|
+
void* allocMemory(const AllocRequest& req) override;
|
69
|
+
|
70
|
+
/// Returns a previous allocation
|
71
|
+
void deallocMemory(int device, void* in) override;
|
72
|
+
|
73
|
+
size_t getTempMemoryAvailable(int device) const override;
|
74
|
+
|
75
|
+
/// Export a description of memory used for Python
|
76
|
+
std::map<int, std::map<std::string, std::pair<int, size_t>>>
|
77
|
+
getMemoryInfo() const;
|
66
78
|
|
67
79
|
std::pair<void*, size_t> getPinnedMemory() override;
|
68
80
|
|
@@ -77,6 +89,13 @@ class StandardGpuResources : public GpuResources {
|
|
77
89
|
static size_t getDefaultTempMemForGPU(int device, size_t requested);
|
78
90
|
|
79
91
|
private:
|
92
|
+
/// Set of currently outstanding memory allocations per device
|
93
|
+
/// device -> (alloc request, allocated ptr)
|
94
|
+
std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
|
95
|
+
|
96
|
+
/// Temporary memory provider, per each device
|
97
|
+
std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
|
98
|
+
|
80
99
|
/// Our default stream that work is ordered on, one per each device
|
81
100
|
std::unordered_map<int, cudaStream_t> defaultStreams_;
|
82
101
|
|
@@ -85,7 +104,7 @@ class StandardGpuResources : public GpuResources {
|
|
85
104
|
std::unordered_map<int, cudaStream_t> userDefaultStreams_;
|
86
105
|
|
87
106
|
/// Other streams we can use, per each device
|
88
|
-
std::unordered_map<int, std::vector<cudaStream_t
|
107
|
+
std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
|
89
108
|
|
90
109
|
/// Async copy stream to use for GPU <-> CPU pinned memory copies
|
91
110
|
std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
|
@@ -93,9 +112,6 @@ class StandardGpuResources : public GpuResources {
|
|
93
112
|
/// cuBLAS handle for each device
|
94
113
|
std::unordered_map<int, cublasHandle_t> blasHandles_;
|
95
114
|
|
96
|
-
/// Temporary memory provider, per each device
|
97
|
-
std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
|
98
|
-
|
99
115
|
/// Pinned memory allocation for use with this GPU
|
100
116
|
void* pinnedMemAlloc_;
|
101
117
|
size_t pinnedMemAllocSize_;
|
@@ -107,8 +123,60 @@ class StandardGpuResources : public GpuResources {
|
|
107
123
|
/// Amount of pinned memory we should allocate
|
108
124
|
size_t pinnedMemSize_;
|
109
125
|
|
110
|
-
/// Whether or not
|
111
|
-
bool
|
126
|
+
/// Whether or not we log every GPU memory allocation and deallocation
|
127
|
+
bool allocLogging_;
|
128
|
+
};
|
129
|
+
|
130
|
+
/// Default implementation of GpuResources that allocates a cuBLAS
|
131
|
+
/// stream and 2 streams for use, as well as temporary memory
|
132
|
+
class StandardGpuResources : public GpuResourcesProvider {
|
133
|
+
public:
|
134
|
+
StandardGpuResources();
|
135
|
+
~StandardGpuResources() override;
|
136
|
+
|
137
|
+
std::shared_ptr<GpuResources> getResources() override;
|
138
|
+
|
139
|
+
/// Disable allocation of temporary memory; all temporary memory
|
140
|
+
/// requests will call cudaMalloc / cudaFree at the point of use
|
141
|
+
void noTempMemory();
|
142
|
+
|
143
|
+
/// Specify that we wish to use a certain fixed size of memory on
|
144
|
+
/// all devices as temporary memory. This is the upper bound for the GPU
|
145
|
+
/// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
|
146
|
+
/// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
|
147
|
+
/// To avoid any temporary memory allocation, pass 0.
|
148
|
+
void setTempMemory(size_t size);
|
149
|
+
|
150
|
+
/// Set amount of pinned memory to allocate, for async GPU <-> CPU
|
151
|
+
/// transfers
|
152
|
+
void setPinnedMemory(size_t size);
|
153
|
+
|
154
|
+
/// Called to change the stream for work ordering
|
155
|
+
void setDefaultStream(int device, cudaStream_t stream);
|
156
|
+
|
157
|
+
/// Called to change the work ordering streams to the null stream
|
158
|
+
/// for all devices
|
159
|
+
void setDefaultNullStreamAllDevices();
|
160
|
+
|
161
|
+
/// Export a description of memory used for Python
|
162
|
+
std::map<int, std::map<std::string, std::pair<int, size_t>>>
|
163
|
+
getMemoryInfo() const;
|
164
|
+
|
165
|
+
/// Returns the current default stream
|
166
|
+
cudaStream_t getDefaultStream(int device);
|
167
|
+
|
168
|
+
/// Returns the current amount of temp memory available
|
169
|
+
size_t getTempMemoryAvailable(int device) const;
|
170
|
+
|
171
|
+
/// Synchronize our default stream with the CPU
|
172
|
+
void syncDefaultStreamCurrentDevice();
|
173
|
+
|
174
|
+
/// If enabled, will print every GPU memory allocation and deallocation to
|
175
|
+
/// standard output
|
176
|
+
void setLogMemoryAllocations(bool enable);
|
177
|
+
|
178
|
+
private:
|
179
|
+
std::shared_ptr<StandardGpuResourcesImpl> res_;
|
112
180
|
};
|
113
181
|
|
114
182
|
} } // namespace
|
File without changes
|
File without changes
|
@@ -13,7 +13,7 @@ namespace faiss { namespace gpu {
|
|
13
13
|
template <typename GpuIndex>
|
14
14
|
IndexWrapper<GpuIndex>::IndexWrapper(
|
15
15
|
int numGpus,
|
16
|
-
std::function<std::unique_ptr<GpuIndex>(
|
16
|
+
std::function<std::unique_ptr<GpuIndex>(GpuResourcesProvider*, int)> init) {
|
17
17
|
FAISS_ASSERT(numGpus <= faiss::gpu::getNumDevices());
|
18
18
|
for (int i = 0; i < numGpus; ++i) {
|
19
19
|
auto res = std::unique_ptr<faiss::gpu::StandardGpuResources>(
|
@@ -27,7 +27,7 @@ struct IndexWrapper {
|
|
27
27
|
|
28
28
|
IndexWrapper(
|
29
29
|
int numGpus,
|
30
|
-
std::function<std::unique_ptr<GpuIndex>(
|
30
|
+
std::function<std::unique_ptr<GpuIndex>(GpuResourcesProvider*, int)> init);
|
31
31
|
faiss::Index* getIndex();
|
32
32
|
|
33
33
|
void runOnIndices(std::function<void(GpuIndex*)> f);
|
@@ -53,7 +53,7 @@ int main(int argc, char** argv) {
|
|
53
53
|
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
|
54
54
|
printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
|
55
55
|
|
56
|
-
auto initFn = [](faiss::gpu::
|
56
|
+
auto initFn = [](faiss::gpu::GpuResourcesProvider* res, int dev) ->
|
57
57
|
std::unique_ptr<faiss::gpu::GpuIndexFlat> {
|
58
58
|
if (FLAGS_pinned_mem >= 0) {
|
59
59
|
((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
|
File without changes
|
File without changes
|
File without changes
|
@@ -277,7 +277,7 @@ TEST(TestGpuIndexFlat, CopyFrom) {
|
|
277
277
|
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
278
278
|
|
279
279
|
faiss::gpu::GpuIndexFlatConfig config;
|
280
|
-
config.device =
|
280
|
+
config.device = device;
|
281
281
|
config.useFloat16 = false;
|
282
282
|
config.storeTransposed = false;
|
283
283
|
|
File without changes
|
@@ -51,7 +51,7 @@ struct Options {
|
|
51
51
|
// support non-multiple of 8 subcodes for IVFPQ.
|
52
52
|
bitsPerCode = 8;
|
53
53
|
nprobe = std::min(faiss::gpu::randVal(40, 1000), numCentroids);
|
54
|
-
numQuery = faiss::gpu::randVal(
|
54
|
+
numQuery = faiss::gpu::randVal(4, 8);
|
55
55
|
|
56
56
|
// Due to the approximate nature of the query and of floating point
|
57
57
|
// differences between GPU and CPU, to stay within our error bounds, only
|
@@ -91,7 +91,7 @@ struct Options {
|
|
91
91
|
}
|
92
92
|
|
93
93
|
float getCompareEpsilon() const {
|
94
|
-
return 0.
|
94
|
+
return 0.035f;
|
95
95
|
}
|
96
96
|
|
97
97
|
float getPctMaxDiff1() const {
|
@@ -131,12 +131,12 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
|
|
131
131
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
132
132
|
cpuIndex.add(opt.numAdd, addVecs.data());
|
133
133
|
|
134
|
+
// Use the default temporary memory management to test the memory manager
|
134
135
|
faiss::gpu::StandardGpuResources res;
|
135
|
-
res.noTempMemory();
|
136
136
|
|
137
137
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
138
138
|
config.device = opt.device;
|
139
|
-
config.usePrecomputedTables =
|
139
|
+
config.usePrecomputedTables = (tries % 2 == 0);
|
140
140
|
config.indicesOptions = opt.indicesOpt;
|
141
141
|
config.useFloat16LookupTables = opt.useFloat16;
|
142
142
|
|
@@ -151,6 +151,93 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
|
|
151
151
|
}
|
152
152
|
}
|
153
153
|
|
154
|
+
void testMMCodeDistance(faiss::MetricType mt) {
|
155
|
+
// Explicitly test the code distance via batch matrix multiplication route
|
156
|
+
// (even for dimension sizes that would otherwise be handled by the
|
157
|
+
// specialized route (via enabling `useMMCodeDistance`)
|
158
|
+
for (int tries = 0; tries < 2; ++tries) {
|
159
|
+
Options opt;
|
160
|
+
|
161
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
162
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
163
|
+
|
164
|
+
faiss::IndexFlat coarseQuantizer(opt.dim, mt);
|
165
|
+
faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
|
166
|
+
opt.codes, opt.bitsPerCode);
|
167
|
+
cpuIndex.nprobe = opt.nprobe;
|
168
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
169
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
170
|
+
|
171
|
+
// Use the default temporary memory management to test the memory manager
|
172
|
+
faiss::gpu::StandardGpuResources res;
|
173
|
+
|
174
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
175
|
+
config.device = opt.device;
|
176
|
+
config.usePrecomputedTables = false;
|
177
|
+
config.useMMCodeDistance = true;
|
178
|
+
config.indicesOptions = opt.indicesOpt;
|
179
|
+
|
180
|
+
// Make sure that the float16 version works as well
|
181
|
+
config.useFloat16LookupTables = (tries % 2 == 0);
|
182
|
+
config.flatConfig.useFloat16 = (tries % 2 == 1);
|
183
|
+
|
184
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
|
185
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
186
|
+
|
187
|
+
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
188
|
+
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
189
|
+
opt.getCompareEpsilon(),
|
190
|
+
opt.getPctMaxDiff1(),
|
191
|
+
opt.getPctMaxDiffN());
|
192
|
+
}
|
193
|
+
|
194
|
+
// These sizes are not specialized, they will fall back to the MM version
|
195
|
+
for (int dimPerSubQ : {7, 11}) {
|
196
|
+
Options opt;
|
197
|
+
|
198
|
+
opt.codes = 12;
|
199
|
+
opt.dim = dimPerSubQ * opt.codes;
|
200
|
+
|
201
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
202
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
203
|
+
|
204
|
+
faiss::IndexFlat coarseQuantizer(opt.dim, mt);
|
205
|
+
faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
|
206
|
+
opt.codes, opt.bitsPerCode);
|
207
|
+
cpuIndex.nprobe = opt.nprobe;
|
208
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
209
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
210
|
+
|
211
|
+
// Use the default temporary memory management to test the memory manager
|
212
|
+
faiss::gpu::StandardGpuResources res;
|
213
|
+
|
214
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
215
|
+
config.device = opt.device;
|
216
|
+
config.usePrecomputedTables = false;
|
217
|
+
config.indicesOptions = opt.indicesOpt;
|
218
|
+
|
219
|
+
// Make sure that the float16 version works as well
|
220
|
+
config.useFloat16LookupTables = (dimPerSubQ == 7);
|
221
|
+
|
222
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
|
223
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
224
|
+
|
225
|
+
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
226
|
+
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
227
|
+
opt.getCompareEpsilon(),
|
228
|
+
opt.getPctMaxDiff1(),
|
229
|
+
opt.getPctMaxDiffN());
|
230
|
+
}
|
231
|
+
}
|
232
|
+
|
233
|
+
TEST(TestGpuIndexIVFPQ, Query_L2_MMCodeDistance) {
|
234
|
+
testMMCodeDistance(faiss::MetricType::METRIC_L2);
|
235
|
+
}
|
236
|
+
|
237
|
+
TEST(TestGpuIndexIVFPQ, Query_IP_MMCodeDistance) {
|
238
|
+
testMMCodeDistance(faiss::MetricType::METRIC_INNER_PRODUCT);
|
239
|
+
}
|
240
|
+
|
154
241
|
TEST(TestGpuIndexIVFPQ, Query_IP) {
|
155
242
|
for (int tries = 0; tries < 2; ++tries) {
|
156
243
|
Options opt;
|
@@ -167,8 +254,8 @@ TEST(TestGpuIndexIVFPQ, Query_IP) {
|
|
167
254
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
168
255
|
cpuIndex.add(opt.numAdd, addVecs.data());
|
169
256
|
|
257
|
+
// Use the default temporary memory management to test the memory manager
|
170
258
|
faiss::gpu::StandardGpuResources res;
|
171
|
-
res.noTempMemory();
|
172
259
|
|
173
260
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
174
261
|
config.device = opt.device;
|
@@ -199,8 +286,8 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) {
|
|
199
286
|
cpuIndex.nprobe = opt.nprobe;
|
200
287
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
201
288
|
|
289
|
+
// Use the default temporary memory management to test the memory manager
|
202
290
|
faiss::gpu::StandardGpuResources res;
|
203
|
-
res.noTempMemory();
|
204
291
|
|
205
292
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
206
293
|
config.device = opt.device;
|
@@ -235,8 +322,8 @@ TEST(TestGpuIndexIVFPQ, Add_L2) {
|
|
235
322
|
cpuIndex.nprobe = opt.nprobe;
|
236
323
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
237
324
|
|
325
|
+
// Use the default temporary memory management to test the memory manager
|
238
326
|
faiss::gpu::StandardGpuResources res;
|
239
|
-
res.noTempMemory();
|
240
327
|
|
241
328
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
242
329
|
config.device = opt.device;
|
@@ -272,8 +359,8 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
|
|
272
359
|
cpuIndex.nprobe = opt.nprobe;
|
273
360
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
274
361
|
|
362
|
+
// Use the default temporary memory management to test the memory manager
|
275
363
|
faiss::gpu::StandardGpuResources res;
|
276
|
-
res.noTempMemory();
|
277
364
|
|
278
365
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
279
366
|
config.device = opt.device;
|
@@ -296,54 +383,56 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
|
|
296
383
|
}
|
297
384
|
|
298
385
|
TEST(TestGpuIndexIVFPQ, CopyTo) {
|
299
|
-
|
300
|
-
|
301
|
-
|
386
|
+
for (int tries = 0; tries < 2; ++tries) {
|
387
|
+
Options opt;
|
388
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
389
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
302
390
|
|
303
|
-
|
304
|
-
|
391
|
+
// Use the default temporary memory management to test the memory manager
|
392
|
+
faiss::gpu::StandardGpuResources res;
|
305
393
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
394
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
395
|
+
config.device = opt.device;
|
396
|
+
config.usePrecomputedTables = (tries % 2 == 0);
|
397
|
+
config.indicesOptions = opt.indicesOpt;
|
398
|
+
config.useFloat16LookupTables = opt.useFloat16;
|
311
399
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
400
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(&res,
|
401
|
+
opt.dim,
|
402
|
+
opt.numCentroids,
|
403
|
+
opt.codes,
|
404
|
+
opt.bitsPerCode,
|
405
|
+
faiss::METRIC_L2,
|
406
|
+
config);
|
407
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
408
|
+
gpuIndex.train(opt.numTrain, trainVecs.data());
|
409
|
+
gpuIndex.add(opt.numAdd, addVecs.data());
|
322
410
|
|
323
|
-
|
324
|
-
|
325
|
-
|
411
|
+
// Use garbage values to see if we overwrite them
|
412
|
+
faiss::IndexFlatL2 cpuQuantizer(1);
|
413
|
+
faiss::IndexIVFPQ cpuIndex(&cpuQuantizer, 1, 1, 1, 1);
|
326
414
|
|
327
|
-
|
415
|
+
gpuIndex.copyTo(&cpuIndex);
|
328
416
|
|
329
|
-
|
330
|
-
|
417
|
+
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
418
|
+
EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
|
331
419
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
420
|
+
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
421
|
+
EXPECT_EQ(cpuIndex.d, opt.dim);
|
422
|
+
EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
|
423
|
+
EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
|
424
|
+
EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
|
425
|
+
EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
|
426
|
+
EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
|
427
|
+
EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
|
340
428
|
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
429
|
+
// Query both objects; results should be equivalent
|
430
|
+
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
431
|
+
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
432
|
+
opt.getCompareEpsilon(),
|
433
|
+
opt.getPctMaxDiff1(),
|
434
|
+
opt.getPctMaxDiffN());
|
435
|
+
}
|
347
436
|
}
|
348
437
|
|
349
438
|
TEST(TestGpuIndexIVFPQ, CopyFrom) {
|
@@ -358,9 +447,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
|
|
358
447
|
cpuIndex.train(opt.numTrain, trainVecs.data());
|
359
448
|
cpuIndex.add(opt.numAdd, addVecs.data());
|
360
449
|
|
361
|
-
// Use
|
450
|
+
// Use the default temporary memory management to test the memory manager
|
362
451
|
faiss::gpu::StandardGpuResources res;
|
363
|
-
res.noTempMemory();
|
364
452
|
|
365
453
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
366
454
|
config.device = opt.device;
|
@@ -368,6 +456,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
|
|
368
456
|
config.indicesOptions = opt.indicesOpt;
|
369
457
|
config.useFloat16LookupTables = opt.useFloat16;
|
370
458
|
|
459
|
+
// Use garbage values to see if we overwrite them
|
371
460
|
faiss::gpu::GpuIndexIVFPQ
|
372
461
|
gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
|
373
462
|
gpuIndex.setNumProbes(1);
|
@@ -401,8 +490,8 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
|
|
401
490
|
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
402
491
|
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
403
492
|
|
493
|
+
// Use the default temporary memory management to test the memory manager
|
404
494
|
faiss::gpu::StandardGpuResources res;
|
405
|
-
res.noTempMemory();
|
406
495
|
|
407
496
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
408
497
|
config.device = opt.device;
|
@@ -447,8 +536,8 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
|
|
447
536
|
TEST(TestGpuIndexIVFPQ, AddNaN) {
|
448
537
|
Options opt;
|
449
538
|
|
539
|
+
// Use the default temporary memory management to test the memory manager
|
450
540
|
faiss::gpu::StandardGpuResources res;
|
451
|
-
res.noTempMemory();
|
452
541
|
|
453
542
|
faiss::gpu::GpuIndexIVFPQConfig config;
|
454
543
|
config.device = opt.device;
|