faiss 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -1,71 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include <cuda_runtime.h>
|
12
|
-
#include <string>
|
13
|
-
|
14
|
-
namespace faiss { namespace gpu {
|
15
|
-
|
16
|
-
class DeviceMemory;
|
17
|
-
|
18
|
-
class DeviceMemoryReservation {
|
19
|
-
public:
|
20
|
-
DeviceMemoryReservation();
|
21
|
-
DeviceMemoryReservation(DeviceMemory* state,
|
22
|
-
int device, void* p, size_t size,
|
23
|
-
cudaStream_t stream);
|
24
|
-
DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
|
25
|
-
~DeviceMemoryReservation();
|
26
|
-
|
27
|
-
DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
|
28
|
-
|
29
|
-
int device() { return device_; }
|
30
|
-
void* get() { return data_; }
|
31
|
-
size_t size() { return size_; }
|
32
|
-
cudaStream_t stream() { return stream_; }
|
33
|
-
|
34
|
-
private:
|
35
|
-
DeviceMemory* state_;
|
36
|
-
|
37
|
-
int device_;
|
38
|
-
void* data_;
|
39
|
-
size_t size_;
|
40
|
-
cudaStream_t stream_;
|
41
|
-
};
|
42
|
-
|
43
|
-
/// Manages temporary memory allocations on a GPU device
|
44
|
-
class DeviceMemory {
|
45
|
-
public:
|
46
|
-
virtual ~DeviceMemory();
|
47
|
-
|
48
|
-
/// Returns the device we are managing memory for
|
49
|
-
virtual int getDevice() const = 0;
|
50
|
-
|
51
|
-
/// Obtains a temporary memory allocation for our device,
|
52
|
-
/// whose usage is ordered with respect to the given stream.
|
53
|
-
virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
|
54
|
-
size_t size) = 0;
|
55
|
-
|
56
|
-
/// Returns the current size available without calling cudaMalloc
|
57
|
-
virtual size_t getSizeAvailable() const = 0;
|
58
|
-
|
59
|
-
/// Returns a string containing our current memory manager state
|
60
|
-
virtual std::string toString() const = 0;
|
61
|
-
|
62
|
-
/// Returns the high-water mark of cudaMalloc allocations for our
|
63
|
-
/// device
|
64
|
-
virtual size_t getHighWaterCudaMalloc() const = 0;
|
65
|
-
|
66
|
-
protected:
|
67
|
-
friend class DeviceMemoryReservation;
|
68
|
-
virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
|
69
|
-
};
|
70
|
-
|
71
|
-
} } // namespace
|
@@ -1,89 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/utils/MemorySpace.h>
|
10
|
-
#include <faiss/impl/FaissAssert.h>
|
11
|
-
#include <cuda_runtime.h>
|
12
|
-
|
13
|
-
namespace faiss { namespace gpu {
|
14
|
-
|
15
|
-
/// Allocates CUDA memory for a given memory space
|
16
|
-
void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
|
17
|
-
switch (space) {
|
18
|
-
case MemorySpace::Device:
|
19
|
-
{
|
20
|
-
auto err = cudaMalloc(p, size);
|
21
|
-
|
22
|
-
// Throw if we fail to allocate
|
23
|
-
FAISS_THROW_IF_NOT_FMT(
|
24
|
-
err == cudaSuccess,
|
25
|
-
"failed to cudaMalloc %zu bytes (error %d %s)",
|
26
|
-
size, (int) err, cudaGetErrorString(err));
|
27
|
-
}
|
28
|
-
break;
|
29
|
-
case MemorySpace::Unified:
|
30
|
-
{
|
31
|
-
#ifdef FAISS_UNIFIED_MEM
|
32
|
-
auto err = cudaMallocManaged(p, size);
|
33
|
-
|
34
|
-
// Throw if we fail to allocate
|
35
|
-
FAISS_THROW_IF_NOT_FMT(
|
36
|
-
err == cudaSuccess,
|
37
|
-
"failed to cudaMallocManaged %zu bytes (error %d %s)",
|
38
|
-
size, (int) err, cudaGetErrorString(err));
|
39
|
-
#else
|
40
|
-
FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
|
41
|
-
"without CUDA 8+ support");
|
42
|
-
#endif
|
43
|
-
}
|
44
|
-
break;
|
45
|
-
case MemorySpace::HostPinned:
|
46
|
-
{
|
47
|
-
auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
|
48
|
-
|
49
|
-
// Throw if we fail to allocate
|
50
|
-
FAISS_THROW_IF_NOT_FMT(
|
51
|
-
err == cudaSuccess,
|
52
|
-
"failed to cudaHostAlloc %zu bytes (error %d %s)",
|
53
|
-
size, (int) err, cudaGetErrorString(err));
|
54
|
-
}
|
55
|
-
break;
|
56
|
-
default:
|
57
|
-
FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
|
58
|
-
break;
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
// We'll allow allocation to fail, but free should always succeed and be a
|
63
|
-
// fatal error if it doesn't free
|
64
|
-
void freeMemorySpace(MemorySpace space, void* p) {
|
65
|
-
switch (space) {
|
66
|
-
case MemorySpace::Device:
|
67
|
-
case MemorySpace::Unified:
|
68
|
-
{
|
69
|
-
auto err = cudaFree(p);
|
70
|
-
FAISS_ASSERT_FMT(err == cudaSuccess,
|
71
|
-
"Failed to cudaFree pointer %p (error %d %s)",
|
72
|
-
p, (int) err, cudaGetErrorString(err));
|
73
|
-
}
|
74
|
-
break;
|
75
|
-
case MemorySpace::HostPinned:
|
76
|
-
{
|
77
|
-
auto err = cudaFreeHost(p);
|
78
|
-
FAISS_ASSERT_FMT(err == cudaSuccess,
|
79
|
-
"Failed to cudaFreeHost pointer %p (error %d %s)",
|
80
|
-
p, (int) err, cudaGetErrorString(err));
|
81
|
-
}
|
82
|
-
break;
|
83
|
-
default:
|
84
|
-
FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
|
85
|
-
break;
|
86
|
-
}
|
87
|
-
}
|
88
|
-
|
89
|
-
} }
|
@@ -1,44 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include <cuda.h>
|
12
|
-
|
13
|
-
#if CUDA_VERSION >= 8000
|
14
|
-
// Whether or not we enable usage of CUDA Unified Memory
|
15
|
-
#define FAISS_UNIFIED_MEM 1
|
16
|
-
#endif
|
17
|
-
|
18
|
-
namespace faiss { namespace gpu {
|
19
|
-
|
20
|
-
enum MemorySpace {
|
21
|
-
/// Managed using cudaMalloc/cudaFree
|
22
|
-
Device = 1,
|
23
|
-
/// Managed using cudaMallocManaged/cudaFree
|
24
|
-
Unified = 2,
|
25
|
-
/// Managed using cudaHostAlloc/cudaFreeHost
|
26
|
-
HostPinned = 3,
|
27
|
-
};
|
28
|
-
|
29
|
-
/// All memory allocations and de-allocations come through these functions
|
30
|
-
|
31
|
-
/// Allocates CUDA memory for a given memory space (void pointer)
|
32
|
-
/// Throws a FaissException if we are unable to allocate the memory
|
33
|
-
void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
|
34
|
-
|
35
|
-
template <typename T>
|
36
|
-
inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
|
37
|
-
allocMemorySpaceV(space, (void**)(void*) p, size);
|
38
|
-
}
|
39
|
-
|
40
|
-
/// Frees CUDA memory for a given memory space
|
41
|
-
/// Asserts if we are unable to free the region
|
42
|
-
void freeMemorySpace(MemorySpace space, void* p);
|
43
|
-
|
44
|
-
} }
|
@@ -1,239 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
10
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
11
|
-
#include <faiss/gpu/utils/MemorySpace.h>
|
12
|
-
#include <faiss/gpu/utils/StaticUtils.h>
|
13
|
-
#include <faiss/impl/FaissAssert.h>
|
14
|
-
#include <stdio.h>
|
15
|
-
#include <sstream>
|
16
|
-
|
17
|
-
namespace faiss { namespace gpu {
|
18
|
-
|
19
|
-
StackDeviceMemory::Stack::Stack(int d, size_t sz)
|
20
|
-
: device_(d),
|
21
|
-
isOwner_(true),
|
22
|
-
start_(nullptr),
|
23
|
-
end_(nullptr),
|
24
|
-
size_(sz),
|
25
|
-
head_(nullptr),
|
26
|
-
mallocCurrent_(0),
|
27
|
-
highWaterMemoryUsed_(0),
|
28
|
-
highWaterMalloc_(0),
|
29
|
-
cudaMallocWarning_(true) {
|
30
|
-
DeviceScope s(device_);
|
31
|
-
|
32
|
-
allocMemorySpace(MemorySpace::Device, &start_, size_);
|
33
|
-
|
34
|
-
head_ = start_;
|
35
|
-
end_ = start_ + size_;
|
36
|
-
}
|
37
|
-
|
38
|
-
StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
|
39
|
-
: device_(d),
|
40
|
-
isOwner_(isOwner),
|
41
|
-
start_((char*) p),
|
42
|
-
end_(((char*) p) + sz),
|
43
|
-
size_(sz),
|
44
|
-
head_((char*) p),
|
45
|
-
mallocCurrent_(0),
|
46
|
-
highWaterMemoryUsed_(0),
|
47
|
-
highWaterMalloc_(0),
|
48
|
-
cudaMallocWarning_(true) {
|
49
|
-
}
|
50
|
-
|
51
|
-
StackDeviceMemory::Stack::~Stack() {
|
52
|
-
if (isOwner_) {
|
53
|
-
DeviceScope s(device_);
|
54
|
-
|
55
|
-
freeMemorySpace(MemorySpace::Device, start_);
|
56
|
-
}
|
57
|
-
}
|
58
|
-
|
59
|
-
size_t
|
60
|
-
StackDeviceMemory::Stack::getSizeAvailable() const {
|
61
|
-
return (end_ - head_);
|
62
|
-
}
|
63
|
-
|
64
|
-
char*
|
65
|
-
StackDeviceMemory::Stack::getAlloc(size_t size,
|
66
|
-
cudaStream_t stream) {
|
67
|
-
if (size > (end_ - head_)) {
|
68
|
-
// Too large for our stack
|
69
|
-
DeviceScope s(device_);
|
70
|
-
|
71
|
-
if (cudaMallocWarning_) {
|
72
|
-
// Print our requested size before we attempt the allocation
|
73
|
-
fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
|
74
|
-
"or decrease query/add size (alloc %zu B, highwater %zu B)\n",
|
75
|
-
size, highWaterMalloc_);
|
76
|
-
}
|
77
|
-
|
78
|
-
char* p = nullptr;
|
79
|
-
allocMemorySpace(MemorySpace::Device, &p, size);
|
80
|
-
|
81
|
-
mallocCurrent_ += size;
|
82
|
-
highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
|
83
|
-
|
84
|
-
return p;
|
85
|
-
} else {
|
86
|
-
// We can make the allocation out of our stack
|
87
|
-
// Find all the ranges that we overlap that may have been
|
88
|
-
// previously allocated; our allocation will be [head, endAlloc)
|
89
|
-
char* startAlloc = head_;
|
90
|
-
char* endAlloc = head_ + size;
|
91
|
-
|
92
|
-
while (lastUsers_.size() > 0) {
|
93
|
-
auto& prevUser = lastUsers_.back();
|
94
|
-
|
95
|
-
// Because there is a previous user, we must overlap it
|
96
|
-
FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
|
97
|
-
|
98
|
-
if (stream != prevUser.stream_) {
|
99
|
-
// Synchronization required
|
100
|
-
// FIXME
|
101
|
-
FAISS_ASSERT(false);
|
102
|
-
}
|
103
|
-
|
104
|
-
if (endAlloc < prevUser.end_) {
|
105
|
-
// Update the previous user info
|
106
|
-
prevUser.start_ = endAlloc;
|
107
|
-
|
108
|
-
break;
|
109
|
-
}
|
110
|
-
|
111
|
-
// If we're the exact size of the previous request, then we
|
112
|
-
// don't need to continue
|
113
|
-
bool done = (prevUser.end_ == endAlloc);
|
114
|
-
|
115
|
-
lastUsers_.pop_back();
|
116
|
-
|
117
|
-
if (done) {
|
118
|
-
break;
|
119
|
-
}
|
120
|
-
}
|
121
|
-
|
122
|
-
head_ = endAlloc;
|
123
|
-
FAISS_ASSERT(head_ <= end_);
|
124
|
-
|
125
|
-
highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
|
126
|
-
(size_t) (head_ - start_));
|
127
|
-
return startAlloc;
|
128
|
-
}
|
129
|
-
}
|
130
|
-
|
131
|
-
void
|
132
|
-
StackDeviceMemory::Stack::returnAlloc(char* p,
|
133
|
-
size_t size,
|
134
|
-
cudaStream_t stream) {
|
135
|
-
if (p < start_ || p >= end_) {
|
136
|
-
// This is not on our stack; it was a one-off allocation
|
137
|
-
DeviceScope s(device_);
|
138
|
-
|
139
|
-
freeMemorySpace(MemorySpace::Device, p);
|
140
|
-
|
141
|
-
FAISS_ASSERT(mallocCurrent_ >= size);
|
142
|
-
mallocCurrent_ -= size;
|
143
|
-
} else {
|
144
|
-
// This is on our stack
|
145
|
-
// Allocations should be freed in the reverse order they are made
|
146
|
-
FAISS_ASSERT(p + size == head_);
|
147
|
-
|
148
|
-
head_ = p;
|
149
|
-
lastUsers_.push_back(Range(p, p + size, stream));
|
150
|
-
}
|
151
|
-
}
|
152
|
-
|
153
|
-
std::string
|
154
|
-
StackDeviceMemory::Stack::toString() const {
|
155
|
-
std::stringstream s;
|
156
|
-
|
157
|
-
s << "SDM device " << device_ << ": Total memory " << size_ << " ["
|
158
|
-
<< (void*) start_ << ", " << (void*) end_ << ")\n";
|
159
|
-
s << " Available memory " << (size_t) (end_ - head_)
|
160
|
-
<< " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
|
161
|
-
s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
|
162
|
-
s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
|
163
|
-
|
164
|
-
int i = lastUsers_.size();
|
165
|
-
for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
|
166
|
-
s << i-- << ": size " << (size_t) (it->end_ - it->start_)
|
167
|
-
<< " stream " << it->stream_
|
168
|
-
<< " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
|
169
|
-
}
|
170
|
-
|
171
|
-
return s.str();
|
172
|
-
}
|
173
|
-
|
174
|
-
size_t
|
175
|
-
StackDeviceMemory::Stack::getHighWaterCudaMalloc() const {
|
176
|
-
return highWaterMalloc_;
|
177
|
-
}
|
178
|
-
|
179
|
-
StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
|
180
|
-
: device_(device),
|
181
|
-
stack_(device, allocPerDevice) {
|
182
|
-
}
|
183
|
-
|
184
|
-
StackDeviceMemory::StackDeviceMemory(int device,
|
185
|
-
void* p, size_t size, bool isOwner)
|
186
|
-
: device_(device),
|
187
|
-
stack_(device, p, size, isOwner) {
|
188
|
-
}
|
189
|
-
|
190
|
-
StackDeviceMemory::~StackDeviceMemory() {
|
191
|
-
}
|
192
|
-
|
193
|
-
void
|
194
|
-
StackDeviceMemory::setCudaMallocWarning(bool b) {
|
195
|
-
stack_.cudaMallocWarning_ = b;
|
196
|
-
}
|
197
|
-
|
198
|
-
int
|
199
|
-
StackDeviceMemory::getDevice() const {
|
200
|
-
return device_;
|
201
|
-
}
|
202
|
-
|
203
|
-
DeviceMemoryReservation
|
204
|
-
StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
|
205
|
-
// We guarantee 16 byte alignment for allocations, so bump up `size`
|
206
|
-
// to the next highest multiple of 16
|
207
|
-
size = utils::roundUp(size, (size_t) 16);
|
208
|
-
|
209
|
-
return DeviceMemoryReservation(this,
|
210
|
-
device_,
|
211
|
-
stack_.getAlloc(size, stream),
|
212
|
-
size,
|
213
|
-
stream);
|
214
|
-
}
|
215
|
-
|
216
|
-
size_t
|
217
|
-
StackDeviceMemory::getSizeAvailable() const {
|
218
|
-
return stack_.getSizeAvailable();
|
219
|
-
}
|
220
|
-
|
221
|
-
std::string
|
222
|
-
StackDeviceMemory::toString() const {
|
223
|
-
return stack_.toString();
|
224
|
-
}
|
225
|
-
|
226
|
-
size_t
|
227
|
-
StackDeviceMemory::getHighWaterCudaMalloc() const {
|
228
|
-
return stack_.getHighWaterCudaMalloc();
|
229
|
-
}
|
230
|
-
|
231
|
-
void
|
232
|
-
StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
|
233
|
-
FAISS_ASSERT(m.get());
|
234
|
-
FAISS_ASSERT(device_ == m.device());
|
235
|
-
|
236
|
-
stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
|
237
|
-
}
|
238
|
-
|
239
|
-
} } // namespace
|