faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -1,71 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include <cuda_runtime.h>
|
12
|
-
#include <string>
|
13
|
-
|
14
|
-
namespace faiss { namespace gpu {
|
15
|
-
|
16
|
-
class DeviceMemory;
|
17
|
-
|
18
|
-
class DeviceMemoryReservation {
|
19
|
-
public:
|
20
|
-
DeviceMemoryReservation();
|
21
|
-
DeviceMemoryReservation(DeviceMemory* state,
|
22
|
-
int device, void* p, size_t size,
|
23
|
-
cudaStream_t stream);
|
24
|
-
DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
|
25
|
-
~DeviceMemoryReservation();
|
26
|
-
|
27
|
-
DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
|
28
|
-
|
29
|
-
int device() { return device_; }
|
30
|
-
void* get() { return data_; }
|
31
|
-
size_t size() { return size_; }
|
32
|
-
cudaStream_t stream() { return stream_; }
|
33
|
-
|
34
|
-
private:
|
35
|
-
DeviceMemory* state_;
|
36
|
-
|
37
|
-
int device_;
|
38
|
-
void* data_;
|
39
|
-
size_t size_;
|
40
|
-
cudaStream_t stream_;
|
41
|
-
};
|
42
|
-
|
43
|
-
/// Manages temporary memory allocations on a GPU device
|
44
|
-
class DeviceMemory {
|
45
|
-
public:
|
46
|
-
virtual ~DeviceMemory();
|
47
|
-
|
48
|
-
/// Returns the device we are managing memory for
|
49
|
-
virtual int getDevice() const = 0;
|
50
|
-
|
51
|
-
/// Obtains a temporary memory allocation for our device,
|
52
|
-
/// whose usage is ordered with respect to the given stream.
|
53
|
-
virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
|
54
|
-
size_t size) = 0;
|
55
|
-
|
56
|
-
/// Returns the current size available without calling cudaMalloc
|
57
|
-
virtual size_t getSizeAvailable() const = 0;
|
58
|
-
|
59
|
-
/// Returns a string containing our current memory manager state
|
60
|
-
virtual std::string toString() const = 0;
|
61
|
-
|
62
|
-
/// Returns the high-water mark of cudaMalloc allocations for our
|
63
|
-
/// device
|
64
|
-
virtual size_t getHighWaterCudaMalloc() const = 0;
|
65
|
-
|
66
|
-
protected:
|
67
|
-
friend class DeviceMemoryReservation;
|
68
|
-
virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
|
69
|
-
};
|
70
|
-
|
71
|
-
} } // namespace
|
@@ -1,89 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/utils/MemorySpace.h>
|
10
|
-
#include <faiss/impl/FaissAssert.h>
|
11
|
-
#include <cuda_runtime.h>
|
12
|
-
|
13
|
-
namespace faiss { namespace gpu {
|
14
|
-
|
15
|
-
/// Allocates CUDA memory for a given memory space
|
16
|
-
void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
|
17
|
-
switch (space) {
|
18
|
-
case MemorySpace::Device:
|
19
|
-
{
|
20
|
-
auto err = cudaMalloc(p, size);
|
21
|
-
|
22
|
-
// Throw if we fail to allocate
|
23
|
-
FAISS_THROW_IF_NOT_FMT(
|
24
|
-
err == cudaSuccess,
|
25
|
-
"failed to cudaMalloc %zu bytes (error %d %s)",
|
26
|
-
size, (int) err, cudaGetErrorString(err));
|
27
|
-
}
|
28
|
-
break;
|
29
|
-
case MemorySpace::Unified:
|
30
|
-
{
|
31
|
-
#ifdef FAISS_UNIFIED_MEM
|
32
|
-
auto err = cudaMallocManaged(p, size);
|
33
|
-
|
34
|
-
// Throw if we fail to allocate
|
35
|
-
FAISS_THROW_IF_NOT_FMT(
|
36
|
-
err == cudaSuccess,
|
37
|
-
"failed to cudaMallocManaged %zu bytes (error %d %s)",
|
38
|
-
size, (int) err, cudaGetErrorString(err));
|
39
|
-
#else
|
40
|
-
FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
|
41
|
-
"without CUDA 8+ support");
|
42
|
-
#endif
|
43
|
-
}
|
44
|
-
break;
|
45
|
-
case MemorySpace::HostPinned:
|
46
|
-
{
|
47
|
-
auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
|
48
|
-
|
49
|
-
// Throw if we fail to allocate
|
50
|
-
FAISS_THROW_IF_NOT_FMT(
|
51
|
-
err == cudaSuccess,
|
52
|
-
"failed to cudaHostAlloc %zu bytes (error %d %s)",
|
53
|
-
size, (int) err, cudaGetErrorString(err));
|
54
|
-
}
|
55
|
-
break;
|
56
|
-
default:
|
57
|
-
FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
|
58
|
-
break;
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
// We'll allow allocation to fail, but free should always succeed and be a
|
63
|
-
// fatal error if it doesn't free
|
64
|
-
void freeMemorySpace(MemorySpace space, void* p) {
|
65
|
-
switch (space) {
|
66
|
-
case MemorySpace::Device:
|
67
|
-
case MemorySpace::Unified:
|
68
|
-
{
|
69
|
-
auto err = cudaFree(p);
|
70
|
-
FAISS_ASSERT_FMT(err == cudaSuccess,
|
71
|
-
"Failed to cudaFree pointer %p (error %d %s)",
|
72
|
-
p, (int) err, cudaGetErrorString(err));
|
73
|
-
}
|
74
|
-
break;
|
75
|
-
case MemorySpace::HostPinned:
|
76
|
-
{
|
77
|
-
auto err = cudaFreeHost(p);
|
78
|
-
FAISS_ASSERT_FMT(err == cudaSuccess,
|
79
|
-
"Failed to cudaFreeHost pointer %p (error %d %s)",
|
80
|
-
p, (int) err, cudaGetErrorString(err));
|
81
|
-
}
|
82
|
-
break;
|
83
|
-
default:
|
84
|
-
FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
|
85
|
-
break;
|
86
|
-
}
|
87
|
-
}
|
88
|
-
|
89
|
-
} }
|
@@ -1,44 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include <cuda.h>
|
12
|
-
|
13
|
-
#if CUDA_VERSION >= 8000
|
14
|
-
// Whether or not we enable usage of CUDA Unified Memory
|
15
|
-
#define FAISS_UNIFIED_MEM 1
|
16
|
-
#endif
|
17
|
-
|
18
|
-
namespace faiss { namespace gpu {
|
19
|
-
|
20
|
-
enum MemorySpace {
|
21
|
-
/// Managed using cudaMalloc/cudaFree
|
22
|
-
Device = 1,
|
23
|
-
/// Managed using cudaMallocManaged/cudaFree
|
24
|
-
Unified = 2,
|
25
|
-
/// Managed using cudaHostAlloc/cudaFreeHost
|
26
|
-
HostPinned = 3,
|
27
|
-
};
|
28
|
-
|
29
|
-
/// All memory allocations and de-allocations come through these functions
|
30
|
-
|
31
|
-
/// Allocates CUDA memory for a given memory space (void pointer)
|
32
|
-
/// Throws a FaissException if we are unable to allocate the memory
|
33
|
-
void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
|
34
|
-
|
35
|
-
template <typename T>
|
36
|
-
inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
|
37
|
-
allocMemorySpaceV(space, (void**)(void*) p, size);
|
38
|
-
}
|
39
|
-
|
40
|
-
/// Frees CUDA memory for a given memory space
|
41
|
-
/// Asserts if we are unable to free the region
|
42
|
-
void freeMemorySpace(MemorySpace space, void* p);
|
43
|
-
|
44
|
-
} }
|
@@ -1,239 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
10
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
11
|
-
#include <faiss/gpu/utils/MemorySpace.h>
|
12
|
-
#include <faiss/gpu/utils/StaticUtils.h>
|
13
|
-
#include <faiss/impl/FaissAssert.h>
|
14
|
-
#include <stdio.h>
|
15
|
-
#include <sstream>
|
16
|
-
|
17
|
-
namespace faiss { namespace gpu {
|
18
|
-
|
19
|
-
StackDeviceMemory::Stack::Stack(int d, size_t sz)
|
20
|
-
: device_(d),
|
21
|
-
isOwner_(true),
|
22
|
-
start_(nullptr),
|
23
|
-
end_(nullptr),
|
24
|
-
size_(sz),
|
25
|
-
head_(nullptr),
|
26
|
-
mallocCurrent_(0),
|
27
|
-
highWaterMemoryUsed_(0),
|
28
|
-
highWaterMalloc_(0),
|
29
|
-
cudaMallocWarning_(true) {
|
30
|
-
DeviceScope s(device_);
|
31
|
-
|
32
|
-
allocMemorySpace(MemorySpace::Device, &start_, size_);
|
33
|
-
|
34
|
-
head_ = start_;
|
35
|
-
end_ = start_ + size_;
|
36
|
-
}
|
37
|
-
|
38
|
-
StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
|
39
|
-
: device_(d),
|
40
|
-
isOwner_(isOwner),
|
41
|
-
start_((char*) p),
|
42
|
-
end_(((char*) p) + sz),
|
43
|
-
size_(sz),
|
44
|
-
head_((char*) p),
|
45
|
-
mallocCurrent_(0),
|
46
|
-
highWaterMemoryUsed_(0),
|
47
|
-
highWaterMalloc_(0),
|
48
|
-
cudaMallocWarning_(true) {
|
49
|
-
}
|
50
|
-
|
51
|
-
StackDeviceMemory::Stack::~Stack() {
|
52
|
-
if (isOwner_) {
|
53
|
-
DeviceScope s(device_);
|
54
|
-
|
55
|
-
freeMemorySpace(MemorySpace::Device, start_);
|
56
|
-
}
|
57
|
-
}
|
58
|
-
|
59
|
-
size_t
|
60
|
-
StackDeviceMemory::Stack::getSizeAvailable() const {
|
61
|
-
return (end_ - head_);
|
62
|
-
}
|
63
|
-
|
64
|
-
char*
|
65
|
-
StackDeviceMemory::Stack::getAlloc(size_t size,
|
66
|
-
cudaStream_t stream) {
|
67
|
-
if (size > (end_ - head_)) {
|
68
|
-
// Too large for our stack
|
69
|
-
DeviceScope s(device_);
|
70
|
-
|
71
|
-
if (cudaMallocWarning_) {
|
72
|
-
// Print our requested size before we attempt the allocation
|
73
|
-
fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
|
74
|
-
"or decrease query/add size (alloc %zu B, highwater %zu B)\n",
|
75
|
-
size, highWaterMalloc_);
|
76
|
-
}
|
77
|
-
|
78
|
-
char* p = nullptr;
|
79
|
-
allocMemorySpace(MemorySpace::Device, &p, size);
|
80
|
-
|
81
|
-
mallocCurrent_ += size;
|
82
|
-
highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
|
83
|
-
|
84
|
-
return p;
|
85
|
-
} else {
|
86
|
-
// We can make the allocation out of our stack
|
87
|
-
// Find all the ranges that we overlap that may have been
|
88
|
-
// previously allocated; our allocation will be [head, endAlloc)
|
89
|
-
char* startAlloc = head_;
|
90
|
-
char* endAlloc = head_ + size;
|
91
|
-
|
92
|
-
while (lastUsers_.size() > 0) {
|
93
|
-
auto& prevUser = lastUsers_.back();
|
94
|
-
|
95
|
-
// Because there is a previous user, we must overlap it
|
96
|
-
FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
|
97
|
-
|
98
|
-
if (stream != prevUser.stream_) {
|
99
|
-
// Synchronization required
|
100
|
-
// FIXME
|
101
|
-
FAISS_ASSERT(false);
|
102
|
-
}
|
103
|
-
|
104
|
-
if (endAlloc < prevUser.end_) {
|
105
|
-
// Update the previous user info
|
106
|
-
prevUser.start_ = endAlloc;
|
107
|
-
|
108
|
-
break;
|
109
|
-
}
|
110
|
-
|
111
|
-
// If we're the exact size of the previous request, then we
|
112
|
-
// don't need to continue
|
113
|
-
bool done = (prevUser.end_ == endAlloc);
|
114
|
-
|
115
|
-
lastUsers_.pop_back();
|
116
|
-
|
117
|
-
if (done) {
|
118
|
-
break;
|
119
|
-
}
|
120
|
-
}
|
121
|
-
|
122
|
-
head_ = endAlloc;
|
123
|
-
FAISS_ASSERT(head_ <= end_);
|
124
|
-
|
125
|
-
highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
|
126
|
-
(size_t) (head_ - start_));
|
127
|
-
return startAlloc;
|
128
|
-
}
|
129
|
-
}
|
130
|
-
|
131
|
-
void
|
132
|
-
StackDeviceMemory::Stack::returnAlloc(char* p,
|
133
|
-
size_t size,
|
134
|
-
cudaStream_t stream) {
|
135
|
-
if (p < start_ || p >= end_) {
|
136
|
-
// This is not on our stack; it was a one-off allocation
|
137
|
-
DeviceScope s(device_);
|
138
|
-
|
139
|
-
freeMemorySpace(MemorySpace::Device, p);
|
140
|
-
|
141
|
-
FAISS_ASSERT(mallocCurrent_ >= size);
|
142
|
-
mallocCurrent_ -= size;
|
143
|
-
} else {
|
144
|
-
// This is on our stack
|
145
|
-
// Allocations should be freed in the reverse order they are made
|
146
|
-
FAISS_ASSERT(p + size == head_);
|
147
|
-
|
148
|
-
head_ = p;
|
149
|
-
lastUsers_.push_back(Range(p, p + size, stream));
|
150
|
-
}
|
151
|
-
}
|
152
|
-
|
153
|
-
std::string
|
154
|
-
StackDeviceMemory::Stack::toString() const {
|
155
|
-
std::stringstream s;
|
156
|
-
|
157
|
-
s << "SDM device " << device_ << ": Total memory " << size_ << " ["
|
158
|
-
<< (void*) start_ << ", " << (void*) end_ << ")\n";
|
159
|
-
s << " Available memory " << (size_t) (end_ - head_)
|
160
|
-
<< " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
|
161
|
-
s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
|
162
|
-
s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
|
163
|
-
|
164
|
-
int i = lastUsers_.size();
|
165
|
-
for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
|
166
|
-
s << i-- << ": size " << (size_t) (it->end_ - it->start_)
|
167
|
-
<< " stream " << it->stream_
|
168
|
-
<< " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
|
169
|
-
}
|
170
|
-
|
171
|
-
return s.str();
|
172
|
-
}
|
173
|
-
|
174
|
-
size_t
|
175
|
-
StackDeviceMemory::Stack::getHighWaterCudaMalloc() const {
|
176
|
-
return highWaterMalloc_;
|
177
|
-
}
|
178
|
-
|
179
|
-
StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
|
180
|
-
: device_(device),
|
181
|
-
stack_(device, allocPerDevice) {
|
182
|
-
}
|
183
|
-
|
184
|
-
StackDeviceMemory::StackDeviceMemory(int device,
|
185
|
-
void* p, size_t size, bool isOwner)
|
186
|
-
: device_(device),
|
187
|
-
stack_(device, p, size, isOwner) {
|
188
|
-
}
|
189
|
-
|
190
|
-
StackDeviceMemory::~StackDeviceMemory() {
|
191
|
-
}
|
192
|
-
|
193
|
-
void
|
194
|
-
StackDeviceMemory::setCudaMallocWarning(bool b) {
|
195
|
-
stack_.cudaMallocWarning_ = b;
|
196
|
-
}
|
197
|
-
|
198
|
-
int
|
199
|
-
StackDeviceMemory::getDevice() const {
|
200
|
-
return device_;
|
201
|
-
}
|
202
|
-
|
203
|
-
DeviceMemoryReservation
|
204
|
-
StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
|
205
|
-
// We guarantee 16 byte alignment for allocations, so bump up `size`
|
206
|
-
// to the next highest multiple of 16
|
207
|
-
size = utils::roundUp(size, (size_t) 16);
|
208
|
-
|
209
|
-
return DeviceMemoryReservation(this,
|
210
|
-
device_,
|
211
|
-
stack_.getAlloc(size, stream),
|
212
|
-
size,
|
213
|
-
stream);
|
214
|
-
}
|
215
|
-
|
216
|
-
size_t
|
217
|
-
StackDeviceMemory::getSizeAvailable() const {
|
218
|
-
return stack_.getSizeAvailable();
|
219
|
-
}
|
220
|
-
|
221
|
-
std::string
|
222
|
-
StackDeviceMemory::toString() const {
|
223
|
-
return stack_.toString();
|
224
|
-
}
|
225
|
-
|
226
|
-
size_t
|
227
|
-
StackDeviceMemory::getHighWaterCudaMalloc() const {
|
228
|
-
return stack_.getHighWaterCudaMalloc();
|
229
|
-
}
|
230
|
-
|
231
|
-
void
|
232
|
-
StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
|
233
|
-
FAISS_ASSERT(m.get());
|
234
|
-
FAISS_ASSERT(device_ == m.device());
|
235
|
-
|
236
|
-
stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
|
237
|
-
}
|
238
|
-
|
239
|
-
} } // namespace
|