faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
File without changes
|
@@ -24,6 +24,8 @@ inline float relativeError(float a, float b) {
|
|
24
24
|
// This seed is also used for the faiss float_rand API; in a test it
|
25
25
|
// is all within a single thread, so it is ok
|
26
26
|
long s_seed = 1;
|
27
|
+
std::mt19937 rng(1);
|
28
|
+
std::uniform_int_distribution<> distrib;
|
27
29
|
|
28
30
|
void newTestSeed() {
|
29
31
|
struct timespec t;
|
@@ -35,7 +37,7 @@ void newTestSeed() {
|
|
35
37
|
void setTestSeed(long seed) {
|
36
38
|
printf("testing with random seed %ld\n", seed);
|
37
39
|
|
38
|
-
|
40
|
+
rng = std::mt19937(seed);
|
39
41
|
s_seed = seed;
|
40
42
|
}
|
41
43
|
|
@@ -43,7 +45,7 @@ int randVal(int a, int b) {
|
|
43
45
|
EXPECT_GE(a, 0);
|
44
46
|
EXPECT_LE(a, b);
|
45
47
|
|
46
|
-
return a + (
|
48
|
+
return a + (distrib(rng) % (b + 1 - a));
|
47
49
|
}
|
48
50
|
|
49
51
|
bool randBool() {
|
File without changes
|
@@ -5,12 +5,10 @@
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
6
6
|
*/
|
7
7
|
|
8
|
-
// Copyright 2004-present Facebook. All Rights Reserved
|
9
|
-
|
10
|
-
|
11
8
|
#include <cmath>
|
12
9
|
#include <cstdio>
|
13
10
|
#include <cstdlib>
|
11
|
+
#include <random>
|
14
12
|
|
15
13
|
#include <sys/time.h>
|
16
14
|
|
@@ -64,13 +62,16 @@ int main ()
|
|
64
62
|
faiss::gpu::GpuIndexIVFPQ index (
|
65
63
|
&resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
|
66
64
|
|
65
|
+
std::mt19937 rng;
|
66
|
+
|
67
67
|
{ // training
|
68
68
|
printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
|
69
69
|
elapsed() - t0, nt, d);
|
70
70
|
|
71
71
|
std::vector <float> trainvecs (nt * d);
|
72
|
+
std::uniform_real_distribution<> distrib;
|
72
73
|
for (size_t i = 0; i < nt * d; i++) {
|
73
|
-
trainvecs[i] =
|
74
|
+
trainvecs[i] = distrib(rng);
|
74
75
|
}
|
75
76
|
|
76
77
|
printf ("[%.3f s] Training the index\n",
|
@@ -100,8 +101,9 @@ int main ()
|
|
100
101
|
elapsed() - t0, nb);
|
101
102
|
|
102
103
|
std::vector <float> database (nb * d);
|
104
|
+
std::uniform_real_distribution<> distrib;
|
103
105
|
for (size_t i = 0; i < nb * d; i++) {
|
104
|
-
database[i] =
|
106
|
+
database[i] = distrib(rng);
|
105
107
|
}
|
106
108
|
|
107
109
|
printf ("[%.3f s] Adding the vectors to the index\n",
|
@@ -102,7 +102,7 @@ class CublasHandleScope {
|
|
102
102
|
class CudaEvent {
|
103
103
|
public:
|
104
104
|
/// Creates an event and records it in this stream
|
105
|
-
explicit CudaEvent(cudaStream_t stream);
|
105
|
+
explicit CudaEvent(cudaStream_t stream, bool timer = false);
|
106
106
|
CudaEvent(const CudaEvent& event) = delete;
|
107
107
|
CudaEvent(CudaEvent&& event) noexcept;
|
108
108
|
~CudaEvent();
|
@@ -0,0 +1,213 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
|
9
|
+
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
10
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
11
|
+
#include <faiss/gpu/utils/StaticUtils.h>
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
13
|
+
#include <sstream>
|
14
|
+
|
15
|
+
namespace faiss { namespace gpu {
|
16
|
+
|
17
|
+
namespace {
|
18
|
+
|
19
|
+
size_t adjustStackSize(size_t sz) {
|
20
|
+
if (sz == 0) {
|
21
|
+
return 0;
|
22
|
+
} else {
|
23
|
+
// ensure that we have at least 16 bytes, as all allocations are bumped up
|
24
|
+
// to 16
|
25
|
+
return utils::roundUp(sz, (size_t) 16);
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
} // namespace
|
30
|
+
|
31
|
+
StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)
|
32
|
+
: res_(res),
|
33
|
+
device_(d),
|
34
|
+
alloc_(nullptr),
|
35
|
+
allocSize_(adjustStackSize(sz)),
|
36
|
+
start_(nullptr),
|
37
|
+
end_(nullptr),
|
38
|
+
head_(nullptr),
|
39
|
+
highWaterMemoryUsed_(0) {
|
40
|
+
if (allocSize_ == 0) {
|
41
|
+
return;
|
42
|
+
}
|
43
|
+
|
44
|
+
DeviceScope s(device_);
|
45
|
+
auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
|
46
|
+
device_,
|
47
|
+
MemorySpace::Device,
|
48
|
+
res_->getDefaultStream(device_),
|
49
|
+
allocSize_);
|
50
|
+
|
51
|
+
alloc_ = (char*) res_->allocMemory(req);
|
52
|
+
FAISS_ASSERT_FMT(
|
53
|
+
alloc_,
|
54
|
+
"could not reserve temporary memory region of size %zu", allocSize_);
|
55
|
+
|
56
|
+
// In order to disambiguate between our entire region of temporary memory
|
57
|
+
// versus the first allocation in the temporary memory region, ensure that the
|
58
|
+
// first address returned is +16 bytes from the beginning
|
59
|
+
start_ = alloc_ + 16;
|
60
|
+
head_ = start_;
|
61
|
+
end_ = alloc_ + allocSize_;
|
62
|
+
}
|
63
|
+
|
64
|
+
StackDeviceMemory::Stack::~Stack() {
|
65
|
+
DeviceScope s(device_);
|
66
|
+
|
67
|
+
// FIXME: make sure there are no outstanding memory allocations?
|
68
|
+
if (alloc_) {
|
69
|
+
res_->deallocMemory(device_, alloc_);
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
size_t
|
74
|
+
StackDeviceMemory::Stack::getSizeAvailable() const {
|
75
|
+
return (end_ - head_);
|
76
|
+
}
|
77
|
+
|
78
|
+
char*
|
79
|
+
StackDeviceMemory::Stack::getAlloc(size_t size,
|
80
|
+
cudaStream_t stream) {
|
81
|
+
// The user must check to see that the allocation fit within us
|
82
|
+
auto sizeRemaining = getSizeAvailable();
|
83
|
+
|
84
|
+
FAISS_ASSERT(size <= sizeRemaining);
|
85
|
+
|
86
|
+
// We can make the allocation out of our stack
|
87
|
+
// Find all the ranges that we overlap that may have been
|
88
|
+
// previously allocated; our allocation will be [head, endAlloc)
|
89
|
+
char* startAlloc = head_;
|
90
|
+
char* endAlloc = head_ + size;
|
91
|
+
|
92
|
+
while (lastUsers_.size() > 0) {
|
93
|
+
auto& prevUser = lastUsers_.back();
|
94
|
+
|
95
|
+
// Because there is a previous user, we must overlap it
|
96
|
+
FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
|
97
|
+
|
98
|
+
if (stream != prevUser.stream_) {
|
99
|
+
// Synchronization required
|
100
|
+
streamWait({stream}, {prevUser.stream_});
|
101
|
+
}
|
102
|
+
|
103
|
+
if (endAlloc < prevUser.end_) {
|
104
|
+
// Update the previous user info
|
105
|
+
prevUser.start_ = endAlloc;
|
106
|
+
|
107
|
+
break;
|
108
|
+
}
|
109
|
+
|
110
|
+
// If we're the exact size of the previous request, then we
|
111
|
+
// don't need to continue
|
112
|
+
bool done = (prevUser.end_ == endAlloc);
|
113
|
+
|
114
|
+
lastUsers_.pop_back();
|
115
|
+
|
116
|
+
if (done) {
|
117
|
+
break;
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
head_ = endAlloc;
|
122
|
+
FAISS_ASSERT(head_ <= end_);
|
123
|
+
|
124
|
+
highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
|
125
|
+
(size_t) (head_ - start_));
|
126
|
+
FAISS_ASSERT(startAlloc);
|
127
|
+
return startAlloc;
|
128
|
+
}
|
129
|
+
|
130
|
+
void
|
131
|
+
StackDeviceMemory::Stack::returnAlloc(char* p,
|
132
|
+
size_t size,
|
133
|
+
cudaStream_t stream) {
|
134
|
+
// This allocation should be within ourselves
|
135
|
+
FAISS_ASSERT(p >= start_ && p < end_);
|
136
|
+
|
137
|
+
// All allocations should have been adjusted to a multiple of 16 bytes
|
138
|
+
FAISS_ASSERT(size % 16 == 0);
|
139
|
+
|
140
|
+
// This is on our stack
|
141
|
+
// Allocations should be freed in the reverse order they are made
|
142
|
+
if (p + size != head_) {
|
143
|
+
FAISS_ASSERT(p + size == head_);
|
144
|
+
}
|
145
|
+
|
146
|
+
head_ = p;
|
147
|
+
lastUsers_.push_back(Range(p, p + size, stream));
|
148
|
+
}
|
149
|
+
|
150
|
+
std::string
|
151
|
+
StackDeviceMemory::Stack::toString() const {
|
152
|
+
std::stringstream s;
|
153
|
+
|
154
|
+
s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
|
155
|
+
<< (void*) start_ << ", " << (void*) end_ << ")\n";
|
156
|
+
s << " Available memory " << (size_t) (end_ - head_)
|
157
|
+
<< " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
|
158
|
+
s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
|
159
|
+
|
160
|
+
int i = lastUsers_.size();
|
161
|
+
for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
|
162
|
+
s << i-- << ": size " << (size_t) (it->end_ - it->start_)
|
163
|
+
<< " stream " << it->stream_
|
164
|
+
<< " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
|
165
|
+
}
|
166
|
+
|
167
|
+
return s.str();
|
168
|
+
}
|
169
|
+
|
170
|
+
StackDeviceMemory::StackDeviceMemory(GpuResources* res,
|
171
|
+
int device,
|
172
|
+
size_t allocPerDevice)
|
173
|
+
: device_(device),
|
174
|
+
stack_(res, device, allocPerDevice) {
|
175
|
+
}
|
176
|
+
|
177
|
+
StackDeviceMemory::~StackDeviceMemory() {
|
178
|
+
}
|
179
|
+
|
180
|
+
int
|
181
|
+
StackDeviceMemory::getDevice() const {
|
182
|
+
return device_;
|
183
|
+
}
|
184
|
+
|
185
|
+
size_t
|
186
|
+
StackDeviceMemory::getSizeAvailable() const {
|
187
|
+
return stack_.getSizeAvailable();
|
188
|
+
}
|
189
|
+
|
190
|
+
std::string
|
191
|
+
StackDeviceMemory::toString() const {
|
192
|
+
return stack_.toString();
|
193
|
+
}
|
194
|
+
|
195
|
+
void*
|
196
|
+
StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
|
197
|
+
// All allocations should have been adjusted to a multiple of 16 bytes
|
198
|
+
FAISS_ASSERT(size % 16 == 0);
|
199
|
+
return stack_.getAlloc(size, stream);
|
200
|
+
}
|
201
|
+
|
202
|
+
void
|
203
|
+
StackDeviceMemory::deallocMemory(int device,
|
204
|
+
cudaStream_t stream,
|
205
|
+
size_t size,
|
206
|
+
void* p) {
|
207
|
+
FAISS_ASSERT(p);
|
208
|
+
FAISS_ASSERT(device == device_);
|
209
|
+
|
210
|
+
stack_.returnAlloc((char*) p, size, stream);
|
211
|
+
}
|
212
|
+
|
213
|
+
} } // namespace
|
@@ -8,41 +8,38 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include <faiss/gpu/
|
11
|
+
#include <faiss/gpu/GpuResources.h>
|
12
|
+
#include <cuda_runtime.h>
|
12
13
|
#include <list>
|
13
14
|
#include <memory>
|
14
15
|
#include <unordered_map>
|
16
|
+
#include <tuple>
|
15
17
|
|
16
18
|
namespace faiss { namespace gpu {
|
17
19
|
|
18
20
|
/// Device memory manager that provides temporary memory allocations
|
19
|
-
/// out of a region of memory
|
20
|
-
class StackDeviceMemory
|
21
|
+
/// out of a region of memory, for a single device
|
22
|
+
class StackDeviceMemory {
|
21
23
|
public:
|
22
24
|
/// Allocate a new region of memory that we manage
|
23
|
-
|
25
|
+
StackDeviceMemory(GpuResources* res,
|
26
|
+
int device,
|
27
|
+
size_t allocPerDevice);
|
24
28
|
|
25
29
|
/// Manage a region of memory for a particular device, with or
|
26
30
|
/// without ownership
|
27
31
|
StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
|
28
32
|
|
29
|
-
~StackDeviceMemory()
|
33
|
+
~StackDeviceMemory();
|
30
34
|
|
31
|
-
|
32
|
-
/// when cudaMalloc gets called
|
33
|
-
void setCudaMallocWarning(bool b);
|
35
|
+
int getDevice() const;
|
34
36
|
|
35
|
-
|
37
|
+
/// All allocations requested should be a multiple of 16 bytes
|
38
|
+
void* allocMemory(cudaStream_t stream, size_t size);
|
39
|
+
void deallocMemory(int device, cudaStream_t, size_t size, void* p);
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
size_t getSizeAvailable() const override;
|
41
|
-
std::string toString() const override;
|
42
|
-
size_t getHighWaterCudaMalloc() const override;
|
43
|
-
|
44
|
-
protected:
|
45
|
-
void returnAllocation(DeviceMemoryReservation& m) override;
|
41
|
+
size_t getSizeAvailable() const;
|
42
|
+
std::string toString() const;
|
46
43
|
|
47
44
|
protected:
|
48
45
|
/// Previous allocation ranges and the streams for which
|
@@ -60,10 +57,8 @@ class StackDeviceMemory : public DeviceMemory {
|
|
60
57
|
|
61
58
|
struct Stack {
|
62
59
|
/// Constructor that allocates memory via cudaMalloc
|
63
|
-
Stack(int device, size_t size);
|
60
|
+
Stack(GpuResources* res, int device, size_t size);
|
64
61
|
|
65
|
-
/// Constructor that references a pre-allocated region of memory
|
66
|
-
Stack(int device, void* p, size_t size, bool isOwner);
|
67
62
|
~Stack();
|
68
63
|
|
69
64
|
/// Returns how much size is available for an allocation without
|
@@ -80,23 +75,23 @@ class StackDeviceMemory : public DeviceMemory {
|
|
80
75
|
/// Returns the stack state
|
81
76
|
std::string toString() const;
|
82
77
|
|
83
|
-
///
|
84
|
-
|
78
|
+
/// Our GpuResources object
|
79
|
+
GpuResources* res_;
|
85
80
|
|
86
81
|
/// Device this allocation is on
|
87
82
|
int device_;
|
88
83
|
|
89
|
-
///
|
90
|
-
|
84
|
+
/// Where our temporary memory buffer is allocated; we allocate starting 16
|
85
|
+
/// bytes into this
|
86
|
+
char* alloc_;
|
87
|
+
|
88
|
+
/// Total size of our allocation
|
89
|
+
size_t allocSize_;
|
91
90
|
|
92
|
-
///
|
93
|
-
/// [start_, end_) is valid
|
91
|
+
/// Our temporary memory region; [start_, end_) is valid
|
94
92
|
char* start_;
|
95
93
|
char* end_;
|
96
94
|
|
97
|
-
/// Total size end_ - start_
|
98
|
-
size_t size_;
|
99
|
-
|
100
95
|
/// Stack head within [start, end)
|
101
96
|
char* head_;
|
102
97
|
|
@@ -104,19 +99,9 @@ class StackDeviceMemory : public DeviceMemory {
|
|
104
99
|
/// possible synchronization purposes
|
105
100
|
std::list<Range> lastUsers_;
|
106
101
|
|
107
|
-
/// How much cudaMalloc memory is currently outstanding?
|
108
|
-
size_t mallocCurrent_;
|
109
|
-
|
110
102
|
/// What's the high water mark in terms of memory used from the
|
111
103
|
/// temporary buffer?
|
112
104
|
size_t highWaterMemoryUsed_;
|
113
|
-
|
114
|
-
/// What's the high water mark in terms of memory allocated via
|
115
|
-
/// cudaMalloc?
|
116
|
-
size_t highWaterMalloc_;
|
117
|
-
|
118
|
-
/// Whether or not a warning upon cudaMalloc is generated
|
119
|
-
bool cudaMallocWarning_;
|
120
105
|
};
|
121
106
|
|
122
107
|
/// Our device
|
File without changes
|
File without changes
|
File without changes
|
@@ -21,6 +21,8 @@
|
|
21
21
|
#include <mutex>
|
22
22
|
|
23
23
|
#include <faiss/Index.h>
|
24
|
+
#include <faiss/impl/platform_macros.h>
|
25
|
+
|
24
26
|
|
25
27
|
namespace faiss {
|
26
28
|
|
@@ -218,7 +220,7 @@ struct DistanceComputer {
|
|
218
220
|
* Interrupt callback
|
219
221
|
***********************************************************/
|
220
222
|
|
221
|
-
struct InterruptCallback {
|
223
|
+
struct FAISS_API InterruptCallback {
|
222
224
|
virtual bool want_interrupt () = 0;
|
223
225
|
virtual ~InterruptCallback() {}
|
224
226
|
|