faiss 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +6 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
- data/vendor/faiss/faiss/IndexHNSW.cpp +173 -143
- data/vendor/faiss/faiss/IndexIVF.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +4 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +68 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +4 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +11 -12
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +3 -3
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +7 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +48 -3
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +556 -199
- data/vendor/faiss/faiss/impl/HNSW.h +51 -13
- data/vendor/faiss/faiss/impl/NSG.cpp +15 -11
- data/vendor/faiss/faiss/impl/Panorama.h +11 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +1 -1
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +7 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +1 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +271 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +50 -0
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +69 -34
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +3 -1
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +35 -43
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -15
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +86 -40
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +81 -50
- data/vendor/faiss/faiss/impl/index_read.cpp +100 -39
- data/vendor/faiss/faiss/impl/index_write.cpp +1 -0
- data/vendor/faiss/faiss/impl/io_macros.h +25 -0
- data/vendor/faiss/faiss/impl/platform_macros.h +12 -8
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +2 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +2 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +2 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +20 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +36 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +2 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +6 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +327 -18
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +264 -27
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +199 -27
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +366 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +144 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +65 -8
- data/vendor/faiss/faiss/index_factory.cpp +5 -1
- data/vendor/faiss/faiss/index_io.h +16 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +4 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +119 -22
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +15 -5
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +65 -24
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +3 -2
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/faiss/utils/hamming.cpp +8 -8
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +2 -1
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +6 -30
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +0 -2
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +14 -68
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +12 -2
- metadata +12 -2
|
@@ -43,6 +43,10 @@ const std::map<faiss::ScalarQuantizer::QuantizerType, std::string> sq_types = {
|
|
|
43
43
|
{faiss::ScalarQuantizer::QT_3bit_tqmse, "SQtqmse3"},
|
|
44
44
|
{faiss::ScalarQuantizer::QT_4bit_tqmse, "SQtqmse4"},
|
|
45
45
|
{faiss::ScalarQuantizer::QT_8bit_tqmse, "SQtqmse8"},
|
|
46
|
+
{faiss::ScalarQuantizer::QT_2bit_tq, "SQtq2"},
|
|
47
|
+
{faiss::ScalarQuantizer::QT_3bit_tq, "SQtq3"},
|
|
48
|
+
{faiss::ScalarQuantizer::QT_4bit_tq, "SQtq4"},
|
|
49
|
+
{faiss::ScalarQuantizer::QT_5bit_tq, "SQtq5"},
|
|
46
50
|
};
|
|
47
51
|
|
|
48
52
|
int get_hnsw_M(const faiss::IndexHNSW* index) {
|
|
@@ -33,7 +33,8 @@
|
|
|
33
33
|
|
|
34
34
|
#if defined USE_NVIDIA_CUVS
|
|
35
35
|
#include <raft/core/device_resources.hpp>
|
|
36
|
-
#include <
|
|
36
|
+
#include <cuda/memory_resource>
|
|
37
|
+
#include <optional>
|
|
37
38
|
#endif
|
|
38
39
|
|
|
39
40
|
namespace faiss {
|
|
@@ -163,7 +164,7 @@ struct AllocRequest : public AllocInfo {
|
|
|
163
164
|
size_t size = 0;
|
|
164
165
|
|
|
165
166
|
#if defined USE_NVIDIA_CUVS
|
|
166
|
-
|
|
167
|
+
std::optional<cuda::mr::any_resource<cuda::mr::device_accessible>> mr;
|
|
167
168
|
#endif
|
|
168
169
|
};
|
|
169
170
|
|
|
@@ -92,8 +92,8 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
|
|
|
92
92
|
StandardGpuResourcesImpl::StandardGpuResourcesImpl()
|
|
93
93
|
:
|
|
94
94
|
#if defined USE_NVIDIA_CUVS
|
|
95
|
-
mmr_
|
|
96
|
-
pmr_
|
|
95
|
+
mmr_{},
|
|
96
|
+
pmr_{},
|
|
97
97
|
#endif
|
|
98
98
|
pinnedMemAlloc_(nullptr),
|
|
99
99
|
pinnedMemAllocSize_(0),
|
|
@@ -164,7 +164,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
|
|
|
164
164
|
|
|
165
165
|
if (pinnedMemAlloc_) {
|
|
166
166
|
#if defined USE_NVIDIA_CUVS
|
|
167
|
-
pmr_
|
|
167
|
+
pmr_.deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_);
|
|
168
168
|
#else
|
|
169
169
|
auto err = cudaFreeHost(pinnedMemAlloc_);
|
|
170
170
|
FAISS_ASSERT_FMT(
|
|
@@ -350,7 +350,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
|
|
|
350
350
|
// pinned memory allocation
|
|
351
351
|
if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
|
|
352
352
|
try {
|
|
353
|
-
pinnedMemAlloc_ = pmr_
|
|
353
|
+
pinnedMemAlloc_ = pmr_.allocate_sync(pinnedMemSize_);
|
|
354
354
|
} catch (const std::bad_alloc& rmm_ex) {
|
|
355
355
|
FAISS_THROW_MSG("CUDA memory allocation error");
|
|
356
356
|
}
|
|
@@ -546,10 +546,9 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
|
|
|
546
546
|
} else if (adjReq.space == MemorySpace::Device) {
|
|
547
547
|
#if defined USE_NVIDIA_CUVS
|
|
548
548
|
try {
|
|
549
|
-
rmm::mr::
|
|
550
|
-
rmm::
|
|
551
|
-
|
|
552
|
-
p = current_mr->allocate(adjReq.stream, adjReq.size);
|
|
549
|
+
auto current_mr = rmm::mr::get_per_device_resource_ref(
|
|
550
|
+
rmm::cuda_device_id{adjReq.device});
|
|
551
|
+
p = current_mr.allocate(adjReq.stream, adjReq.size);
|
|
553
552
|
adjReq.mr = current_mr;
|
|
554
553
|
} catch (const std::bad_alloc& rmm_ex) {
|
|
555
554
|
FAISS_THROW_MSG("CUDA memory allocation error");
|
|
@@ -562,7 +561,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
|
|
|
562
561
|
// FIXME: as of CUDA 11, a memory allocation error appears to be
|
|
563
562
|
// presented via cudaGetLastError as well, and needs to be
|
|
564
563
|
// cleared. Just call the function to clear it
|
|
565
|
-
cudaGetLastError();
|
|
564
|
+
(void)cudaGetLastError();
|
|
566
565
|
|
|
567
566
|
std::stringstream ss;
|
|
568
567
|
ss << "StandardGpuResources: alloc fail " << adjReq.toString()
|
|
@@ -584,8 +583,8 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
|
|
|
584
583
|
// TODO: change this to use the current device resource once RMM has
|
|
585
584
|
// a way to retrieve a "guaranteed" managed memory resource for a
|
|
586
585
|
// device.
|
|
587
|
-
p = mmr_
|
|
588
|
-
adjReq.mr = mmr_
|
|
586
|
+
p = mmr_.allocate(adjReq.stream, adjReq.size);
|
|
587
|
+
adjReq.mr = mmr_;
|
|
589
588
|
} catch (const std::bad_alloc& rmm_ex) {
|
|
590
589
|
FAISS_THROW_MSG("CUDA memory allocation error");
|
|
591
590
|
}
|
|
@@ -596,7 +595,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
|
|
|
596
595
|
// FIXME: as of CUDA 11, a memory allocation error appears to be
|
|
597
596
|
// presented via cudaGetLastError as well, and needs to be cleared.
|
|
598
597
|
// Just call the function to clear it
|
|
599
|
-
cudaGetLastError();
|
|
598
|
+
(void)cudaGetLastError();
|
|
600
599
|
|
|
601
600
|
std::stringstream ss;
|
|
602
601
|
ss << "StandardGpuResources: alloc fail " << adjReq.toString()
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
#if defined USE_NVIDIA_CUVS
|
|
27
27
|
#include <raft/core/device_resources.hpp>
|
|
28
|
-
#include <rmm/mr/
|
|
28
|
+
#include <rmm/mr/managed_memory_resource.hpp>
|
|
29
29
|
#include <rmm/mr/pinned_host_memory_resource.hpp>
|
|
30
30
|
#endif
|
|
31
31
|
|
|
@@ -171,10 +171,10 @@ class StandardGpuResourcesImpl : public GpuResources {
|
|
|
171
171
|
*/
|
|
172
172
|
|
|
173
173
|
// managed_memory_resource
|
|
174
|
-
|
|
174
|
+
rmm::mr::managed_memory_resource mmr_;
|
|
175
175
|
|
|
176
176
|
// pinned_host_memory_resource
|
|
177
|
-
|
|
177
|
+
rmm::mr::pinned_host_memory_resource pmr_;
|
|
178
178
|
#endif
|
|
179
179
|
|
|
180
180
|
/// Pinned memory allocation for use with this GPU
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* IVF distance computation and scan dispatch for Metal backend.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#import <Metal/Metal.h>
|
|
14
|
+
|
|
15
|
+
#include <cstddef>
|
|
16
|
+
#include <cstdint>
|
|
17
|
+
#include <memory>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
namespace gpu_metal {
|
|
21
|
+
|
|
22
|
+
class MetalResources;
|
|
23
|
+
|
|
24
|
+
int getMetalDistanceMaxK();
|
|
25
|
+
|
|
26
|
+
bool runMetalComputeNorms(
|
|
27
|
+
id<MTLDevice> device,
|
|
28
|
+
id<MTLCommandQueue> queue,
|
|
29
|
+
id<MTLBuffer> vectors,
|
|
30
|
+
int nb,
|
|
31
|
+
int d,
|
|
32
|
+
id<MTLBuffer> normsBuf,
|
|
33
|
+
bool waitForCompletion = true);
|
|
34
|
+
|
|
35
|
+
bool runMetalIVFFlatScan(
|
|
36
|
+
id<MTLDevice> device,
|
|
37
|
+
id<MTLCommandQueue> queue,
|
|
38
|
+
id<MTLBuffer> queries,
|
|
39
|
+
id<MTLBuffer> codes,
|
|
40
|
+
id<MTLBuffer> ids,
|
|
41
|
+
id<MTLBuffer> listOffset,
|
|
42
|
+
id<MTLBuffer> listLength,
|
|
43
|
+
id<MTLBuffer> coarseAssign,
|
|
44
|
+
int nq,
|
|
45
|
+
int d,
|
|
46
|
+
int k,
|
|
47
|
+
int nprobe,
|
|
48
|
+
bool isL2,
|
|
49
|
+
id<MTLBuffer> outDistances,
|
|
50
|
+
id<MTLBuffer> outIndices,
|
|
51
|
+
id<MTLBuffer> perListDistBuf,
|
|
52
|
+
id<MTLBuffer> perListIdxBuf,
|
|
53
|
+
id<MTLBuffer> interleavedCodes = nil,
|
|
54
|
+
id<MTLBuffer> interleavedCodesOffset = nil,
|
|
55
|
+
bool waitForCompletion = true);
|
|
56
|
+
|
|
57
|
+
bool runMetalIVFFlatFullSearch(
|
|
58
|
+
id<MTLDevice> device,
|
|
59
|
+
id<MTLCommandQueue> queue,
|
|
60
|
+
id<MTLBuffer> queries,
|
|
61
|
+
int nq,
|
|
62
|
+
int d,
|
|
63
|
+
int k,
|
|
64
|
+
int nprobe,
|
|
65
|
+
bool isL2,
|
|
66
|
+
id<MTLBuffer> centroids,
|
|
67
|
+
int nlist,
|
|
68
|
+
id<MTLBuffer> codes,
|
|
69
|
+
id<MTLBuffer> ids,
|
|
70
|
+
id<MTLBuffer> listOffset,
|
|
71
|
+
id<MTLBuffer> listLength,
|
|
72
|
+
id<MTLBuffer> outDistances,
|
|
73
|
+
id<MTLBuffer> outIndices,
|
|
74
|
+
id<MTLBuffer> perListDistBuf,
|
|
75
|
+
id<MTLBuffer> perListIdxBuf,
|
|
76
|
+
id<MTLBuffer> coarseDistBuf,
|
|
77
|
+
id<MTLBuffer> coarseIdxBuf,
|
|
78
|
+
id<MTLBuffer> distMatrixBuf,
|
|
79
|
+
id<MTLBuffer> centroidNormsBuf = nil,
|
|
80
|
+
int avgListLen = 256,
|
|
81
|
+
id<MTLBuffer> interleavedCodes = nil,
|
|
82
|
+
id<MTLBuffer> interleavedCodesOffset = nil,
|
|
83
|
+
bool centroidsAreFP16 = false,
|
|
84
|
+
bool waitForCompletion = true);
|
|
85
|
+
|
|
86
|
+
} // namespace gpu_metal
|
|
87
|
+
} // namespace faiss
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
#pragma once
|
|
12
12
|
|
|
13
13
|
#include <faiss/Index.h>
|
|
14
|
+
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
14
15
|
#include <faiss/gpu_metal/MetalResources.h>
|
|
15
16
|
#include <memory>
|
|
16
17
|
|
|
@@ -20,6 +21,12 @@ namespace gpu_metal {
|
|
|
20
21
|
/// Configuration for Metal index (mirrors GpuIndexConfig roles).
|
|
21
22
|
struct MetalIndexConfig {
|
|
22
23
|
int device = 0;
|
|
24
|
+
|
|
25
|
+
bool useFloat16CoarseQuantizer = false;
|
|
26
|
+
|
|
27
|
+
faiss::gpu::IndicesOptions indicesOptions = faiss::gpu::INDICES_64_BIT;
|
|
28
|
+
|
|
29
|
+
bool interleavedLayout = true;
|
|
23
30
|
};
|
|
24
31
|
|
|
25
32
|
/// Base class for Metal-backed indexes. Mirrors faiss::gpu::GpuIndex.
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Minimal Metal IVFFlat wrapper.
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#import <Metal/Metal.h>
|
|
15
|
+
|
|
16
|
+
#include <faiss/IndexIVFFlat.h>
|
|
17
|
+
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
18
|
+
#include <faiss/gpu_metal/MetalIndex.h>
|
|
19
|
+
|
|
20
|
+
#include <memory>
|
|
21
|
+
|
|
22
|
+
namespace faiss {
|
|
23
|
+
namespace gpu_metal {
|
|
24
|
+
class MetalIVFFlatImpl;
|
|
25
|
+
} // namespace gpu_metal
|
|
26
|
+
} // namespace faiss
|
|
27
|
+
|
|
28
|
+
namespace faiss {
|
|
29
|
+
namespace gpu_metal {
|
|
30
|
+
|
|
31
|
+
/// IVFFlat index wrapper for Metal backend.
|
|
32
|
+
/// Currently delegates to an internal CPU IndexIVFFlat; later phases
|
|
33
|
+
/// may move list scanning to GPU.
|
|
34
|
+
class MetalIndexIVFFlat : public MetalIndex {
|
|
35
|
+
public:
|
|
36
|
+
struct AppendDebugStats {
|
|
37
|
+
size_t relayoutEvents = 0;
|
|
38
|
+
size_t movedLists = 0;
|
|
39
|
+
size_t movedVectors = 0;
|
|
40
|
+
size_t reusedSegmentAllocs = 0;
|
|
41
|
+
size_t tailSegmentAllocs = 0;
|
|
42
|
+
size_t reusedCapacityVecs = 0;
|
|
43
|
+
size_t tailCapacityVecs = 0;
|
|
44
|
+
size_t tailShrinkEvents = 0;
|
|
45
|
+
size_t tailShrunkVecs = 0;
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
/// Construct empty IVFFlat index with its own CPU quantizer.
|
|
49
|
+
MetalIndexIVFFlat(
|
|
50
|
+
std::shared_ptr<MetalResources> resources,
|
|
51
|
+
int dims,
|
|
52
|
+
idx_t nlist,
|
|
53
|
+
faiss::MetricType metric,
|
|
54
|
+
float metricArg = 0.0f,
|
|
55
|
+
MetalIndexConfig config = MetalIndexConfig());
|
|
56
|
+
|
|
57
|
+
/// Construct empty IVFFlat index with caller-provided coarse quantizer.
|
|
58
|
+
/// If ownFields is true, this index takes ownership of `coarseQuantizer`.
|
|
59
|
+
MetalIndexIVFFlat(
|
|
60
|
+
std::shared_ptr<MetalResources> resources,
|
|
61
|
+
faiss::Index* coarseQuantizer,
|
|
62
|
+
int dims,
|
|
63
|
+
idx_t nlist,
|
|
64
|
+
faiss::MetricType metric,
|
|
65
|
+
float metricArg = 0.0f,
|
|
66
|
+
MetalIndexConfig config = MetalIndexConfig(),
|
|
67
|
+
bool ownFields = false);
|
|
68
|
+
|
|
69
|
+
/// Construct from an existing CPU IndexIVFFlat (used by cloners later).
|
|
70
|
+
MetalIndexIVFFlat(
|
|
71
|
+
std::shared_ptr<MetalResources> resources,
|
|
72
|
+
const faiss::IndexIVFFlat* cpuIndex,
|
|
73
|
+
MetalIndexConfig config = MetalIndexConfig());
|
|
74
|
+
|
|
75
|
+
~MetalIndexIVFFlat() override;
|
|
76
|
+
|
|
77
|
+
void train(idx_t n, const float* x) override;
|
|
78
|
+
void add(idx_t n, const float* x) override;
|
|
79
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
|
|
80
|
+
void reset() override;
|
|
81
|
+
|
|
82
|
+
void search(
|
|
83
|
+
idx_t n,
|
|
84
|
+
const float* x,
|
|
85
|
+
idx_t k,
|
|
86
|
+
float* distances,
|
|
87
|
+
idx_t* labels,
|
|
88
|
+
const SearchParameters* params = nullptr) const override;
|
|
89
|
+
|
|
90
|
+
/// Search with caller-provided coarse assignments (skips coarse quantizer).
|
|
91
|
+
/// @param assign Coarse list assignments (n x nprobe), row-major idx_t
|
|
92
|
+
/// @param centroid_dis Distances to assigned centroids (n x nprobe); unused
|
|
93
|
+
/// by GPU scan but accepted for API compatibility
|
|
94
|
+
/// @param store_pairs Ignored (always false for GPU path)
|
|
95
|
+
void search_preassigned(
|
|
96
|
+
idx_t n,
|
|
97
|
+
const float* x,
|
|
98
|
+
idx_t k,
|
|
99
|
+
const idx_t* assign,
|
|
100
|
+
const float* centroid_dis,
|
|
101
|
+
float* distances,
|
|
102
|
+
idx_t* labels,
|
|
103
|
+
bool store_pairs,
|
|
104
|
+
const IVFSearchParameters* params = nullptr,
|
|
105
|
+
IndexIVFStats* stats = nullptr) const;
|
|
106
|
+
|
|
107
|
+
/// Copy from a CPU IndexIVFFlat (helper for future cloner support).
|
|
108
|
+
void copyFrom(const faiss::IndexIVFFlat* index);
|
|
109
|
+
|
|
110
|
+
/// Copy to a CPU IndexIVFFlat.
|
|
111
|
+
void copyTo(faiss::IndexIVFFlat* index) const;
|
|
112
|
+
|
|
113
|
+
/// Reconstruct a single stored vector by internal key.
|
|
114
|
+
void reconstruct(idx_t key, float* recons) const override;
|
|
115
|
+
|
|
116
|
+
/// Reconstruct n contiguous stored vectors starting at i0.
|
|
117
|
+
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
|
|
118
|
+
|
|
119
|
+
/// Re-upload coarse quantizer centroids to GPU after external changes.
|
|
120
|
+
void updateQuantizer();
|
|
121
|
+
|
|
122
|
+
/// Return the vector indices in inverted list `listId`.
|
|
123
|
+
std::vector<idx_t> getListIndices(idx_t listId) const;
|
|
124
|
+
|
|
125
|
+
/// Return raw vector data from inverted list `listId`.
|
|
126
|
+
std::vector<float> getListVectorData(idx_t listId) const;
|
|
127
|
+
|
|
128
|
+
/// Release unused GPU memory.
|
|
129
|
+
void reclaimMemory();
|
|
130
|
+
|
|
131
|
+
/// Pre-allocate GPU storage for the given total number of vectors.
|
|
132
|
+
void reserveMemory(idx_t numVecs);
|
|
133
|
+
|
|
134
|
+
/// Accessors (needed by cloner and tests).
|
|
135
|
+
idx_t nlist() const;
|
|
136
|
+
size_t nprobe() const;
|
|
137
|
+
bool interleavedLayout() const;
|
|
138
|
+
faiss::gpu::IndicesOptions indicesOptions() const;
|
|
139
|
+
AppendDebugStats appendDebugStats() const;
|
|
140
|
+
void resetAppendDebugStats();
|
|
141
|
+
|
|
142
|
+
private:
|
|
143
|
+
std::unique_ptr<faiss::IndexIVFFlat> cpuIndex_;
|
|
144
|
+
std::unique_ptr<MetalIVFFlatImpl> gpuIvf_;
|
|
145
|
+
faiss::gpu::IndicesOptions indicesOptions_;
|
|
146
|
+
bool interleavedLayout_;
|
|
147
|
+
|
|
148
|
+
// Persistent search buffers — allocated once, grown lazily.
|
|
149
|
+
// Declared mutable so search() (const) can resize them.
|
|
150
|
+
mutable id<MTLBuffer> searchQueriesBuf_ = nil;
|
|
151
|
+
mutable id<MTLBuffer> searchCoarseBuf_ = nil;
|
|
152
|
+
mutable id<MTLBuffer> searchOutDistBuf_ = nil;
|
|
153
|
+
mutable id<MTLBuffer> searchOutIdxBuf_ = nil;
|
|
154
|
+
mutable size_t searchQueriesCap_ = 0; // bytes
|
|
155
|
+
mutable size_t searchCoarseCap_ = 0;
|
|
156
|
+
mutable size_t searchOutDistCap_ = 0;
|
|
157
|
+
mutable size_t searchOutIdxCap_ = 0;
|
|
158
|
+
mutable id<MTLBuffer> searchPerListDistBuf_ = nil;
|
|
159
|
+
mutable id<MTLBuffer> searchPerListIdxBuf_ = nil;
|
|
160
|
+
mutable size_t searchPerListDistCap_ = 0;
|
|
161
|
+
mutable size_t searchPerListIdxCap_ = 0;
|
|
162
|
+
|
|
163
|
+
// GPU coarse quantizer buffers (cached, rebuilt on train)
|
|
164
|
+
mutable id<MTLBuffer> centroidBuf_ = nil;
|
|
165
|
+
mutable id<MTLBuffer> centroidNormsBuf_ = nil; // pre-computed ||c||²
|
|
166
|
+
mutable id<MTLBuffer> coarseOutDistBuf_ = nil;
|
|
167
|
+
mutable id<MTLBuffer> coarseOutIdxBuf_ = nil;
|
|
168
|
+
mutable size_t coarseOutDistCap_ = 0;
|
|
169
|
+
mutable size_t coarseOutIdxCap_ = 0;
|
|
170
|
+
mutable id<MTLBuffer> distMatrixBuf_ = nil;
|
|
171
|
+
mutable size_t distMatrixCap_ = 0;
|
|
172
|
+
|
|
173
|
+
/// Ensures buf is at least `needed` bytes, reallocating if necessary.
|
|
174
|
+
void ensureSearchBuf_(id<MTLBuffer>& buf, size_t& cap, size_t needed) const;
|
|
175
|
+
|
|
176
|
+
/// (Re)uploads quantizer centroids to centroidBuf_.
|
|
177
|
+
void uploadCentroids_() const;
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
} // namespace gpu_metal
|
|
181
|
+
} // namespace faiss
|
|
@@ -19,13 +19,15 @@
|
|
|
19
19
|
namespace faiss {
|
|
20
20
|
namespace gpu_metal {
|
|
21
21
|
|
|
22
|
+
enum class IVFScanVariant { Standard, Small, Interleaved };
|
|
23
|
+
|
|
22
24
|
class MetalKernels {
|
|
23
25
|
public:
|
|
24
26
|
explicit MetalKernels(id<MTLDevice> device);
|
|
25
27
|
~MetalKernels();
|
|
26
28
|
|
|
27
29
|
bool isValid() const;
|
|
28
|
-
static constexpr int kMaxK =
|
|
30
|
+
static constexpr int kMaxK = 2048;
|
|
29
31
|
|
|
30
32
|
void encodeDistanceMatrix(
|
|
31
33
|
id<MTLComputeCommandEncoder> enc,
|
|
@@ -37,6 +39,23 @@ class MetalKernels {
|
|
|
37
39
|
int d,
|
|
38
40
|
MetricType metric);
|
|
39
41
|
|
|
42
|
+
void encodeL2WithNorms(
|
|
43
|
+
id<MTLComputeCommandEncoder> enc,
|
|
44
|
+
id<MTLBuffer> queries,
|
|
45
|
+
id<MTLBuffer> vectors,
|
|
46
|
+
id<MTLBuffer> distances,
|
|
47
|
+
id<MTLBuffer> vecNorms,
|
|
48
|
+
int nq,
|
|
49
|
+
int nb,
|
|
50
|
+
int d);
|
|
51
|
+
|
|
52
|
+
void encodeComputeNorms(
|
|
53
|
+
id<MTLComputeCommandEncoder> enc,
|
|
54
|
+
id<MTLBuffer> vectors,
|
|
55
|
+
id<MTLBuffer> norms,
|
|
56
|
+
int nb,
|
|
57
|
+
int d);
|
|
58
|
+
|
|
40
59
|
void encodeTopKThreadgroup(
|
|
41
60
|
id<MTLComputeCommandEncoder> enc,
|
|
42
61
|
id<MTLBuffer> distances,
|
|
@@ -47,6 +66,31 @@ class MetalKernels {
|
|
|
47
66
|
int k,
|
|
48
67
|
bool wantMin);
|
|
49
68
|
|
|
69
|
+
void encodeIVFScanList(
|
|
70
|
+
id<MTLComputeCommandEncoder> enc,
|
|
71
|
+
IVFScanVariant variant,
|
|
72
|
+
id<MTLBuffer> queries,
|
|
73
|
+
id<MTLBuffer> codes,
|
|
74
|
+
id<MTLBuffer> ids,
|
|
75
|
+
id<MTLBuffer> listOffset,
|
|
76
|
+
id<MTLBuffer> listLength,
|
|
77
|
+
id<MTLBuffer> coarseAssign,
|
|
78
|
+
id<MTLBuffer> perListDist,
|
|
79
|
+
id<MTLBuffer> perListIdx,
|
|
80
|
+
id<MTLBuffer> paramsBuf,
|
|
81
|
+
int nq,
|
|
82
|
+
int nprobe,
|
|
83
|
+
id<MTLBuffer> ilCodesOffset = nil);
|
|
84
|
+
|
|
85
|
+
void encodeIVFMergeLists(
|
|
86
|
+
id<MTLComputeCommandEncoder> enc,
|
|
87
|
+
id<MTLBuffer> perListDist,
|
|
88
|
+
id<MTLBuffer> perListIdx,
|
|
89
|
+
id<MTLBuffer> outDist,
|
|
90
|
+
id<MTLBuffer> outIdx,
|
|
91
|
+
id<MTLBuffer> paramsBuf,
|
|
92
|
+
int nq);
|
|
93
|
+
|
|
50
94
|
static int selectTopKVariantIndex(int k);
|
|
51
95
|
|
|
52
96
|
private:
|
|
@@ -56,8 +100,9 @@ class MetalKernels {
|
|
|
56
100
|
id<MTLLibrary> library_;
|
|
57
101
|
std::unordered_map<std::string, id<MTLComputePipelineState>> cache_;
|
|
58
102
|
|
|
59
|
-
static constexpr int kTopKVariantSizes[] =
|
|
60
|
-
|
|
103
|
+
static constexpr int kTopKVariantSizes[] =
|
|
104
|
+
{32, 64, 128, 256, 512, 1024, 2048};
|
|
105
|
+
static constexpr int kNumTopKVariants = 7;
|
|
61
106
|
};
|
|
62
107
|
|
|
63
108
|
MetalKernels& getMetalKernels(id<MTLDevice> device);
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* C++-only API for Python/SWIG. No Objective-C types so SWIG can
|
|
9
|
+
* parse it. Implemented in MetalPythonBridge.mm.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#include <faiss/Index.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
namespace gpu_metal {
|
|
18
|
+
|
|
19
|
+
/// Opaque holder for Metal resources.
|
|
20
|
+
struct StandardMetalResourcesHolder {
|
|
21
|
+
void* impl = nullptr;
|
|
22
|
+
StandardMetalResourcesHolder();
|
|
23
|
+
~StandardMetalResourcesHolder();
|
|
24
|
+
StandardMetalResourcesHolder(const StandardMetalResourcesHolder&) = delete;
|
|
25
|
+
StandardMetalResourcesHolder& operator=(
|
|
26
|
+
const StandardMetalResourcesHolder&) = delete;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
/// Same names as GPU API for unified Python binding.
|
|
30
|
+
int get_num_gpus();
|
|
31
|
+
void gpu_profiler_start();
|
|
32
|
+
void gpu_profiler_stop();
|
|
33
|
+
void gpu_sync_all_devices();
|
|
34
|
+
|
|
35
|
+
/// Clone CPU index to Metal GPU. Caller owns returned index.
|
|
36
|
+
faiss::Index* index_cpu_to_gpu(
|
|
37
|
+
StandardMetalResourcesHolder* res,
|
|
38
|
+
int device,
|
|
39
|
+
const faiss::Index* index);
|
|
40
|
+
|
|
41
|
+
/// Copy Metal index back to CPU. Caller owns returned index.
|
|
42
|
+
faiss::Index* index_gpu_to_cpu(const faiss::Index* index);
|
|
43
|
+
|
|
44
|
+
} // namespace gpu_metal
|
|
45
|
+
} // namespace faiss
|