faiss 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/Index.h +1 -1
  5. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +6 -7
  6. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
  7. data/vendor/faiss/faiss/IndexHNSW.cpp +173 -143
  8. data/vendor/faiss/faiss/IndexIVF.cpp +2 -2
  9. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  10. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -1
  11. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +3 -3
  12. data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
  13. data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -3
  14. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +4 -13
  15. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  16. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  17. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +68 -6
  18. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  19. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  20. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  21. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  22. data/vendor/faiss/faiss/factory_tools.cpp +4 -0
  23. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  24. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +11 -12
  25. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +3 -3
  26. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  27. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +7 -0
  28. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  29. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +48 -3
  30. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  31. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  32. data/vendor/faiss/faiss/impl/HNSW.cpp +556 -199
  33. data/vendor/faiss/faiss/impl/HNSW.h +51 -13
  34. data/vendor/faiss/faiss/impl/NSG.cpp +15 -11
  35. data/vendor/faiss/faiss/impl/Panorama.h +11 -0
  36. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -2
  37. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +1 -1
  38. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +7 -1
  39. data/vendor/faiss/faiss/impl/ResultHandler.h +1 -0
  40. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +271 -8
  41. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +50 -0
  42. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  43. data/vendor/faiss/faiss/impl/VisitedTable.h +69 -34
  44. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +3 -1
  45. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +35 -43
  46. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -15
  47. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +86 -40
  48. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +81 -50
  49. data/vendor/faiss/faiss/impl/index_read.cpp +100 -39
  50. data/vendor/faiss/faiss/impl/index_write.cpp +1 -0
  51. data/vendor/faiss/faiss/impl/io_macros.h +25 -0
  52. data/vendor/faiss/faiss/impl/platform_macros.h +12 -8
  53. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +2 -0
  54. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +2 -0
  55. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +2 -0
  56. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +20 -0
  57. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +36 -0
  58. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -0
  59. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  60. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +2 -0
  61. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +6 -0
  62. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +327 -18
  63. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +264 -27
  64. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  65. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  66. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +199 -27
  67. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +366 -3
  68. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +144 -19
  69. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +26 -0
  70. data/vendor/faiss/faiss/impl/simd_dispatch.h +65 -8
  71. data/vendor/faiss/faiss/index_factory.cpp +5 -1
  72. data/vendor/faiss/faiss/index_io.h +16 -0
  73. data/vendor/faiss/faiss/invlists/DirectMap.cpp +4 -1
  74. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +13 -13
  75. data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
  76. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +119 -22
  77. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +15 -5
  78. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  79. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  80. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +65 -24
  81. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +3 -2
  82. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  83. data/vendor/faiss/faiss/utils/distances_simd.cpp +0 -1
  84. data/vendor/faiss/faiss/utils/hamming.cpp +8 -8
  85. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +2 -1
  86. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  87. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +6 -30
  88. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  89. data/vendor/faiss/faiss/utils/partitioning.cpp +0 -2
  90. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +14 -68
  91. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  92. data/vendor/faiss/faiss/utils/simd_levels.cpp +12 -2
  93. metadata +12 -2
@@ -43,6 +43,10 @@ const std::map<faiss::ScalarQuantizer::QuantizerType, std::string> sq_types = {
43
43
  {faiss::ScalarQuantizer::QT_3bit_tqmse, "SQtqmse3"},
44
44
  {faiss::ScalarQuantizer::QT_4bit_tqmse, "SQtqmse4"},
45
45
  {faiss::ScalarQuantizer::QT_8bit_tqmse, "SQtqmse8"},
46
+ {faiss::ScalarQuantizer::QT_2bit_tq, "SQtq2"},
47
+ {faiss::ScalarQuantizer::QT_3bit_tq, "SQtq3"},
48
+ {faiss::ScalarQuantizer::QT_4bit_tq, "SQtq4"},
49
+ {faiss::ScalarQuantizer::QT_5bit_tq, "SQtq5"},
46
50
  };
47
51
 
48
52
  int get_hnsw_M(const faiss::IndexHNSW* index) {
@@ -33,7 +33,8 @@
33
33
 
34
34
  #if defined USE_NVIDIA_CUVS
35
35
  #include <raft/core/device_resources.hpp>
36
- #include <rmm/mr/device_memory_resource.hpp>
36
+ #include <cuda/memory_resource>
37
+ #include <optional>
37
38
  #endif
38
39
 
39
40
  namespace faiss {
@@ -163,7 +164,7 @@ struct AllocRequest : public AllocInfo {
163
164
  size_t size = 0;
164
165
 
165
166
  #if defined USE_NVIDIA_CUVS
166
- rmm::mr::device_memory_resource* mr = nullptr;
167
+ std::optional<cuda::mr::any_resource<cuda::mr::device_accessible>> mr;
167
168
  #endif
168
169
  };
169
170
 
@@ -92,8 +92,8 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
92
92
  StandardGpuResourcesImpl::StandardGpuResourcesImpl()
93
93
  :
94
94
  #if defined USE_NVIDIA_CUVS
95
- mmr_(new rmm::mr::managed_memory_resource),
96
- pmr_(new rmm::mr::pinned_host_memory_resource),
95
+ mmr_{},
96
+ pmr_{},
97
97
  #endif
98
98
  pinnedMemAlloc_(nullptr),
99
99
  pinnedMemAllocSize_(0),
@@ -164,7 +164,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
164
164
 
165
165
  if (pinnedMemAlloc_) {
166
166
  #if defined USE_NVIDIA_CUVS
167
- pmr_->deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_);
167
+ pmr_.deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_);
168
168
  #else
169
169
  auto err = cudaFreeHost(pinnedMemAlloc_);
170
170
  FAISS_ASSERT_FMT(
@@ -350,7 +350,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
350
350
  // pinned memory allocation
351
351
  if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
352
352
  try {
353
- pinnedMemAlloc_ = pmr_->allocate_sync(pinnedMemSize_);
353
+ pinnedMemAlloc_ = pmr_.allocate_sync(pinnedMemSize_);
354
354
  } catch (const std::bad_alloc& rmm_ex) {
355
355
  FAISS_THROW_MSG("CUDA memory allocation error");
356
356
  }
@@ -546,10 +546,9 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
546
546
  } else if (adjReq.space == MemorySpace::Device) {
547
547
  #if defined USE_NVIDIA_CUVS
548
548
  try {
549
- rmm::mr::device_memory_resource* current_mr =
550
- rmm::mr::get_per_device_resource(
551
- rmm::cuda_device_id{adjReq.device});
552
- p = current_mr->allocate(adjReq.stream, adjReq.size);
549
+ auto current_mr = rmm::mr::get_per_device_resource_ref(
550
+ rmm::cuda_device_id{adjReq.device});
551
+ p = current_mr.allocate(adjReq.stream, adjReq.size);
553
552
  adjReq.mr = current_mr;
554
553
  } catch (const std::bad_alloc& rmm_ex) {
555
554
  FAISS_THROW_MSG("CUDA memory allocation error");
@@ -562,7 +561,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
562
561
  // FIXME: as of CUDA 11, a memory allocation error appears to be
563
562
  // presented via cudaGetLastError as well, and needs to be
564
563
  // cleared. Just call the function to clear it
565
- cudaGetLastError();
564
+ (void)cudaGetLastError();
566
565
 
567
566
  std::stringstream ss;
568
567
  ss << "StandardGpuResources: alloc fail " << adjReq.toString()
@@ -584,8 +583,8 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
584
583
  // TODO: change this to use the current device resource once RMM has
585
584
  // a way to retrieve a "guaranteed" managed memory resource for a
586
585
  // device.
587
- p = mmr_->allocate(adjReq.stream, adjReq.size);
588
- adjReq.mr = mmr_.get();
586
+ p = mmr_.allocate(adjReq.stream, adjReq.size);
587
+ adjReq.mr = mmr_;
589
588
  } catch (const std::bad_alloc& rmm_ex) {
590
589
  FAISS_THROW_MSG("CUDA memory allocation error");
591
590
  }
@@ -596,7 +595,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
596
595
  // FIXME: as of CUDA 11, a memory allocation error appears to be
597
596
  // presented via cudaGetLastError as well, and needs to be cleared.
598
597
  // Just call the function to clear it
599
- cudaGetLastError();
598
+ (void)cudaGetLastError();
600
599
 
601
600
  std::stringstream ss;
602
601
  ss << "StandardGpuResources: alloc fail " << adjReq.toString()
@@ -25,7 +25,7 @@
25
25
 
26
26
  #if defined USE_NVIDIA_CUVS
27
27
  #include <raft/core/device_resources.hpp>
28
- #include <rmm/mr/device_memory_resource.hpp>
28
+ #include <rmm/mr/managed_memory_resource.hpp>
29
29
  #include <rmm/mr/pinned_host_memory_resource.hpp>
30
30
  #endif
31
31
 
@@ -171,10 +171,10 @@ class StandardGpuResourcesImpl : public GpuResources {
171
171
  */
172
172
 
173
173
  // managed_memory_resource
174
- std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
174
+ rmm::mr::managed_memory_resource mmr_;
175
175
 
176
176
  // pinned_host_memory_resource
177
- std::unique_ptr<rmm::mr::pinned_host_memory_resource> pmr_;
177
+ rmm::mr::pinned_host_memory_resource pmr_;
178
178
  #endif
179
179
 
180
180
  /// Pinned memory allocation for use with this GPU
@@ -0,0 +1,87 @@
1
+ // @lint-ignore-every LICENSELINT
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and its affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * IVF distance computation and scan dispatch for Metal backend.
9
+ */
10
+
11
+ #pragma once
12
+
13
+ #import <Metal/Metal.h>
14
+
15
+ #include <cstddef>
16
+ #include <cstdint>
17
+ #include <memory>
18
+
19
+ namespace faiss {
20
+ namespace gpu_metal {
21
+
22
+ class MetalResources;
23
+
24
+ int getMetalDistanceMaxK();
25
+
26
+ bool runMetalComputeNorms(
27
+ id<MTLDevice> device,
28
+ id<MTLCommandQueue> queue,
29
+ id<MTLBuffer> vectors,
30
+ int nb,
31
+ int d,
32
+ id<MTLBuffer> normsBuf,
33
+ bool waitForCompletion = true);
34
+
35
+ bool runMetalIVFFlatScan(
36
+ id<MTLDevice> device,
37
+ id<MTLCommandQueue> queue,
38
+ id<MTLBuffer> queries,
39
+ id<MTLBuffer> codes,
40
+ id<MTLBuffer> ids,
41
+ id<MTLBuffer> listOffset,
42
+ id<MTLBuffer> listLength,
43
+ id<MTLBuffer> coarseAssign,
44
+ int nq,
45
+ int d,
46
+ int k,
47
+ int nprobe,
48
+ bool isL2,
49
+ id<MTLBuffer> outDistances,
50
+ id<MTLBuffer> outIndices,
51
+ id<MTLBuffer> perListDistBuf,
52
+ id<MTLBuffer> perListIdxBuf,
53
+ id<MTLBuffer> interleavedCodes = nil,
54
+ id<MTLBuffer> interleavedCodesOffset = nil,
55
+ bool waitForCompletion = true);
56
+
57
+ bool runMetalIVFFlatFullSearch(
58
+ id<MTLDevice> device,
59
+ id<MTLCommandQueue> queue,
60
+ id<MTLBuffer> queries,
61
+ int nq,
62
+ int d,
63
+ int k,
64
+ int nprobe,
65
+ bool isL2,
66
+ id<MTLBuffer> centroids,
67
+ int nlist,
68
+ id<MTLBuffer> codes,
69
+ id<MTLBuffer> ids,
70
+ id<MTLBuffer> listOffset,
71
+ id<MTLBuffer> listLength,
72
+ id<MTLBuffer> outDistances,
73
+ id<MTLBuffer> outIndices,
74
+ id<MTLBuffer> perListDistBuf,
75
+ id<MTLBuffer> perListIdxBuf,
76
+ id<MTLBuffer> coarseDistBuf,
77
+ id<MTLBuffer> coarseIdxBuf,
78
+ id<MTLBuffer> distMatrixBuf,
79
+ id<MTLBuffer> centroidNormsBuf = nil,
80
+ int avgListLen = 256,
81
+ id<MTLBuffer> interleavedCodes = nil,
82
+ id<MTLBuffer> interleavedCodesOffset = nil,
83
+ bool centroidsAreFP16 = false,
84
+ bool waitForCompletion = true);
85
+
86
+ } // namespace gpu_metal
87
+ } // namespace faiss
@@ -11,6 +11,7 @@
11
11
  #pragma once
12
12
 
13
13
  #include <faiss/Index.h>
14
+ #include <faiss/gpu/GpuIndicesOptions.h>
14
15
  #include <faiss/gpu_metal/MetalResources.h>
15
16
  #include <memory>
16
17
 
@@ -20,6 +21,12 @@ namespace gpu_metal {
20
21
  /// Configuration for Metal index (mirrors GpuIndexConfig roles).
21
22
  struct MetalIndexConfig {
22
23
  int device = 0;
24
+
25
+ bool useFloat16CoarseQuantizer = false;
26
+
27
+ faiss::gpu::IndicesOptions indicesOptions = faiss::gpu::INDICES_64_BIT;
28
+
29
+ bool interleavedLayout = true;
23
30
  };
24
31
 
25
32
  /// Base class for Metal-backed indexes. Mirrors faiss::gpu::GpuIndex.
@@ -0,0 +1,181 @@
1
+ // @lint-ignore-every LICENSELINT
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and its affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * Minimal Metal IVFFlat wrapper.
9
+ *
10
+ */
11
+
12
+ #pragma once
13
+
14
+ #import <Metal/Metal.h>
15
+
16
+ #include <faiss/IndexIVFFlat.h>
17
+ #include <faiss/gpu/GpuIndicesOptions.h>
18
+ #include <faiss/gpu_metal/MetalIndex.h>
19
+
20
+ #include <memory>
21
+
22
+ namespace faiss {
23
+ namespace gpu_metal {
24
+ class MetalIVFFlatImpl;
25
+ } // namespace gpu_metal
26
+ } // namespace faiss
27
+
28
+ namespace faiss {
29
+ namespace gpu_metal {
30
+
31
+ /// IVFFlat index wrapper for Metal backend.
32
+ /// Currently delegates to an internal CPU IndexIVFFlat; later phases
33
+ /// may move list scanning to GPU.
34
+ class MetalIndexIVFFlat : public MetalIndex {
35
+ public:
36
+ struct AppendDebugStats {
37
+ size_t relayoutEvents = 0;
38
+ size_t movedLists = 0;
39
+ size_t movedVectors = 0;
40
+ size_t reusedSegmentAllocs = 0;
41
+ size_t tailSegmentAllocs = 0;
42
+ size_t reusedCapacityVecs = 0;
43
+ size_t tailCapacityVecs = 0;
44
+ size_t tailShrinkEvents = 0;
45
+ size_t tailShrunkVecs = 0;
46
+ };
47
+
48
+ /// Construct empty IVFFlat index with its own CPU quantizer.
49
+ MetalIndexIVFFlat(
50
+ std::shared_ptr<MetalResources> resources,
51
+ int dims,
52
+ idx_t nlist,
53
+ faiss::MetricType metric,
54
+ float metricArg = 0.0f,
55
+ MetalIndexConfig config = MetalIndexConfig());
56
+
57
+ /// Construct empty IVFFlat index with caller-provided coarse quantizer.
58
+ /// If ownFields is true, this index takes ownership of `coarseQuantizer`.
59
+ MetalIndexIVFFlat(
60
+ std::shared_ptr<MetalResources> resources,
61
+ faiss::Index* coarseQuantizer,
62
+ int dims,
63
+ idx_t nlist,
64
+ faiss::MetricType metric,
65
+ float metricArg = 0.0f,
66
+ MetalIndexConfig config = MetalIndexConfig(),
67
+ bool ownFields = false);
68
+
69
+ /// Construct from an existing CPU IndexIVFFlat (used by cloners later).
70
+ MetalIndexIVFFlat(
71
+ std::shared_ptr<MetalResources> resources,
72
+ const faiss::IndexIVFFlat* cpuIndex,
73
+ MetalIndexConfig config = MetalIndexConfig());
74
+
75
+ ~MetalIndexIVFFlat() override;
76
+
77
+ void train(idx_t n, const float* x) override;
78
+ void add(idx_t n, const float* x) override;
79
+ void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
80
+ void reset() override;
81
+
82
+ void search(
83
+ idx_t n,
84
+ const float* x,
85
+ idx_t k,
86
+ float* distances,
87
+ idx_t* labels,
88
+ const SearchParameters* params = nullptr) const override;
89
+
90
+ /// Search with caller-provided coarse assignments (skips coarse quantizer).
91
+ /// @param assign Coarse list assignments (n x nprobe), row-major idx_t
92
+ /// @param centroid_dis Distances to assigned centroids (n x nprobe); unused
93
+ /// by GPU scan but accepted for API compatibility
94
+ /// @param store_pairs Ignored (always false for GPU path)
95
+ void search_preassigned(
96
+ idx_t n,
97
+ const float* x,
98
+ idx_t k,
99
+ const idx_t* assign,
100
+ const float* centroid_dis,
101
+ float* distances,
102
+ idx_t* labels,
103
+ bool store_pairs,
104
+ const IVFSearchParameters* params = nullptr,
105
+ IndexIVFStats* stats = nullptr) const;
106
+
107
+ /// Copy from a CPU IndexIVFFlat (helper for future cloner support).
108
+ void copyFrom(const faiss::IndexIVFFlat* index);
109
+
110
+ /// Copy to a CPU IndexIVFFlat.
111
+ void copyTo(faiss::IndexIVFFlat* index) const;
112
+
113
+ /// Reconstruct a single stored vector by internal key.
114
+ void reconstruct(idx_t key, float* recons) const override;
115
+
116
+ /// Reconstruct n contiguous stored vectors starting at i0.
117
+ void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
118
+
119
+ /// Re-upload coarse quantizer centroids to GPU after external changes.
120
+ void updateQuantizer();
121
+
122
+ /// Return the vector indices in inverted list `listId`.
123
+ std::vector<idx_t> getListIndices(idx_t listId) const;
124
+
125
+ /// Return raw vector data from inverted list `listId`.
126
+ std::vector<float> getListVectorData(idx_t listId) const;
127
+
128
+ /// Release unused GPU memory.
129
+ void reclaimMemory();
130
+
131
+ /// Pre-allocate GPU storage for the given total number of vectors.
132
+ void reserveMemory(idx_t numVecs);
133
+
134
+ /// Accessors (needed by cloner and tests).
135
+ idx_t nlist() const;
136
+ size_t nprobe() const;
137
+ bool interleavedLayout() const;
138
+ faiss::gpu::IndicesOptions indicesOptions() const;
139
+ AppendDebugStats appendDebugStats() const;
140
+ void resetAppendDebugStats();
141
+
142
+ private:
143
+ std::unique_ptr<faiss::IndexIVFFlat> cpuIndex_;
144
+ std::unique_ptr<MetalIVFFlatImpl> gpuIvf_;
145
+ faiss::gpu::IndicesOptions indicesOptions_;
146
+ bool interleavedLayout_;
147
+
148
+ // Persistent search buffers — allocated once, grown lazily.
149
+ // Declared mutable so search() (const) can resize them.
150
+ mutable id<MTLBuffer> searchQueriesBuf_ = nil;
151
+ mutable id<MTLBuffer> searchCoarseBuf_ = nil;
152
+ mutable id<MTLBuffer> searchOutDistBuf_ = nil;
153
+ mutable id<MTLBuffer> searchOutIdxBuf_ = nil;
154
+ mutable size_t searchQueriesCap_ = 0; // bytes
155
+ mutable size_t searchCoarseCap_ = 0;
156
+ mutable size_t searchOutDistCap_ = 0;
157
+ mutable size_t searchOutIdxCap_ = 0;
158
+ mutable id<MTLBuffer> searchPerListDistBuf_ = nil;
159
+ mutable id<MTLBuffer> searchPerListIdxBuf_ = nil;
160
+ mutable size_t searchPerListDistCap_ = 0;
161
+ mutable size_t searchPerListIdxCap_ = 0;
162
+
163
+ // GPU coarse quantizer buffers (cached, rebuilt on train)
164
+ mutable id<MTLBuffer> centroidBuf_ = nil;
165
+ mutable id<MTLBuffer> centroidNormsBuf_ = nil; // pre-computed ||c||²
166
+ mutable id<MTLBuffer> coarseOutDistBuf_ = nil;
167
+ mutable id<MTLBuffer> coarseOutIdxBuf_ = nil;
168
+ mutable size_t coarseOutDistCap_ = 0;
169
+ mutable size_t coarseOutIdxCap_ = 0;
170
+ mutable id<MTLBuffer> distMatrixBuf_ = nil;
171
+ mutable size_t distMatrixCap_ = 0;
172
+
173
+ /// Ensures buf is at least `needed` bytes, reallocating if necessary.
174
+ void ensureSearchBuf_(id<MTLBuffer>& buf, size_t& cap, size_t needed) const;
175
+
176
+ /// (Re)uploads quantizer centroids to centroidBuf_.
177
+ void uploadCentroids_() const;
178
+ };
179
+
180
+ } // namespace gpu_metal
181
+ } // namespace faiss
@@ -19,13 +19,15 @@
19
19
  namespace faiss {
20
20
  namespace gpu_metal {
21
21
 
22
+ enum class IVFScanVariant { Standard, Small, Interleaved };
23
+
22
24
  class MetalKernels {
23
25
  public:
24
26
  explicit MetalKernels(id<MTLDevice> device);
25
27
  ~MetalKernels();
26
28
 
27
29
  bool isValid() const;
28
- static constexpr int kMaxK = 256;
30
+ static constexpr int kMaxK = 2048;
29
31
 
30
32
  void encodeDistanceMatrix(
31
33
  id<MTLComputeCommandEncoder> enc,
@@ -37,6 +39,23 @@ class MetalKernels {
37
39
  int d,
38
40
  MetricType metric);
39
41
 
42
+ void encodeL2WithNorms(
43
+ id<MTLComputeCommandEncoder> enc,
44
+ id<MTLBuffer> queries,
45
+ id<MTLBuffer> vectors,
46
+ id<MTLBuffer> distances,
47
+ id<MTLBuffer> vecNorms,
48
+ int nq,
49
+ int nb,
50
+ int d);
51
+
52
+ void encodeComputeNorms(
53
+ id<MTLComputeCommandEncoder> enc,
54
+ id<MTLBuffer> vectors,
55
+ id<MTLBuffer> norms,
56
+ int nb,
57
+ int d);
58
+
40
59
  void encodeTopKThreadgroup(
41
60
  id<MTLComputeCommandEncoder> enc,
42
61
  id<MTLBuffer> distances,
@@ -47,6 +66,31 @@ class MetalKernels {
47
66
  int k,
48
67
  bool wantMin);
49
68
 
69
+ void encodeIVFScanList(
70
+ id<MTLComputeCommandEncoder> enc,
71
+ IVFScanVariant variant,
72
+ id<MTLBuffer> queries,
73
+ id<MTLBuffer> codes,
74
+ id<MTLBuffer> ids,
75
+ id<MTLBuffer> listOffset,
76
+ id<MTLBuffer> listLength,
77
+ id<MTLBuffer> coarseAssign,
78
+ id<MTLBuffer> perListDist,
79
+ id<MTLBuffer> perListIdx,
80
+ id<MTLBuffer> paramsBuf,
81
+ int nq,
82
+ int nprobe,
83
+ id<MTLBuffer> ilCodesOffset = nil);
84
+
85
+ void encodeIVFMergeLists(
86
+ id<MTLComputeCommandEncoder> enc,
87
+ id<MTLBuffer> perListDist,
88
+ id<MTLBuffer> perListIdx,
89
+ id<MTLBuffer> outDist,
90
+ id<MTLBuffer> outIdx,
91
+ id<MTLBuffer> paramsBuf,
92
+ int nq);
93
+
50
94
  static int selectTopKVariantIndex(int k);
51
95
 
52
96
  private:
@@ -56,8 +100,9 @@ class MetalKernels {
56
100
  id<MTLLibrary> library_;
57
101
  std::unordered_map<std::string, id<MTLComputePipelineState>> cache_;
58
102
 
59
- static constexpr int kTopKVariantSizes[] = {32, 64, 128, 256};
60
- static constexpr int kNumTopKVariants = 4;
103
+ static constexpr int kTopKVariantSizes[] =
104
+ {32, 64, 128, 256, 512, 1024, 2048};
105
+ static constexpr int kNumTopKVariants = 7;
61
106
  };
62
107
 
63
108
  MetalKernels& getMetalKernels(id<MTLDevice> device);
@@ -0,0 +1,45 @@
1
+ // @lint-ignore-every LICENSELINT
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and its affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * C++-only API for Python/SWIG. No Objective-C types so SWIG can
9
+ * parse it. Implemented in MetalPythonBridge.mm.
10
+ */
11
+
12
+ #pragma once
13
+
14
+ #include <faiss/Index.h>
15
+
16
+ namespace faiss {
17
+ namespace gpu_metal {
18
+
19
+ /// Opaque holder for Metal resources.
20
+ struct StandardMetalResourcesHolder {
21
+ void* impl = nullptr;
22
+ StandardMetalResourcesHolder();
23
+ ~StandardMetalResourcesHolder();
24
+ StandardMetalResourcesHolder(const StandardMetalResourcesHolder&) = delete;
25
+ StandardMetalResourcesHolder& operator=(
26
+ const StandardMetalResourcesHolder&) = delete;
27
+ };
28
+
29
+ /// Same names as GPU API for unified Python binding.
30
+ int get_num_gpus();
31
+ void gpu_profiler_start();
32
+ void gpu_profiler_stop();
33
+ void gpu_sync_all_devices();
34
+
35
+ /// Clone CPU index to Metal GPU. Caller owns returned index.
36
+ faiss::Index* index_cpu_to_gpu(
37
+ StandardMetalResourcesHolder* res,
38
+ int device,
39
+ const faiss::Index* index);
40
+
41
+ /// Copy Metal index back to CPU. Caller owns returned index.
42
+ faiss::Index* index_gpu_to_cpu(const faiss::Index* index);
43
+
44
+ } // namespace gpu_metal
45
+ } // namespace faiss