faiss 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +6 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
- data/vendor/faiss/faiss/IndexHNSW.cpp +173 -143
- data/vendor/faiss/faiss/IndexIVF.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +4 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +68 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +4 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +11 -12
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +3 -3
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +7 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +48 -3
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +556 -199
- data/vendor/faiss/faiss/impl/HNSW.h +51 -13
- data/vendor/faiss/faiss/impl/NSG.cpp +15 -11
- data/vendor/faiss/faiss/impl/Panorama.h +11 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +1 -1
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +7 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +1 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +271 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +50 -0
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +69 -34
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +3 -1
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +35 -43
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -15
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +86 -40
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +81 -50
- data/vendor/faiss/faiss/impl/index_read.cpp +100 -39
- data/vendor/faiss/faiss/impl/index_write.cpp +1 -0
- data/vendor/faiss/faiss/impl/io_macros.h +25 -0
- data/vendor/faiss/faiss/impl/platform_macros.h +12 -8
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +2 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +2 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +2 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +20 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +36 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +2 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +6 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +327 -18
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +264 -27
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +199 -27
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +366 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +144 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +65 -8
- data/vendor/faiss/faiss/index_factory.cpp +5 -1
- data/vendor/faiss/faiss/index_io.h +16 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +4 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +119 -22
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +15 -5
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +65 -24
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +3 -2
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/faiss/utils/hamming.cpp +8 -8
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +2 -1
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +6 -30
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +0 -2
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +14 -68
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +12 -2
- metadata +12 -2
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Metal IVF Flat implementation: GPU-resident IVF list storage and helpers.
|
|
9
|
+
* Mirrors the roles of faiss/gpu/impl/IVFFlat.cuh (storage side only).
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#import <Metal/Metal.h>
|
|
15
|
+
|
|
16
|
+
#include <cstddef>
|
|
17
|
+
#include <cstdint>
|
|
18
|
+
#include <memory>
|
|
19
|
+
#include <vector>
|
|
20
|
+
|
|
21
|
+
#include <faiss/Index.h>
|
|
22
|
+
#include <faiss/MetricType.h>
|
|
23
|
+
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
24
|
+
#include <faiss/gpu_metal/MetalResources.h>
|
|
25
|
+
|
|
26
|
+
namespace faiss {
|
|
27
|
+
namespace gpu_metal {
|
|
28
|
+
|
|
29
|
+
/// GPU-resident IVF list storage for flat (float32) codes.
|
|
30
|
+
/// Layout: all lists share one codes buffer and one ids buffer; each list is
|
|
31
|
+
/// a contiguous range described by (listOffset[list], listLength[list]).
|
|
32
|
+
class MetalIVFFlatImpl {
|
|
33
|
+
public:
|
|
34
|
+
/// Debug counters exposed read-only through appendDebugStats().
/// Every field defaults to zero, so assigning AppendDebugStats{} (which is
/// what resetAppendDebugStats() does) clears all of them at once.
/// NOTE(review): judging by the names alone, the fields fall into three
/// groups -- relayout/move counts, segment allocations split into "reused"
/// vs. "tail", and tail-shrink counts. Confirm the exact meaning and units
/// against the implementation file before relying on them.
struct AppendDebugStats {
|
|
35
|
+
size_t relayoutEvents = 0;
|
|
36
|
+
size_t movedLists = 0;
|
|
37
|
+
size_t movedVectors = 0;
|
|
38
|
+
size_t reusedSegmentAllocs = 0;
|
|
39
|
+
size_t tailSegmentAllocs = 0;
|
|
40
|
+
size_t reusedCapacityVecs = 0;
|
|
41
|
+
size_t tailCapacityVecs = 0;
|
|
42
|
+
size_t tailShrinkEvents = 0;
|
|
43
|
+
size_t tailShrunkVecs = 0;
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
MetalIVFFlatImpl(
|
|
47
|
+
std::shared_ptr<MetalResources> resources,
|
|
48
|
+
int dim,
|
|
49
|
+
idx_t nlist,
|
|
50
|
+
faiss::MetricType metric,
|
|
51
|
+
float metricArg,
|
|
52
|
+
faiss::gpu::IndicesOptions indicesOptions,
|
|
53
|
+
bool interleavedLayout);
|
|
54
|
+
|
|
55
|
+
~MetalIVFFlatImpl();
|
|
56
|
+
|
|
57
|
+
/// Reset all IVF lists and free GPU storage.
|
|
58
|
+
void reset();
|
|
59
|
+
|
|
60
|
+
/// Reserve host/GPU storage for at least totalVecs vectors.
|
|
61
|
+
void reserveMemory(idx_t totalVecs);
|
|
62
|
+
|
|
63
|
+
/// Append a batch of vectors to IVF lists.
|
|
64
|
+
/// - x: host pointer, size n * dim
|
|
65
|
+
/// - list_nos: host pointer, size n; -1 entries are skipped
|
|
66
|
+
/// - xids: host pointer, size n (may be null to use internal ids)
|
|
67
|
+
void appendVectors(
|
|
68
|
+
idx_t n,
|
|
69
|
+
const float* x,
|
|
70
|
+
const idx_t* list_nos,
|
|
71
|
+
const idx_t* xids);
|
|
72
|
+
|
|
73
|
+
/// Accessors for future GPU search path.
|
|
74
|
+
int dim() const {
|
|
75
|
+
return dim_;
|
|
76
|
+
}
|
|
77
|
+
idx_t nlist() const {
|
|
78
|
+
return nlist_;
|
|
79
|
+
}
|
|
80
|
+
faiss::MetricType metricType() const {
|
|
81
|
+
return metric_type_;
|
|
82
|
+
}
|
|
83
|
+
float metricArg() const {
|
|
84
|
+
return metric_arg_;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/// Host-side list lengths: returns a const reference to the listLength_
/// member (size_t entries). The GPU copy handed out by listLengthGpuBuffer()
/// is documented as uint32_t, so the two differ in element width.
/// NOTE(review): presumably one entry per list -- confirm.
const std::vector<size_t>& listLength() const {
|
|
88
|
+
return listLength_;
|
|
89
|
+
}
|
|
90
|
+
/// Host-side list offsets: returns a const reference to the listOffset_
/// member (size_t entries). The GPU copy handed out by listOffsetGpuBuffer()
/// is documented as uint32_t, so the two differ in element width.
/// NOTE(review): presumably one entry per list -- confirm.
const std::vector<size_t>& listOffset() const {
|
|
91
|
+
return listOffset_;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
id<MTLBuffer> codesBuffer() const {
|
|
95
|
+
return codesBuffer_;
|
|
96
|
+
}
|
|
97
|
+
id<MTLBuffer> idsBuffer() const {
|
|
98
|
+
return idsBuffer_;
|
|
99
|
+
}
|
|
100
|
+
/// Pre-built GPU buffer of (nlist) uint32_t offsets (updated on every add).
|
|
101
|
+
id<MTLBuffer> listOffsetGpuBuffer() const {
|
|
102
|
+
return listOffsetBuf_;
|
|
103
|
+
}
|
|
104
|
+
/// Pre-built GPU buffer of (nlist) uint32_t lengths (updated on every add).
|
|
105
|
+
id<MTLBuffer> listLengthGpuBuffer() const {
|
|
106
|
+
return listLengthBuf_;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
size_t totalVecs() const {
|
|
110
|
+
return totalVecs_;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/// Interleaved codes buffer (blocks of 32 vectors, dims interleaved).
|
|
114
|
+
id<MTLBuffer> interleavedCodesBuffer() const {
|
|
115
|
+
return interleavedCodesBuf_;
|
|
116
|
+
}
|
|
117
|
+
/// Per-list offsets into the interleaved codes buffer. The member comment
/// describes the entries as (nlist) uint32_t "float offsets", i.e. the
/// values are integers, not floats.
/// NOTE(review): presumably each offset is counted in float elements rather
/// than bytes -- confirm against the code that fills the buffer.
id<MTLBuffer> interleavedCodesOffsetBuffer() const {
|
|
119
|
+
return interleavedCodesOffsetBuf_;
|
|
120
|
+
}
|
|
121
|
+
bool interleavedLayout() const {
|
|
122
|
+
return interleavedLayout_;
|
|
123
|
+
}
|
|
124
|
+
/// Rebuild interleaved buffers from host storage if they are stale.
|
|
125
|
+
void ensureInterleavedLayoutUpToDate();
|
|
126
|
+
/// Read-only access to the AppendDebugStats counters held in appendStats_.
/// Returns a reference to the member, not a snapshot copy.
const AppendDebugStats& appendDebugStats() const {
|
|
127
|
+
return appendStats_;
|
|
128
|
+
}
|
|
129
|
+
/// Clear the debug counters by overwriting appendStats_ with a
/// default-constructed AppendDebugStats (every field defaults to 0).
void resetAppendDebugStats() {
|
|
130
|
+
appendStats_ = AppendDebugStats{};
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
private:
|
|
134
|
+
/// One entry of the freeSegments_ list: a range given by its start offset
/// and its length, both defaulting to 0.
/// NOTE(review): presumably a currently unused region of the list storage,
/// measured in the same units as listOffset_/listCapacity_ -- confirm
/// against allocSegment_/freeSegment_ in the implementation file.
struct FreeSegment {
|
|
135
|
+
size_t offset = 0;
|
|
136
|
+
size_t length = 0;
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
bool ensureCapacityForAppend_(
|
|
140
|
+
const std::vector<size_t>& addPerList,
|
|
141
|
+
std::vector<uint8_t>* movedLists);
|
|
142
|
+
void uploadToGpu_(
|
|
143
|
+
const std::vector<size_t>& oldLength,
|
|
144
|
+
const std::vector<size_t>& addPerList,
|
|
145
|
+
const std::vector<uint8_t>& movedLists,
|
|
146
|
+
bool forceFullUpload);
|
|
147
|
+
void rebuildInterleavedBuffers_();
|
|
148
|
+
size_t allocSegment_(size_t length);
|
|
149
|
+
void freeSegment_(
|
|
150
|
+
size_t offset,
|
|
151
|
+
size_t length,
|
|
152
|
+
bool allowTailShrink = true);
|
|
153
|
+
void coalesceFreeSegments_();
|
|
154
|
+
void tryShrinkTail_();
|
|
155
|
+
|
|
156
|
+
std::shared_ptr<MetalResources> resources_;
|
|
157
|
+
|
|
158
|
+
int dim_;
|
|
159
|
+
idx_t nlist_;
|
|
160
|
+
faiss::MetricType metric_type_;
|
|
161
|
+
float metric_arg_;
|
|
162
|
+
faiss::gpu::IndicesOptions indicesOptions_;
|
|
163
|
+
bool interleavedLayout_;
|
|
164
|
+
|
|
165
|
+
// Per-list metadata
|
|
166
|
+
std::vector<size_t> listLength_;
|
|
167
|
+
std::vector<size_t> listOffset_;
|
|
168
|
+
std::vector<size_t> listCapacity_;
|
|
169
|
+
|
|
170
|
+
// Host copies of IVF data (flat layout)
|
|
171
|
+
std::vector<float> hostCodes_; // size = totalVecs_ * dim_
|
|
172
|
+
std::vector<idx_t> hostIds_; // size = totalVecs_
|
|
173
|
+
std::vector<FreeSegment> freeSegments_;
|
|
174
|
+
AppendDebugStats appendStats_;
|
|
175
|
+
size_t totalVecs_;
|
|
176
|
+
size_t totalCapacityVecs_;
|
|
177
|
+
|
|
178
|
+
// GPU storage
|
|
179
|
+
id<MTLBuffer> codesBuffer_;
|
|
180
|
+
id<MTLBuffer> idsBuffer_;
|
|
181
|
+
id<MTLBuffer> listOffsetBuf_; // (nlist) uint32_t, list element offsets
|
|
182
|
+
id<MTLBuffer> listLengthBuf_; // (nlist) uint32_t, list sizes
|
|
183
|
+
|
|
184
|
+
// Interleaved codes layout (blocks of 32 vectors, dims interleaved)
|
|
185
|
+
id<MTLBuffer> interleavedCodesBuf_;
|
|
186
|
+
id<MTLBuffer> interleavedCodesOffsetBuf_; // (nlist) uint32_t, float offsets
|
|
187
|
+
bool interleavedDirty_ = true;
|
|
188
|
+
|
|
189
|
+
static constexpr int kInterleavedGroupSize = 32;
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
} // namespace gpu_metal
|
|
193
|
+
} // namespace faiss
|