faiss 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/Index.h +1 -1
  5. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +6 -7
  6. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
  7. data/vendor/faiss/faiss/IndexHNSW.cpp +173 -143
  8. data/vendor/faiss/faiss/IndexIVF.cpp +2 -2
  9. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  10. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -1
  11. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +3 -3
  12. data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
  13. data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -3
  14. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +4 -13
  15. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  16. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  17. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +68 -6
  18. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  19. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  20. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  21. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  22. data/vendor/faiss/faiss/factory_tools.cpp +4 -0
  23. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  24. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +11 -12
  25. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +3 -3
  26. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  27. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +7 -0
  28. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  29. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +48 -3
  30. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  31. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  32. data/vendor/faiss/faiss/impl/HNSW.cpp +556 -199
  33. data/vendor/faiss/faiss/impl/HNSW.h +51 -13
  34. data/vendor/faiss/faiss/impl/NSG.cpp +15 -11
  35. data/vendor/faiss/faiss/impl/Panorama.h +11 -0
  36. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -2
  37. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +1 -1
  38. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +7 -1
  39. data/vendor/faiss/faiss/impl/ResultHandler.h +1 -0
  40. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +271 -8
  41. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +50 -0
  42. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  43. data/vendor/faiss/faiss/impl/VisitedTable.h +69 -34
  44. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +3 -1
  45. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +35 -43
  46. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -15
  47. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +86 -40
  48. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +81 -50
  49. data/vendor/faiss/faiss/impl/index_read.cpp +100 -39
  50. data/vendor/faiss/faiss/impl/index_write.cpp +1 -0
  51. data/vendor/faiss/faiss/impl/io_macros.h +25 -0
  52. data/vendor/faiss/faiss/impl/platform_macros.h +12 -8
  53. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +2 -0
  54. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +2 -0
  55. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +2 -0
  56. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +20 -0
  57. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +36 -0
  58. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -0
  59. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  60. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +2 -0
  61. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +6 -0
  62. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +327 -18
  63. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +264 -27
  64. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  65. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  66. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +199 -27
  67. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +366 -3
  68. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +144 -19
  69. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +26 -0
  70. data/vendor/faiss/faiss/impl/simd_dispatch.h +65 -8
  71. data/vendor/faiss/faiss/index_factory.cpp +5 -1
  72. data/vendor/faiss/faiss/index_io.h +16 -0
  73. data/vendor/faiss/faiss/invlists/DirectMap.cpp +4 -1
  74. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +13 -13
  75. data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
  76. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +119 -22
  77. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +15 -5
  78. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  79. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  80. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +65 -24
  81. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +3 -2
  82. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  83. data/vendor/faiss/faiss/utils/distances_simd.cpp +0 -1
  84. data/vendor/faiss/faiss/utils/hamming.cpp +8 -8
  85. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +2 -1
  86. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  87. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +6 -30
  88. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  89. data/vendor/faiss/faiss/utils/partitioning.cpp +0 -2
  90. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +14 -68
  91. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  92. data/vendor/faiss/faiss/utils/simd_levels.cpp +12 -2
  93. metadata +12 -2
@@ -0,0 +1,193 @@
1
+ // @lint-ignore-every LICENSELINT
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and its affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * Metal IVF Flat implementation: GPU-resident IVF list storage and helpers.
9
+ * Mirrors the roles of faiss/gpu/impl/IVFFlat.cuh (storage side only).
10
+ */
11
+
12
+ #pragma once
13
+
14
+ #import <Metal/Metal.h>
15
+
16
+ #include <cstddef>
17
+ #include <cstdint>
18
+ #include <memory>
19
+ #include <vector>
20
+
21
+ #include <faiss/Index.h>
22
+ #include <faiss/MetricType.h>
23
+ #include <faiss/gpu/GpuIndicesOptions.h>
24
+ #include <faiss/gpu_metal/MetalResources.h>
25
+
26
+ namespace faiss {
27
+ namespace gpu_metal {
28
+
29
+ /// GPU-resident IVF list storage for flat (float32) codes, backed by Metal buffers (id<MTLBuffer>).
30
+ /// Layout: all lists are stored contiguously in a single codes/ids buffer;
31
+ /// lists are described by (listOffset[list], listLength[list]). Host-side copies of the codes, ids and per-list metadata are kept as members next to the GPU buffer handles.
32
+ class MetalIVFFlatImpl {
33
+ public:
34
+ struct AppendDebugStats { // Debug counters, all defaulting to 0; read via appendDebugStats(), cleared via resetAppendDebugStats(). NOTE(review): per-field meanings are only inferable from the names - confirm against the implementation.
35
+ size_t relayoutEvents = 0;
36
+ size_t movedLists = 0;
37
+ size_t movedVectors = 0;
38
+ size_t reusedSegmentAllocs = 0;
39
+ size_t tailSegmentAllocs = 0;
40
+ size_t reusedCapacityVecs = 0;
41
+ size_t tailCapacityVecs = 0;
42
+ size_t tailShrinkEvents = 0;
43
+ size_t tailShrunkVecs = 0;
44
+ };
45
+
46
+ MetalIVFFlatImpl( // Declared here, defined out of line. NOTE(review): arguments presumably seed the same-named members (dim_, nlist_, ...) - confirm.
47
+ std::shared_ptr<MetalResources> resources,
48
+ int dim,
49
+ idx_t nlist,
50
+ faiss::MetricType metric,
51
+ float metricArg,
52
+ faiss::gpu::IndicesOptions indicesOptions,
53
+ bool interleavedLayout);
54
+
55
+ ~MetalIVFFlatImpl();
56
+
57
+ /// Reset all IVF lists and free GPU storage.
58
+ void reset();
59
+
60
+ /// Reserve host/GPU storage for at least totalVecs vectors.
61
+ void reserveMemory(idx_t totalVecs);
62
+
63
+ /// Append a batch of n vectors to the IVF lists.
64
+ /// - x: host pointer, size n * dim
65
+ /// - list_nos: host pointer, size n; -1 entries are skipped
66
+ /// - xids: host pointer, size n; may be null, in which case internal ids are used
67
+ void appendVectors(
68
+ idx_t n,
69
+ const float* x,
70
+ const idx_t* list_nos,
71
+ const idx_t* xids);
72
+
73
+ /// Read-only accessors, provided for a future GPU search path.
74
+ int dim() const {
75
+ return dim_;
76
+ }
77
+ idx_t nlist() const {
78
+ return nlist_;
79
+ }
80
+ faiss::MetricType metricType() const {
81
+ return metric_type_;
82
+ }
83
+ float metricArg() const {
84
+ return metric_arg_;
85
+ }
86
+
87
+ const std::vector<size_t>& listLength() const { // host-side per-list metadata, returned by const reference
88
+ return listLength_;
89
+ }
90
+ const std::vector<size_t>& listOffset() const { // host-side per-list metadata, returned by const reference
91
+ return listOffset_;
92
+ }
93
+
94
+ id<MTLBuffer> codesBuffer() const {
95
+ return codesBuffer_;
96
+ }
97
+ id<MTLBuffer> idsBuffer() const {
98
+ return idsBuffer_;
99
+ }
100
+ /// Pre-built GPU buffer of (nlist) uint32_t offsets (updated on every add). NOTE(review): the host-side listOffset_ holds size_t values; confirm how offsets that do not fit in uint32_t are handled.
101
+ id<MTLBuffer> listOffsetGpuBuffer() const {
102
+ return listOffsetBuf_;
103
+ }
104
+ /// Pre-built GPU buffer of (nlist) uint32_t lengths (updated on every add). NOTE(review): the host-side listLength_ holds size_t values; same narrowing question as for the offsets.
105
+ id<MTLBuffer> listLengthGpuBuffer() const {
106
+ return listLengthBuf_;
107
+ }
108
+
109
+ size_t totalVecs() const {
110
+ return totalVecs_;
111
+ }
112
+
113
+ /// Interleaved codes buffer (blocks of 32 vectors, dims interleaved).
114
+ id<MTLBuffer> interleavedCodesBuffer() const {
115
+ return interleavedCodesBuf_;
116
+ }
117
+ /// Per-list float offsets into the interleaved codes buffer.
118
+ id<MTLBuffer> interleavedCodesOffsetBuffer() const {
119
+ return interleavedCodesOffsetBuf_;
120
+ }
121
+ bool interleavedLayout() const {
122
+ return interleavedLayout_;
123
+ }
124
+ /// Rebuild interleaved buffers from host storage if they are stale. NOTE(review): staleness is presumably tracked by interleavedDirty_ - confirm.
125
+ void ensureInterleavedLayoutUpToDate();
126
+ const AppendDebugStats& appendDebugStats() const { // exposes the live counters by const reference (no copy)
127
+ return appendStats_;
128
+ }
129
+ void resetAppendDebugStats() {
130
+ appendStats_ = AppendDebugStats{}; // value-initialized struct: every counter goes back to its default of 0
131
+ }
132
+
133
+ private:
134
+ struct FreeSegment { // (offset, length) pair stored in freeSegments_. NOTE(review): presumably an unused range of the contiguous storage, in vector units - confirm.
135
+ size_t offset = 0;
136
+ size_t length = 0;
137
+ };
138
+
139
+ bool ensureCapacityForAppend_( // NOTE(review): the meaning of the bool result and of the movedLists out-parameter is not visible in this header - see the implementation.
140
+ const std::vector<size_t>& addPerList,
141
+ std::vector<uint8_t>* movedLists);
142
+ void uploadToGpu_(
143
+ const std::vector<size_t>& oldLength,
144
+ const std::vector<size_t>& addPerList,
145
+ const std::vector<uint8_t>& movedLists,
146
+ bool forceFullUpload);
147
+ void rebuildInterleavedBuffers_();
148
+ size_t allocSegment_(size_t length);
149
+ void freeSegment_(
150
+ size_t offset,
151
+ size_t length,
152
+ bool allowTailShrink = true);
153
+ void coalesceFreeSegments_();
154
+ void tryShrinkTail_();
155
+
156
+ std::shared_ptr<MetalResources> resources_;
157
+
158
+ int dim_;
159
+ idx_t nlist_;
160
+ faiss::MetricType metric_type_;
161
+ float metric_arg_;
162
+ faiss::gpu::IndicesOptions indicesOptions_;
163
+ bool interleavedLayout_;
164
+
165
+ // Per-list metadata (host side)
166
+ std::vector<size_t> listLength_;
167
+ std::vector<size_t> listOffset_;
168
+ std::vector<size_t> listCapacity_;
169
+
170
+ // Host copies of IVF data (flat layout)
171
+ std::vector<float> hostCodes_; // size = totalVecs_ * dim_ (NOTE(review): listCapacity_/totalCapacityVecs_ suggest storage may be sized by capacity rather than by totalVecs_ - confirm)
172
+ std::vector<idx_t> hostIds_; // size = totalVecs_ (NOTE(review): same capacity question as hostCodes_)
173
+ std::vector<FreeSegment> freeSegments_;
174
+ AppendDebugStats appendStats_;
175
+ size_t totalVecs_; // no in-class initializer (unlike interleavedDirty_): must be set by the constructor
176
+ size_t totalCapacityVecs_; // no in-class initializer: must be set by the constructor
177
+
178
+ // GPU storage
179
+ id<MTLBuffer> codesBuffer_;
180
+ id<MTLBuffer> idsBuffer_;
181
+ id<MTLBuffer> listOffsetBuf_; // (nlist) uint32_t, list element offsets
182
+ id<MTLBuffer> listLengthBuf_; // (nlist) uint32_t, list sizes
183
+
184
+ // Interleaved codes layout (blocks of 32 vectors, dims interleaved)
185
+ id<MTLBuffer> interleavedCodesBuf_;
186
+ id<MTLBuffer> interleavedCodesOffsetBuf_; // (nlist) uint32_t, float offsets
187
+ bool interleavedDirty_ = true; // starts out dirty. NOTE(review): presumably cleared once the interleaved buffers have been rebuilt - confirm.
188
+
189
+ static constexpr int kInterleavedGroupSize = 32; // vectors per interleaved block (the "blocks of 32 vectors" mentioned above)
190
+ };
191
+
192
+ } // namespace gpu_metal
193
+ } // namespace faiss