faiss 0.2.6 → 0.2.7
This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
22
22
|
#include <faiss/impl/FaissAssert.h>
|
|
23
23
|
#include <faiss/utils/hamming.h>
|
|
24
|
+
#include <faiss/utils/sorting.h>
|
|
24
25
|
#include <faiss/utils/utils.h>
|
|
25
26
|
|
|
26
27
|
namespace faiss {
|
|
@@ -28,28 +29,14 @@ namespace faiss {
|
|
|
28
29
|
IndexBinaryIVF::IndexBinaryIVF(IndexBinary* quantizer, size_t d, size_t nlist)
|
|
29
30
|
: IndexBinary(d),
|
|
30
31
|
invlists(new ArrayInvertedLists(nlist, code_size)),
|
|
31
|
-
own_invlists(true),
|
|
32
|
-
nprobe(1),
|
|
33
|
-
max_codes(0),
|
|
34
32
|
quantizer(quantizer),
|
|
35
|
-
nlist(nlist)
|
|
36
|
-
own_fields(false),
|
|
37
|
-
clustering_index(nullptr) {
|
|
33
|
+
nlist(nlist) {
|
|
38
34
|
FAISS_THROW_IF_NOT(d == quantizer->d);
|
|
39
35
|
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
|
|
40
|
-
|
|
41
36
|
cp.niter = 10;
|
|
42
37
|
}
|
|
43
38
|
|
|
44
|
-
IndexBinaryIVF::IndexBinaryIVF()
|
|
45
|
-
: invlists(nullptr),
|
|
46
|
-
own_invlists(false),
|
|
47
|
-
nprobe(1),
|
|
48
|
-
max_codes(0),
|
|
49
|
-
quantizer(nullptr),
|
|
50
|
-
nlist(0),
|
|
51
|
-
own_fields(false),
|
|
52
|
-
clustering_index(nullptr) {}
|
|
39
|
+
IndexBinaryIVF::IndexBinaryIVF() {}
|
|
53
40
|
|
|
54
41
|
void IndexBinaryIVF::add(idx_t n, const uint8_t* x) {
|
|
55
42
|
add_with_ids(n, x, nullptr);
|
|
@@ -158,7 +145,7 @@ void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const {
|
|
|
158
145
|
|
|
159
146
|
for (idx_t list_no = 0; list_no < nlist; list_no++) {
|
|
160
147
|
size_t list_size = invlists->list_size(list_no);
|
|
161
|
-
const
|
|
148
|
+
const idx_t* idlist = invlists->get_ids(list_no);
|
|
162
149
|
|
|
163
150
|
for (idx_t offset = 0; offset < list_size; offset++) {
|
|
164
151
|
idx_t id = idlist[offset];
|
|
@@ -174,11 +161,11 @@ void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const {
|
|
|
174
161
|
|
|
175
162
|
void IndexBinaryIVF::search_and_reconstruct(
|
|
176
163
|
idx_t n,
|
|
177
|
-
const uint8_t* x,
|
|
164
|
+
const uint8_t* __restrict x,
|
|
178
165
|
idx_t k,
|
|
179
|
-
int32_t* distances,
|
|
180
|
-
idx_t* labels,
|
|
181
|
-
uint8_t* recons,
|
|
166
|
+
int32_t* __restrict distances,
|
|
167
|
+
idx_t* __restrict labels,
|
|
168
|
+
uint8_t* __restrict recons,
|
|
182
169
|
const SearchParameters* params) const {
|
|
183
170
|
FAISS_THROW_IF_NOT_MSG(
|
|
184
171
|
!params, "search params not supported for this index");
|
|
@@ -320,8 +307,6 @@ void IndexBinaryIVF::replace_invlists(InvertedLists* il, bool own) {
|
|
|
320
307
|
|
|
321
308
|
namespace {
|
|
322
309
|
|
|
323
|
-
using idx_t = Index::idx_t;
|
|
324
|
-
|
|
325
310
|
template <class HammingComputer>
|
|
326
311
|
struct IVFBinaryScannerL2 : BinaryInvertedListScanner {
|
|
327
312
|
HammingComputer hc;
|
|
@@ -346,10 +331,10 @@ struct IVFBinaryScannerL2 : BinaryInvertedListScanner {
|
|
|
346
331
|
|
|
347
332
|
size_t scan_codes(
|
|
348
333
|
size_t n,
|
|
349
|
-
const uint8_t* codes,
|
|
350
|
-
const idx_t* ids,
|
|
351
|
-
int32_t* simi,
|
|
352
|
-
idx_t* idxi,
|
|
334
|
+
const uint8_t* __restrict codes,
|
|
335
|
+
const idx_t* __restrict ids,
|
|
336
|
+
int32_t* __restrict simi,
|
|
337
|
+
idx_t* __restrict idxi,
|
|
353
338
|
size_t k) const override {
|
|
354
339
|
using C = CMax<int32_t, idx_t>;
|
|
355
340
|
|
|
@@ -368,8 +353,8 @@ struct IVFBinaryScannerL2 : BinaryInvertedListScanner {
|
|
|
368
353
|
|
|
369
354
|
void scan_codes_range(
|
|
370
355
|
size_t n,
|
|
371
|
-
const uint8_t* codes,
|
|
372
|
-
const idx_t* ids,
|
|
356
|
+
const uint8_t* __restrict codes,
|
|
357
|
+
const idx_t* __restrict ids,
|
|
373
358
|
int radius,
|
|
374
359
|
RangeQueryResult& result) const override {
|
|
375
360
|
size_t nup = 0;
|
|
@@ -387,12 +372,12 @@ struct IVFBinaryScannerL2 : BinaryInvertedListScanner {
|
|
|
387
372
|
void search_knn_hamming_heap(
|
|
388
373
|
const IndexBinaryIVF& ivf,
|
|
389
374
|
size_t n,
|
|
390
|
-
const uint8_t* x,
|
|
375
|
+
const uint8_t* __restrict x,
|
|
391
376
|
idx_t k,
|
|
392
|
-
const idx_t* keys,
|
|
393
|
-
const int32_t* coarse_dis,
|
|
394
|
-
int32_t* distances,
|
|
395
|
-
idx_t* labels,
|
|
377
|
+
const idx_t* __restrict keys,
|
|
378
|
+
const int32_t* __restrict coarse_dis,
|
|
379
|
+
int32_t* __restrict distances,
|
|
380
|
+
idx_t* __restrict labels,
|
|
396
381
|
bool store_pairs,
|
|
397
382
|
const IVFSearchParameters* params) {
|
|
398
383
|
idx_t nprobe = params ? params->nprobe : ivf.nprobe;
|
|
@@ -448,7 +433,7 @@ void search_knn_hamming_heap(
|
|
|
448
433
|
size_t list_size = ivf.invlists->list_size(key);
|
|
449
434
|
InvertedLists::ScopedCodes scodes(ivf.invlists, key);
|
|
450
435
|
std::unique_ptr<InvertedLists::ScopedIds> sids;
|
|
451
|
-
const
|
|
436
|
+
const idx_t* ids = nullptr;
|
|
452
437
|
|
|
453
438
|
if (!store_pairs) {
|
|
454
439
|
sids.reset(new InvertedLists::ScopedIds(ivf.invlists, key));
|
|
@@ -483,11 +468,11 @@ template <class HammingComputer, bool store_pairs>
|
|
|
483
468
|
void search_knn_hamming_count(
|
|
484
469
|
const IndexBinaryIVF& ivf,
|
|
485
470
|
size_t nx,
|
|
486
|
-
const uint8_t* x,
|
|
487
|
-
const idx_t* keys,
|
|
471
|
+
const uint8_t* __restrict x,
|
|
472
|
+
const idx_t* __restrict keys,
|
|
488
473
|
int k,
|
|
489
|
-
int32_t* distances,
|
|
490
|
-
idx_t* labels,
|
|
474
|
+
int32_t* __restrict distances,
|
|
475
|
+
idx_t* __restrict labels,
|
|
491
476
|
const IVFSearchParameters* params) {
|
|
492
477
|
const int nBuckets = ivf.d + 1;
|
|
493
478
|
std::vector<int> all_counters(nx * nBuckets, 0);
|
|
@@ -533,7 +518,7 @@ void search_knn_hamming_count(
|
|
|
533
518
|
size_t list_size = ivf.invlists->list_size(key);
|
|
534
519
|
InvertedLists::ScopedCodes scodes(ivf.invlists, key);
|
|
535
520
|
const uint8_t* list_vecs = scodes.get();
|
|
536
|
-
const
|
|
521
|
+
const idx_t* ids =
|
|
537
522
|
store_pairs ? nullptr : ivf.invlists->get_ids(key);
|
|
538
523
|
|
|
539
524
|
for (size_t j = 0; j < list_size; j++) {
|
|
@@ -571,6 +556,185 @@ void search_knn_hamming_count(
|
|
|
571
556
|
indexIVF_stats.ndis += ndis;
|
|
572
557
|
}
|
|
573
558
|
|
|
559
|
+
/* Manages NQ queries at a time, stores results */
|
|
560
|
+
template <class HammingComputer, int NQ, int K>
|
|
561
|
+
struct BlockSearch {
|
|
562
|
+
HammingComputer hcs[NQ];
|
|
563
|
+
// heaps to update for each query
|
|
564
|
+
int32_t* distances[NQ];
|
|
565
|
+
idx_t* labels[NQ];
|
|
566
|
+
// curent top of heap
|
|
567
|
+
int32_t heap_tops[NQ];
|
|
568
|
+
|
|
569
|
+
BlockSearch(
|
|
570
|
+
size_t code_size,
|
|
571
|
+
const uint8_t* __restrict x,
|
|
572
|
+
const int32_t* __restrict keys,
|
|
573
|
+
int32_t* __restrict all_distances,
|
|
574
|
+
idx_t* __restrict all_labels) {
|
|
575
|
+
for (idx_t q = 0; q < NQ; q++) {
|
|
576
|
+
idx_t qno = keys[q];
|
|
577
|
+
hcs[q] = HammingComputer(x + qno * code_size, code_size);
|
|
578
|
+
distances[q] = all_distances + qno * K;
|
|
579
|
+
labels[q] = all_labels + qno * K;
|
|
580
|
+
heap_tops[q] = distances[q][0];
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
void add_bcode(const uint8_t* bcode, idx_t id) {
|
|
585
|
+
using C = CMax<int32_t, idx_t>;
|
|
586
|
+
for (int q = 0; q < NQ; q++) {
|
|
587
|
+
int dis = hcs[q].hamming(bcode);
|
|
588
|
+
if (dis < heap_tops[q]) {
|
|
589
|
+
heap_replace_top<C>(K, distances[q], labels[q], dis, id);
|
|
590
|
+
heap_tops[q] = distances[q][0];
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
};
|
|
595
|
+
|
|
596
|
+
template <class HammingComputer, int NQ>
|
|
597
|
+
struct BlockSearchVariableK {
|
|
598
|
+
int k;
|
|
599
|
+
HammingComputer hcs[NQ];
|
|
600
|
+
// heaps to update for each query
|
|
601
|
+
int32_t* distances[NQ];
|
|
602
|
+
idx_t* labels[NQ];
|
|
603
|
+
// curent top of heap
|
|
604
|
+
int32_t heap_tops[NQ];
|
|
605
|
+
|
|
606
|
+
BlockSearchVariableK(
|
|
607
|
+
size_t code_size,
|
|
608
|
+
int k,
|
|
609
|
+
const uint8_t* __restrict x,
|
|
610
|
+
const int32_t* __restrict keys,
|
|
611
|
+
int32_t* __restrict all_distances,
|
|
612
|
+
idx_t* __restrict all_labels)
|
|
613
|
+
: k(k) {
|
|
614
|
+
for (idx_t q = 0; q < NQ; q++) {
|
|
615
|
+
idx_t qno = keys[q];
|
|
616
|
+
hcs[q] = HammingComputer(x + qno * code_size, code_size);
|
|
617
|
+
distances[q] = all_distances + qno * k;
|
|
618
|
+
labels[q] = all_labels + qno * k;
|
|
619
|
+
heap_tops[q] = distances[q][0];
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
void add_bcode(const uint8_t* bcode, idx_t id) {
|
|
624
|
+
using C = CMax<int32_t, idx_t>;
|
|
625
|
+
for (int q = 0; q < NQ; q++) {
|
|
626
|
+
int dis = hcs[q].hamming(bcode);
|
|
627
|
+
if (dis < heap_tops[q]) {
|
|
628
|
+
heap_replace_top<C>(k, distances[q], labels[q], dis, id);
|
|
629
|
+
heap_tops[q] = distances[q][0];
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
};
|
|
634
|
+
|
|
635
|
+
template <class HammingComputer>
|
|
636
|
+
void search_knn_hamming_per_invlist(
|
|
637
|
+
const IndexBinaryIVF& ivf,
|
|
638
|
+
size_t n,
|
|
639
|
+
const uint8_t* __restrict x,
|
|
640
|
+
idx_t k,
|
|
641
|
+
const idx_t* __restrict keys_in,
|
|
642
|
+
const int32_t* __restrict coarse_dis,
|
|
643
|
+
int32_t* __restrict distances,
|
|
644
|
+
idx_t* __restrict labels,
|
|
645
|
+
bool store_pairs,
|
|
646
|
+
const IVFSearchParameters* params) {
|
|
647
|
+
idx_t nprobe = params ? params->nprobe : ivf.nprobe;
|
|
648
|
+
nprobe = std::min((idx_t)ivf.nlist, nprobe);
|
|
649
|
+
idx_t max_codes = params ? params->max_codes : ivf.max_codes;
|
|
650
|
+
FAISS_THROW_IF_NOT(max_codes == 0);
|
|
651
|
+
FAISS_THROW_IF_NOT(!store_pairs);
|
|
652
|
+
MetricType metric_type = ivf.metric_type;
|
|
653
|
+
|
|
654
|
+
// reorder buckets
|
|
655
|
+
std::vector<int64_t> lims(n + 1);
|
|
656
|
+
int32_t* keys = new int32_t[n * nprobe];
|
|
657
|
+
std::unique_ptr<int32_t[]> delete_keys(keys);
|
|
658
|
+
for (idx_t i = 0; i < n * nprobe; i++) {
|
|
659
|
+
keys[i] = keys_in[i];
|
|
660
|
+
}
|
|
661
|
+
matrix_bucket_sort_inplace(n, nprobe, keys, ivf.nlist, lims.data(), 0);
|
|
662
|
+
|
|
663
|
+
using C = CMax<int32_t, idx_t>;
|
|
664
|
+
heap_heapify<C>(n * k, distances, labels);
|
|
665
|
+
const size_t code_size = ivf.code_size;
|
|
666
|
+
|
|
667
|
+
for (idx_t l = 0; l < ivf.nlist; l++) {
|
|
668
|
+
idx_t l0 = lims[l], nq = lims[l + 1] - l0;
|
|
669
|
+
|
|
670
|
+
InvertedLists::ScopedCodes scodes(ivf.invlists, l);
|
|
671
|
+
InvertedLists::ScopedIds sidx(ivf.invlists, l);
|
|
672
|
+
idx_t nb = ivf.invlists->list_size(l);
|
|
673
|
+
const uint8_t* bcodes = scodes.get();
|
|
674
|
+
const idx_t* ids = sidx.get();
|
|
675
|
+
|
|
676
|
+
idx_t i = 0;
|
|
677
|
+
|
|
678
|
+
// process as much as possible by blocks
|
|
679
|
+
constexpr int BS = 4;
|
|
680
|
+
|
|
681
|
+
if (k == 1) {
|
|
682
|
+
for (; i + BS <= nq; i += BS) {
|
|
683
|
+
BlockSearch<HammingComputer, BS, 1> bc(
|
|
684
|
+
code_size, x, keys + l0 + i, distances, labels);
|
|
685
|
+
for (idx_t j = 0; j < nb; j++) {
|
|
686
|
+
bc.add_bcode(bcodes + j * code_size, ids[j]);
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
} else if (k == 2) {
|
|
690
|
+
for (; i + BS <= nq; i += BS) {
|
|
691
|
+
BlockSearch<HammingComputer, BS, 2> bc(
|
|
692
|
+
code_size, x, keys + l0 + i, distances, labels);
|
|
693
|
+
for (idx_t j = 0; j < nb; j++) {
|
|
694
|
+
bc.add_bcode(bcodes + j * code_size, ids[j]);
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
} else if (k == 4) {
|
|
698
|
+
for (; i + BS <= nq; i += BS) {
|
|
699
|
+
BlockSearch<HammingComputer, BS, 4> bc(
|
|
700
|
+
code_size, x, keys + l0 + i, distances, labels);
|
|
701
|
+
for (idx_t j = 0; j < nb; j++) {
|
|
702
|
+
bc.add_bcode(bcodes + j * code_size, ids[j]);
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
} else {
|
|
706
|
+
for (; i + BS <= nq; i += BS) {
|
|
707
|
+
BlockSearchVariableK<HammingComputer, BS> bc(
|
|
708
|
+
code_size, k, x, keys + l0 + i, distances, labels);
|
|
709
|
+
for (idx_t j = 0; j < nb; j++) {
|
|
710
|
+
bc.add_bcode(bcodes + j * code_size, ids[j]);
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
// leftovers
|
|
716
|
+
for (; i < nq; i++) {
|
|
717
|
+
idx_t qno = keys[l0 + i];
|
|
718
|
+
HammingComputer hc(x + qno * code_size, code_size);
|
|
719
|
+
idx_t* __restrict idxi = labels + qno * k;
|
|
720
|
+
int32_t* __restrict simi = distances + qno * k;
|
|
721
|
+
int32_t simi0 = simi[0];
|
|
722
|
+
for (idx_t j = 0; j < nb; j++) {
|
|
723
|
+
int dis = hc.hamming(bcodes + j * code_size);
|
|
724
|
+
|
|
725
|
+
if (dis < simi0) {
|
|
726
|
+
idx_t id = store_pairs ? lo_build(l, j) : ids[j];
|
|
727
|
+
heap_replace_top<C>(k, simi, idxi, dis, id);
|
|
728
|
+
simi0 = simi[0];
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
for (idx_t i = 0; i < n; i++) {
|
|
734
|
+
heap_reorder<C>(k, distances + i * k, labels + i * k);
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
|
|
574
738
|
template <bool store_pairs>
|
|
575
739
|
void search_knn_hamming_count_1(
|
|
576
740
|
const IndexBinaryIVF& ivf,
|
|
@@ -601,7 +765,56 @@ void search_knn_hamming_count_1(
|
|
|
601
765
|
}
|
|
602
766
|
}
|
|
603
767
|
|
|
604
|
-
|
|
768
|
+
void search_knn_hamming_per_invlist_1(
|
|
769
|
+
const IndexBinaryIVF& ivf,
|
|
770
|
+
size_t n,
|
|
771
|
+
const uint8_t* x,
|
|
772
|
+
idx_t k,
|
|
773
|
+
const idx_t* keys,
|
|
774
|
+
const int32_t* coarse_dis,
|
|
775
|
+
int32_t* distances,
|
|
776
|
+
idx_t* labels,
|
|
777
|
+
bool store_pairs,
|
|
778
|
+
const IVFSearchParameters* params) {
|
|
779
|
+
switch (ivf.code_size) {
|
|
780
|
+
#define HANDLE_CS(cs) \
|
|
781
|
+
case cs: \
|
|
782
|
+
search_knn_hamming_per_invlist<HammingComputer##cs>( \
|
|
783
|
+
ivf, \
|
|
784
|
+
n, \
|
|
785
|
+
x, \
|
|
786
|
+
k, \
|
|
787
|
+
keys, \
|
|
788
|
+
coarse_dis, \
|
|
789
|
+
distances, \
|
|
790
|
+
labels, \
|
|
791
|
+
store_pairs, \
|
|
792
|
+
params); \
|
|
793
|
+
break;
|
|
794
|
+
HANDLE_CS(4);
|
|
795
|
+
HANDLE_CS(8);
|
|
796
|
+
HANDLE_CS(16);
|
|
797
|
+
HANDLE_CS(20);
|
|
798
|
+
HANDLE_CS(32);
|
|
799
|
+
HANDLE_CS(64);
|
|
800
|
+
#undef HANDLE_CS
|
|
801
|
+
default:
|
|
802
|
+
search_knn_hamming_per_invlist<HammingComputerDefault>(
|
|
803
|
+
ivf,
|
|
804
|
+
n,
|
|
805
|
+
x,
|
|
806
|
+
k,
|
|
807
|
+
keys,
|
|
808
|
+
coarse_dis,
|
|
809
|
+
distances,
|
|
810
|
+
labels,
|
|
811
|
+
store_pairs,
|
|
812
|
+
params);
|
|
813
|
+
break;
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
} // anonymous namespace
|
|
605
818
|
|
|
606
819
|
BinaryInvertedListScanner* IndexBinaryIVF::get_InvertedListScanner(
|
|
607
820
|
bool store_pairs) const {
|
|
@@ -635,7 +848,19 @@ void IndexBinaryIVF::search_preassigned(
|
|
|
635
848
|
idx_t* labels,
|
|
636
849
|
bool store_pairs,
|
|
637
850
|
const IVFSearchParameters* params) const {
|
|
638
|
-
if (
|
|
851
|
+
if (per_invlist_search) {
|
|
852
|
+
search_knn_hamming_per_invlist_1(
|
|
853
|
+
*this,
|
|
854
|
+
n,
|
|
855
|
+
x,
|
|
856
|
+
k,
|
|
857
|
+
idx,
|
|
858
|
+
coarse_dis,
|
|
859
|
+
distances,
|
|
860
|
+
labels,
|
|
861
|
+
store_pairs,
|
|
862
|
+
params);
|
|
863
|
+
} else if (use_heap) {
|
|
639
864
|
search_knn_hamming_heap(
|
|
640
865
|
*this,
|
|
641
866
|
n,
|
|
@@ -660,9 +885,9 @@ void IndexBinaryIVF::search_preassigned(
|
|
|
660
885
|
|
|
661
886
|
void IndexBinaryIVF::range_search(
|
|
662
887
|
idx_t n,
|
|
663
|
-
const uint8_t* x,
|
|
888
|
+
const uint8_t* __restrict x,
|
|
664
889
|
int radius,
|
|
665
|
-
RangeSearchResult* res,
|
|
890
|
+
RangeSearchResult* __restrict res,
|
|
666
891
|
const SearchParameters* params) const {
|
|
667
892
|
FAISS_THROW_IF_NOT_MSG(
|
|
668
893
|
!params, "search params not supported for this index");
|
|
@@ -684,11 +909,11 @@ void IndexBinaryIVF::range_search(
|
|
|
684
909
|
|
|
685
910
|
void IndexBinaryIVF::range_search_preassigned(
|
|
686
911
|
idx_t n,
|
|
687
|
-
const uint8_t* x,
|
|
912
|
+
const uint8_t* __restrict x,
|
|
688
913
|
int radius,
|
|
689
|
-
const idx_t* assign,
|
|
690
|
-
const int32_t* centroid_dis,
|
|
691
|
-
RangeSearchResult* res) const {
|
|
914
|
+
const idx_t* __restrict assign,
|
|
915
|
+
const int32_t* __restrict centroid_dis,
|
|
916
|
+
RangeSearchResult* __restrict res) const {
|
|
692
917
|
const size_t nprobe = std::min(nlist, this->nprobe);
|
|
693
918
|
bool store_pairs = false;
|
|
694
919
|
size_t nlistv = 0, ndis = 0;
|
|
@@ -32,27 +32,36 @@ struct BinaryInvertedListScanner;
|
|
|
32
32
|
*/
|
|
33
33
|
struct IndexBinaryIVF : IndexBinary {
|
|
34
34
|
/// Access to the actual data
|
|
35
|
-
InvertedLists* invlists;
|
|
36
|
-
bool own_invlists;
|
|
35
|
+
InvertedLists* invlists = nullptr;
|
|
36
|
+
bool own_invlists = true;
|
|
37
37
|
|
|
38
|
-
size_t nprobe; ///< number of probes at query time
|
|
39
|
-
size_t max_codes; ///< max nb of codes to visit to do a query
|
|
38
|
+
size_t nprobe = 1; ///< number of probes at query time
|
|
39
|
+
size_t max_codes = 0; ///< max nb of codes to visit to do a query
|
|
40
40
|
|
|
41
41
|
/** Select between using a heap or counting to select the k smallest values
|
|
42
42
|
* when scanning inverted lists.
|
|
43
43
|
*/
|
|
44
44
|
bool use_heap = true;
|
|
45
45
|
|
|
46
|
+
/** collect computations per batch */
|
|
47
|
+
bool per_invlist_search = false;
|
|
48
|
+
|
|
46
49
|
/// map for direct access to the elements. Enables reconstruct().
|
|
47
50
|
DirectMap direct_map;
|
|
48
51
|
|
|
49
|
-
|
|
50
|
-
|
|
52
|
+
/// quantizer that maps vectors to inverted lists
|
|
53
|
+
IndexBinary* quantizer = nullptr;
|
|
54
|
+
|
|
55
|
+
/// number of possible key values
|
|
56
|
+
size_t nlist = 0;
|
|
51
57
|
|
|
52
|
-
|
|
58
|
+
/// whether object owns the quantizer
|
|
59
|
+
bool own_fields = false;
|
|
53
60
|
|
|
54
61
|
ClusteringParameters cp; ///< to override default clustering params
|
|
55
|
-
|
|
62
|
+
|
|
63
|
+
/// to override index used during clustering
|
|
64
|
+
Index* clustering_index = nullptr;
|
|
56
65
|
|
|
57
66
|
/** The Inverted file takes a quantizer (an IndexBinary) on input,
|
|
58
67
|
* which implements the function mapping a vector to a list
|
|
@@ -196,7 +205,7 @@ struct IndexBinaryIVF : IndexBinary {
|
|
|
196
205
|
return invlists->list_size(list_no);
|
|
197
206
|
}
|
|
198
207
|
|
|
199
|
-
/**
|
|
208
|
+
/** initialize a direct map
|
|
200
209
|
*
|
|
201
210
|
* @param new_maintain_direct_map if true, create a direct map,
|
|
202
211
|
* else clear it
|
|
@@ -209,8 +218,6 @@ struct IndexBinaryIVF : IndexBinary {
|
|
|
209
218
|
};
|
|
210
219
|
|
|
211
220
|
struct BinaryInvertedListScanner {
|
|
212
|
-
using idx_t = Index::idx_t;
|
|
213
|
-
|
|
214
221
|
/// from now on we handle this query.
|
|
215
222
|
virtual void set_query(const uint8_t* query_vector) = 0;
|
|
216
223
|
|
|
@@ -98,18 +98,21 @@ void IndexFastScan::add(idx_t n, const float* x) {
|
|
|
98
98
|
ntotal += n;
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
+
CodePacker* IndexFastScan::get_CodePacker() const {
|
|
102
|
+
return new CodePackerPQ4(M, bbs);
|
|
103
|
+
}
|
|
104
|
+
|
|
101
105
|
size_t IndexFastScan::remove_ids(const IDSelector& sel) {
|
|
102
106
|
idx_t j = 0;
|
|
107
|
+
std::vector<uint8_t> buffer(code_size);
|
|
108
|
+
CodePackerPQ4 packer(M, bbs);
|
|
103
109
|
for (idx_t i = 0; i < ntotal; i++) {
|
|
104
110
|
if (sel.is_member(i)) {
|
|
105
111
|
// should be removed
|
|
106
112
|
} else {
|
|
107
113
|
if (i > j) {
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
pq4_get_packed_element(codes.data(), bbs, M, i, sq);
|
|
111
|
-
pq4_set_packed_element(codes.data(), code, bbs, M, j, sq);
|
|
112
|
-
}
|
|
114
|
+
packer.unpack_1(codes.data(), i, buffer.data());
|
|
115
|
+
packer.pack_1(buffer.data(), j, codes.data());
|
|
113
116
|
}
|
|
114
117
|
j++;
|
|
115
118
|
}
|
|
@@ -142,12 +145,12 @@ void IndexFastScan::merge_from(Index& otherIndex, idx_t add_id) {
|
|
|
142
145
|
IndexFastScan* other = static_cast<IndexFastScan*>(&otherIndex);
|
|
143
146
|
ntotal2 = roundup(ntotal + other->ntotal, bbs);
|
|
144
147
|
codes.resize(ntotal2 * M2 / 2);
|
|
148
|
+
std::vector<uint8_t> buffer(code_size);
|
|
149
|
+
CodePackerPQ4 packer(M, bbs);
|
|
150
|
+
|
|
145
151
|
for (int i = 0; i < other->ntotal; i++) {
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
pq4_get_packed_element(other->codes.data(), bbs, M, i, sq);
|
|
149
|
-
pq4_set_packed_element(codes.data(), code, bbs, M, ntotal + i, sq);
|
|
150
|
-
}
|
|
152
|
+
packer.unpack_1(other->codes.data(), i, buffer.data());
|
|
153
|
+
packer.pack_1(buffer.data(), ntotal + i, codes.data());
|
|
151
154
|
}
|
|
152
155
|
ntotal += other->ntotal;
|
|
153
156
|
other->reset();
|
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
|
|
13
13
|
namespace faiss {
|
|
14
14
|
|
|
15
|
+
struct CodePacker;
|
|
16
|
+
|
|
15
17
|
/** Fast scan version of IndexPQ and IndexAQ. Works for 4-bit PQ and AQ for now.
|
|
16
18
|
*
|
|
17
19
|
* The codes are not stored sequentially but grouped in blocks of size bbs.
|
|
@@ -25,7 +27,6 @@ namespace faiss {
|
|
|
25
27
|
* 14: no qbs with heap accumulator
|
|
26
28
|
* 15: no qbs with reservoir accumulator
|
|
27
29
|
*/
|
|
28
|
-
|
|
29
30
|
struct IndexFastScan : Index {
|
|
30
31
|
// implementation to select
|
|
31
32
|
int implem = 0;
|
|
@@ -126,6 +127,9 @@ struct IndexFastScan : Index {
|
|
|
126
127
|
|
|
127
128
|
void reconstruct(idx_t key, float* recons) const override;
|
|
128
129
|
size_t remove_ids(const IDSelector& sel) override;
|
|
130
|
+
|
|
131
|
+
CodePacker* get_CodePacker() const;
|
|
132
|
+
|
|
129
133
|
void merge_from(Index& otherIndex, idx_t add_id = 0) override;
|
|
130
134
|
void check_compatible_for_merge(const Index& otherIndex) const override;
|
|
131
135
|
};
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
#include <faiss/utils/Heap.h>
|
|
15
15
|
#include <faiss/utils/distances.h>
|
|
16
16
|
#include <faiss/utils/extra_distances.h>
|
|
17
|
+
#include <faiss/utils/sorting.h>
|
|
17
18
|
#include <faiss/utils/utils.h>
|
|
18
19
|
#include <cstring>
|
|
19
20
|
|
|
@@ -39,6 +40,10 @@ void IndexFlat::search(
|
|
|
39
40
|
} else if (metric_type == METRIC_L2) {
|
|
40
41
|
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
|
41
42
|
knn_L2sqr(x, get_xb(), d, n, ntotal, &res, nullptr, sel);
|
|
43
|
+
} else if (is_similarity_metric(metric_type)) {
|
|
44
|
+
float_minheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
|
45
|
+
knn_extra_metrics(
|
|
46
|
+
x, get_xb(), d, n, ntotal, metric_type, metric_arg, &res);
|
|
42
47
|
} else {
|
|
43
48
|
FAISS_THROW_IF_NOT(!sel);
|
|
44
49
|
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
|
@@ -90,7 +95,7 @@ namespace {
|
|
|
90
95
|
|
|
91
96
|
struct FlatL2Dis : FlatCodesDistanceComputer {
|
|
92
97
|
size_t d;
|
|
93
|
-
|
|
98
|
+
idx_t nb;
|
|
94
99
|
const float* q;
|
|
95
100
|
const float* b;
|
|
96
101
|
size_t ndis;
|
|
@@ -121,7 +126,7 @@ struct FlatL2Dis : FlatCodesDistanceComputer {
|
|
|
121
126
|
|
|
122
127
|
struct FlatIPDis : FlatCodesDistanceComputer {
|
|
123
128
|
size_t d;
|
|
124
|
-
|
|
129
|
+
idx_t nb;
|
|
125
130
|
const float* q;
|
|
126
131
|
const float* b;
|
|
127
132
|
size_t ndis;
|
|
@@ -222,7 +227,7 @@ void IndexFlat1D::search(
|
|
|
222
227
|
perm.size() == ntotal, "Call update_permutation before search");
|
|
223
228
|
const float* xb = get_xb();
|
|
224
229
|
|
|
225
|
-
#pragma omp parallel for
|
|
230
|
+
#pragma omp parallel for if (n > 10000)
|
|
226
231
|
for (idx_t i = 0; i < n; i++) {
|
|
227
232
|
float q = x[i]; // query
|
|
228
233
|
float* D = distances + i * k;
|
|
@@ -232,6 +237,14 @@ void IndexFlat1D::search(
|
|
|
232
237
|
idx_t i0 = 0, i1 = ntotal;
|
|
233
238
|
idx_t wp = 0;
|
|
234
239
|
|
|
240
|
+
if (ntotal == 0) {
|
|
241
|
+
for (idx_t j = 0; j < k; j++) {
|
|
242
|
+
I[j] = -1;
|
|
243
|
+
D[j] = HUGE_VAL;
|
|
244
|
+
}
|
|
245
|
+
goto done;
|
|
246
|
+
}
|
|
247
|
+
|
|
235
248
|
if (xb[perm[i0]] > q) {
|
|
236
249
|
i1 = 0;
|
|
237
250
|
goto finish_right;
|
|
@@ -82,7 +82,7 @@ struct IndexFlatL2 : IndexFlat {
|
|
|
82
82
|
|
|
83
83
|
/// optimized version for 1D "vectors".
|
|
84
84
|
struct IndexFlat1D : IndexFlatL2 {
|
|
85
|
-
bool continuous_update; ///< is the permutation updated continuously?
|
|
85
|
+
bool continuous_update = true; ///< is the permutation updated continuously?
|
|
86
86
|
|
|
87
87
|
std::vector<idx_t> perm; ///< sorted database indices
|
|
88
88
|
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include <faiss/IndexFlatCodes.h>
|
|
9
9
|
|
|
10
10
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
11
|
+
#include <faiss/impl/CodePacker.h>
|
|
11
12
|
#include <faiss/impl/DistanceComputer.h>
|
|
12
13
|
#include <faiss/impl/FaissAssert.h>
|
|
13
14
|
#include <faiss/impl/IDSelector.h>
|
|
@@ -98,4 +99,8 @@ void IndexFlatCodes::merge_from(Index& otherIndex, idx_t add_id) {
|
|
|
98
99
|
other->reset();
|
|
99
100
|
}
|
|
100
101
|
|
|
102
|
+
CodePacker* IndexFlatCodes::get_CodePacker() const {
|
|
103
|
+
return new CodePackerFlat(code_size);
|
|
104
|
+
}
|
|
105
|
+
|
|
101
106
|
} // namespace faiss
|
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
|
|
16
16
|
namespace faiss {
|
|
17
17
|
|
|
18
|
+
struct CodePacker;
|
|
19
|
+
|
|
18
20
|
/** Index that encodes all vectors as fixed-size codes (size code_size). Storage
|
|
19
21
|
* is in the codes vector */
|
|
20
22
|
struct IndexFlatCodes : Index {
|
|
@@ -39,8 +41,8 @@ struct IndexFlatCodes : Index {
|
|
|
39
41
|
|
|
40
42
|
size_t sa_code_size() const override;
|
|
41
43
|
|
|
42
|
-
/** remove some ids. NB that
|
|
43
|
-
*
|
|
44
|
+
/** remove some ids. NB that because of the structure of the
|
|
45
|
+
* index, the semantics of this operation are
|
|
44
46
|
* different from the usual ones: the new ids are shifted */
|
|
45
47
|
size_t remove_ids(const IDSelector& sel) override;
|
|
46
48
|
|
|
@@ -51,6 +53,9 @@ struct IndexFlatCodes : Index {
|
|
|
51
53
|
return get_FlatCodesDistanceComputer();
|
|
52
54
|
}
|
|
53
55
|
|
|
56
|
+
// returns a new instance of a CodePacker
|
|
57
|
+
CodePacker* get_CodePacker() const;
|
|
58
|
+
|
|
54
59
|
void check_compatible_for_merge(const Index& otherIndex) const override;
|
|
55
60
|
|
|
56
61
|
virtual void merge_from(Index& otherIndex, idx_t add_id = 0) override;
|