faiss 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +39 -29
- data/vendor/faiss/faiss/Clustering.cpp +4 -2
- data/vendor/faiss/faiss/IVFlib.cpp +14 -7
- data/vendor/faiss/faiss/Index.h +72 -3
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/IndexBinary.h +46 -3
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
- data/vendor/faiss/faiss/IndexHNSW.h +10 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
- data/vendor/faiss/faiss/IndexIDMap.h +20 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
- data/vendor/faiss/faiss/IndexIVF.h +16 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
- data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
- data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
- data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
- data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
- data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -0
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
- data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
- data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
- data/vendor/faiss/faiss/IndexShards.cpp +7 -6
- data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
- data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
- data/vendor/faiss/faiss/MetricType.h +5 -3
- data/vendor/faiss/faiss/clone_index.cpp +2 -4
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
- data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
- data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
- data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
- data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
- data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
- data/vendor/faiss/faiss/impl/io.cpp +9 -5
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
- data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +49 -33
- data/vendor/faiss/faiss/index_factory.h +8 -2
- data/vendor/faiss/faiss/index_io.h +0 -3
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
- data/vendor/faiss/faiss/utils/Heap.h +23 -12
- data/vendor/faiss/faiss/utils/distances.cpp +42 -21
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
- data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
- data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
- data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
- data/vendor/faiss/faiss/utils/random.cpp +14 -7
- data/vendor/faiss/faiss/utils/utils.cpp +0 -3
- metadata +5 -2
@@ -6,15 +6,12 @@
|
|
6
6
|
*/
|
7
7
|
|
8
8
|
#include <stdio.h>
|
9
|
-
#include <string.h>
|
10
9
|
|
11
|
-
#
|
10
|
+
#if defined(__linux__) || defined(__FreeBSD__)
|
12
11
|
|
13
12
|
#include <fcntl.h>
|
14
13
|
#include <sys/mman.h>
|
15
14
|
#include <sys/stat.h>
|
16
|
-
#include <sys/types.h>
|
17
|
-
#include <unistd.h>
|
18
15
|
|
19
16
|
#elif defined(_WIN32)
|
20
17
|
|
@@ -30,7 +27,7 @@
|
|
30
27
|
|
31
28
|
namespace faiss {
|
32
29
|
|
33
|
-
#
|
30
|
+
#if defined(__linux__) || defined(__FreeBSD__)
|
34
31
|
|
35
32
|
struct MmappedFileMappingOwner::PImpl {
|
36
33
|
void* ptr = nullptr;
|
@@ -171,12 +168,12 @@ struct MmappedFileMappingOwner::PImpl {
|
|
171
168
|
const int fd = _fileno(f);
|
172
169
|
if (fd == -1) {
|
173
170
|
// no good
|
174
|
-
|
171
|
+
FAISS_THROW_MSG("could not get a HANDLE");
|
175
172
|
}
|
176
173
|
|
177
174
|
HANDLE file_handle = (HANDLE)_get_osfhandle(fd);
|
178
175
|
if (file_handle == INVALID_HANDLE_VALUE) {
|
179
|
-
|
176
|
+
FAISS_THROW_MSG("could not get an OS HANDLE");
|
180
177
|
}
|
181
178
|
|
182
179
|
// get the size of the file
|
@@ -292,8 +292,8 @@ void beam_search_encode_step(
|
|
292
292
|
cent_ids.data() + i * beam_size * new_beam_size;
|
293
293
|
|
294
294
|
// here we could be a tad more efficient by merging sorted arrays
|
295
|
-
for (int
|
296
|
-
new_distances_i[
|
295
|
+
for (int j = 0; j < new_beam_size; j++) {
|
296
|
+
new_distances_i[j] = C::neutral();
|
297
297
|
}
|
298
298
|
std::vector<int> perm(new_beam_size, -1);
|
299
299
|
heap_addn<C>(
|
@@ -325,8 +325,8 @@ void beam_search_encode_step(
|
|
325
325
|
const float* cent_distances_i =
|
326
326
|
cent_distances.data() + i * beam_size * K;
|
327
327
|
// then we have to select the best results
|
328
|
-
for (int
|
329
|
-
new_distances_i[
|
328
|
+
for (int j = 0; j < new_beam_size; j++) {
|
329
|
+
new_distances_i[j] = C::neutral();
|
330
330
|
}
|
331
331
|
std::vector<int> perm(new_beam_size, -1);
|
332
332
|
|
@@ -558,8 +558,8 @@ void beam_search_encode_step_tab(
|
|
558
558
|
const float* cent_distances_i = cent_distances.data();
|
559
559
|
|
560
560
|
// then we have to select the best results
|
561
|
-
for (int
|
562
|
-
new_distances_i[
|
561
|
+
for (int j = 0; j < new_beam_size; j++) {
|
562
|
+
new_distances_i[j] = C::neutral();
|
563
563
|
}
|
564
564
|
std::vector<int> perm(new_beam_size, -1);
|
565
565
|
|
@@ -15,11 +15,11 @@ namespace faiss {
|
|
15
15
|
|
16
16
|
// ZeroCopyIOReader just maps the data from a given pointer.
|
17
17
|
struct ZeroCopyIOReader : public faiss::IOReader {
|
18
|
-
uint8_t* data_;
|
18
|
+
const uint8_t* data_;
|
19
19
|
size_t rp_ = 0;
|
20
20
|
size_t total_ = 0;
|
21
21
|
|
22
|
-
ZeroCopyIOReader(uint8_t* data, size_t size);
|
22
|
+
ZeroCopyIOReader(const uint8_t* data, size_t size);
|
23
23
|
~ZeroCopyIOReader();
|
24
24
|
|
25
25
|
void reset();
|
@@ -170,7 +170,7 @@ AdditiveQuantizer::Search_type_t aq_parse_search_type(
|
|
170
170
|
return metric == METRIC_L2 ? AdditiveQuantizer::ST_decompress
|
171
171
|
: AdditiveQuantizer::ST_LUT_nonorm;
|
172
172
|
}
|
173
|
-
int pos = stok.rfind(
|
173
|
+
int pos = stok.rfind('_');
|
174
174
|
return aq_search_type[stok.substr(pos)];
|
175
175
|
}
|
176
176
|
|
@@ -311,7 +311,8 @@ IndexIVF* parse_IndexIVF(
|
|
311
311
|
const std::string& code_string,
|
312
312
|
std::unique_ptr<Index>& quantizer,
|
313
313
|
size_t nlist,
|
314
|
-
MetricType mt
|
314
|
+
MetricType mt,
|
315
|
+
bool own_il) {
|
315
316
|
std::smatch sm;
|
316
317
|
auto match = [&sm, &code_string](const std::string pattern) {
|
317
318
|
return re_match(code_string, pattern, sm);
|
@@ -320,18 +321,25 @@ IndexIVF* parse_IndexIVF(
|
|
320
321
|
int d = quantizer->d;
|
321
322
|
|
322
323
|
if (match("Flat")) {
|
323
|
-
return new IndexIVFFlat(get_q(), d, nlist, mt);
|
324
|
+
return new IndexIVFFlat(get_q(), d, nlist, mt, own_il);
|
324
325
|
}
|
325
326
|
if (match("FlatDedup")) {
|
326
|
-
return new IndexIVFFlatDedup(get_q(), d, nlist, mt);
|
327
|
+
return new IndexIVFFlatDedup(get_q(), d, nlist, mt, own_il);
|
327
328
|
}
|
328
329
|
if (match(sq_pattern)) {
|
329
330
|
return new IndexIVFScalarQuantizer(
|
330
|
-
get_q(),
|
331
|
+
get_q(),
|
332
|
+
d,
|
333
|
+
nlist,
|
334
|
+
sq_types[sm[1].str()],
|
335
|
+
mt,
|
336
|
+
/*by_residual=*/true,
|
337
|
+
own_il);
|
331
338
|
}
|
332
339
|
if (match("PQ([0-9]+)(x[0-9]+)?(np)?")) {
|
333
340
|
int M = mres_to_int(sm[1]), nbit = mres_to_int(sm[2], 8, 1);
|
334
|
-
IndexIVFPQ* index_ivf =
|
341
|
+
IndexIVFPQ* index_ivf =
|
342
|
+
new IndexIVFPQ(get_q(), d, nlist, M, nbit, mt, own_il);
|
335
343
|
index_ivf->do_polysemous_training = sm[3].str() != "np";
|
336
344
|
return index_ivf;
|
337
345
|
}
|
@@ -340,13 +348,13 @@ IndexIVF* parse_IndexIVF(
|
|
340
348
|
mt == METRIC_L2,
|
341
349
|
"IVFPQR not implemented for inner product search");
|
342
350
|
int M1 = mres_to_int(sm[1]), M2 = mres_to_int(sm[2]);
|
343
|
-
return new IndexIVFPQR(get_q(), d, nlist, M1, 8, M2, 8);
|
351
|
+
return new IndexIVFPQR(get_q(), d, nlist, M1, 8, M2, 8, own_il);
|
344
352
|
}
|
345
353
|
if (match("PQ([0-9]+)x4fs(r?)(_[0-9]+)?")) {
|
346
354
|
int M = mres_to_int(sm[1]);
|
347
355
|
int bbs = mres_to_int(sm[3], 32, 1);
|
348
|
-
IndexIVFPQFastScan* index_ivf =
|
349
|
-
|
356
|
+
IndexIVFPQFastScan* index_ivf = new IndexIVFPQFastScan(
|
357
|
+
get_q(), d, nlist, M, 4, mt, bbs, own_il);
|
350
358
|
index_ivf->by_residual = sm[2].str() == "r";
|
351
359
|
return index_ivf;
|
352
360
|
}
|
@@ -357,11 +365,11 @@ IndexIVF* parse_IndexIVF(
|
|
357
365
|
IndexIVF* index_ivf;
|
358
366
|
if (sm[1].str() == "RQ") {
|
359
367
|
index_ivf = new IndexIVFResidualQuantizer(
|
360
|
-
get_q(), d, nlist, nbits, mt, st);
|
368
|
+
get_q(), d, nlist, nbits, mt, st, own_il);
|
361
369
|
} else {
|
362
370
|
FAISS_THROW_IF_NOT(nbits.size() > 0);
|
363
371
|
index_ivf = new IndexIVFLocalSearchQuantizer(
|
364
|
-
get_q(), d, nlist, nbits.size(), nbits[0], mt, st);
|
372
|
+
get_q(), d, nlist, nbits.size(), nbits[0], mt, st, own_il);
|
365
373
|
}
|
366
374
|
return index_ivf;
|
367
375
|
}
|
@@ -373,10 +381,10 @@ IndexIVF* parse_IndexIVF(
|
|
373
381
|
IndexIVF* index_ivf;
|
374
382
|
if (sm[1].str() == "PRQ") {
|
375
383
|
index_ivf = new IndexIVFProductResidualQuantizer(
|
376
|
-
get_q(), d, nlist, nsplits, Msub, nbit, mt, st);
|
384
|
+
get_q(), d, nlist, nsplits, Msub, nbit, mt, st, own_il);
|
377
385
|
} else {
|
378
386
|
index_ivf = new IndexIVFProductLocalSearchQuantizer(
|
379
|
-
get_q(), d, nlist, nsplits, Msub, nbit, mt, st);
|
387
|
+
get_q(), d, nlist, nsplits, Msub, nbit, mt, st, own_il);
|
380
388
|
}
|
381
389
|
return index_ivf;
|
382
390
|
}
|
@@ -387,10 +395,10 @@ IndexIVF* parse_IndexIVF(
|
|
387
395
|
IndexIVFAdditiveQuantizerFastScan* index_ivf;
|
388
396
|
if (sm[1].str() == "RQ") {
|
389
397
|
index_ivf = new IndexIVFResidualQuantizerFastScan(
|
390
|
-
get_q(), d, nlist, M, 4, mt, st, bbs);
|
398
|
+
get_q(), d, nlist, M, 4, mt, st, bbs, own_il);
|
391
399
|
} else {
|
392
400
|
index_ivf = new IndexIVFLocalSearchQuantizerFastScan(
|
393
|
-
get_q(), d, nlist, M, 4, mt, st, bbs);
|
401
|
+
get_q(), d, nlist, M, 4, mt, st, bbs, own_il);
|
394
402
|
}
|
395
403
|
index_ivf->by_residual = (sm[3].str() == "r");
|
396
404
|
return index_ivf;
|
@@ -404,10 +412,10 @@ IndexIVF* parse_IndexIVF(
|
|
404
412
|
IndexIVFAdditiveQuantizerFastScan* index_ivf;
|
405
413
|
if (sm[1].str() == "PRQ") {
|
406
414
|
index_ivf = new IndexIVFProductResidualQuantizerFastScan(
|
407
|
-
get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs);
|
415
|
+
get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs, own_il);
|
408
416
|
} else {
|
409
417
|
index_ivf = new IndexIVFProductLocalSearchQuantizerFastScan(
|
410
|
-
get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs);
|
418
|
+
get_q(), d, nlist, nsplits, Msub, 4, mt, st, bbs, own_il);
|
411
419
|
}
|
412
420
|
index_ivf->by_residual = (sm[4].str() == "r");
|
413
421
|
return index_ivf;
|
@@ -425,8 +433,8 @@ IndexIVF* parse_IndexIVF(
|
|
425
433
|
// the rationale for -1e10 is that this corresponds to simple
|
426
434
|
// thresholding
|
427
435
|
float period = sm[3].length() > 0 ? std::stof(sm[3]) : -1e10;
|
428
|
-
IndexIVFSpectralHash* index_ivf =
|
429
|
-
|
436
|
+
IndexIVFSpectralHash* index_ivf = new IndexIVFSpectralHash(
|
437
|
+
get_q(), d, nlist, outdim, period, own_il);
|
430
438
|
index_ivf->replace_vt(vt.release(), true);
|
431
439
|
if (sm[4].length()) {
|
432
440
|
std::string s = sm[4].str();
|
@@ -440,7 +448,7 @@ IndexIVF* parse_IndexIVF(
|
|
440
448
|
return index_ivf;
|
441
449
|
}
|
442
450
|
if (match(rabitq_pattern)) {
|
443
|
-
return new IndexIVFRaBitQ(get_q(), d, nlist, mt);
|
451
|
+
return new IndexIVFRaBitQ(get_q(), d, nlist, mt, own_il);
|
444
452
|
}
|
445
453
|
return nullptr;
|
446
454
|
}
|
@@ -677,7 +685,8 @@ Index* parse_other_indexes(
|
|
677
685
|
std::unique_ptr<Index> index_factory_sub(
|
678
686
|
int d,
|
679
687
|
std::string description,
|
680
|
-
MetricType metric
|
688
|
+
MetricType metric,
|
689
|
+
bool own_invlists = true) {
|
681
690
|
// handle composite indexes
|
682
691
|
|
683
692
|
bool verbose = index_factory_verbose;
|
@@ -838,7 +847,7 @@ std::unique_ptr<Index> index_factory_sub(
|
|
838
847
|
|
839
848
|
// IndexRowwiseMinMax, fp32 version
|
840
849
|
if (description.compare(0, 7, "MinMax,") == 0) {
|
841
|
-
size_t comma = description.find(
|
850
|
+
size_t comma = description.find(',');
|
842
851
|
std::string sub_index_string = description.substr(comma + 1);
|
843
852
|
auto sub_index = index_factory_sub(d, sub_index_string, metric);
|
844
853
|
|
@@ -850,7 +859,7 @@ std::unique_ptr<Index> index_factory_sub(
|
|
850
859
|
|
851
860
|
// IndexRowwiseMinMax, fp16 version
|
852
861
|
if (description.compare(0, 11, "MinMaxFP16,") == 0) {
|
853
|
-
size_t comma = description.find(
|
862
|
+
size_t comma = description.find(',');
|
854
863
|
std::string sub_index_string = description.substr(comma + 1);
|
855
864
|
auto sub_index = index_factory_sub(d, sub_index_string, metric);
|
856
865
|
|
@@ -864,7 +873,7 @@ std::unique_ptr<Index> index_factory_sub(
|
|
864
873
|
{
|
865
874
|
size_t nlist;
|
866
875
|
bool use_2layer;
|
867
|
-
size_t comma = description.find(
|
876
|
+
size_t comma = description.find(',');
|
868
877
|
std::string coarse_string = description.substr(0, comma);
|
869
878
|
// Match coarse quantizer part first
|
870
879
|
std::unique_ptr<Index> quantizer(parse_coarse_quantizer(
|
@@ -894,8 +903,8 @@ std::unique_ptr<Index> index_factory_sub(
|
|
894
903
|
return std::unique_ptr<Index>(index_2l);
|
895
904
|
}
|
896
905
|
|
897
|
-
IndexIVF* index_ivf =
|
898
|
-
|
906
|
+
IndexIVF* index_ivf = parse_IndexIVF(
|
907
|
+
code_description, quantizer, nlist, metric, own_invlists);
|
899
908
|
|
900
909
|
FAISS_THROW_IF_NOT_FMT(
|
901
910
|
index_ivf,
|
@@ -911,25 +920,32 @@ std::unique_ptr<Index> index_factory_sub(
|
|
911
920
|
|
912
921
|
} // anonymous namespace
|
913
922
|
|
914
|
-
Index* index_factory(
|
915
|
-
|
923
|
+
Index* index_factory(
|
924
|
+
int d,
|
925
|
+
const char* description,
|
926
|
+
MetricType metric,
|
927
|
+
bool own_invlists) {
|
928
|
+
return index_factory_sub(d, description, metric, own_invlists).release();
|
916
929
|
}
|
917
930
|
|
918
|
-
IndexBinary* index_binary_factory(
|
931
|
+
IndexBinary* index_binary_factory(
|
932
|
+
int d,
|
933
|
+
const char* description,
|
934
|
+
bool own_invlists) {
|
919
935
|
IndexBinary* index = nullptr;
|
920
936
|
|
921
937
|
int ncentroids = -1;
|
922
938
|
int M, nhash, b;
|
923
939
|
|
924
940
|
if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) {
|
925
|
-
IndexBinaryIVF* index_ivf =
|
926
|
-
new
|
941
|
+
IndexBinaryIVF* index_ivf = new IndexBinaryIVF(
|
942
|
+
new IndexBinaryHNSW(d, M), d, ncentroids, own_invlists);
|
927
943
|
index_ivf->own_fields = true;
|
928
944
|
index = index_ivf;
|
929
945
|
|
930
946
|
} else if (sscanf(description, "BIVF%d", &ncentroids) == 1) {
|
931
|
-
IndexBinaryIVF* index_ivf =
|
932
|
-
new
|
947
|
+
IndexBinaryIVF* index_ivf = new IndexBinaryIVF(
|
948
|
+
new IndexBinaryFlat(d), d, ncentroids, own_invlists);
|
933
949
|
index_ivf->own_fields = true;
|
934
950
|
index = index_ivf;
|
935
951
|
|
@@ -17,11 +17,17 @@ namespace faiss {
|
|
17
17
|
Index* index_factory(
|
18
18
|
int d,
|
19
19
|
const char* description,
|
20
|
-
MetricType metric = METRIC_L2
|
20
|
+
MetricType metric = METRIC_L2,
|
21
|
+
// Whether to maintain inverted list within faiss index (only applicable
|
22
|
+
// to IndexIVF*)
|
23
|
+
bool own_invlists = true);
|
21
24
|
|
22
25
|
/// set to > 0 to get more logs from index_factory
|
23
26
|
FAISS_API extern int index_factory_verbose;
|
24
27
|
|
25
|
-
IndexBinary* index_binary_factory(
|
28
|
+
IndexBinary* index_binary_factory(
|
29
|
+
int d,
|
30
|
+
const char* description,
|
31
|
+
bool own_invlists = true);
|
26
32
|
|
27
33
|
} // namespace faiss
|
@@ -280,8 +280,9 @@ size_t ArrayInvertedLists::add_entries(
|
|
280
280
|
size_t n_entry,
|
281
281
|
const idx_t* ids_in,
|
282
282
|
const uint8_t* code) {
|
283
|
-
if (n_entry == 0)
|
283
|
+
if (n_entry == 0) {
|
284
284
|
return 0;
|
285
|
+
}
|
285
286
|
assert(list_no < nlist);
|
286
287
|
size_t o = ids[list_no].size();
|
287
288
|
ids[list_no].resize(o + n_entry);
|
@@ -526,8 +527,9 @@ void SliceInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
526
527
|
std::vector<idx_t> translated_list_nos;
|
527
528
|
for (int j = 0; j < nlist; j++) {
|
528
529
|
idx_t list_no = list_nos[j];
|
529
|
-
if (list_no < 0)
|
530
|
+
if (list_no < 0) {
|
530
531
|
continue;
|
532
|
+
}
|
531
533
|
translated_list_nos.push_back(translate_list_no(this, list_no));
|
532
534
|
}
|
533
535
|
il->prefetch_lists(translated_list_nos.data(), translated_list_nos.size());
|
@@ -630,8 +632,9 @@ void VStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
630
632
|
std::vector<int> n_per_il(ils.size(), 0);
|
631
633
|
for (int j = 0; j < nlist; j++) {
|
632
634
|
idx_t list_no = list_nos[j];
|
633
|
-
if (list_no < 0)
|
635
|
+
if (list_no < 0) {
|
634
636
|
continue;
|
637
|
+
}
|
635
638
|
int i = ilno[j] = translate_list_no(this, list_no);
|
636
639
|
n_per_il[i]++;
|
637
640
|
}
|
@@ -642,8 +645,9 @@ void VStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
642
645
|
std::vector<idx_t> sorted_list_nos(cum_n_per_il.back());
|
643
646
|
for (int j = 0; j < nlist; j++) {
|
644
647
|
idx_t list_no = list_nos[j];
|
645
|
-
if (list_no < 0)
|
648
|
+
if (list_no < 0) {
|
646
649
|
continue;
|
650
|
+
}
|
647
651
|
int i = ilno[j];
|
648
652
|
list_no -= cumsz[i];
|
649
653
|
sorted_list_nos[cum_n_per_il[i]++] = list_no;
|
@@ -716,8 +720,9 @@ void MaskedInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
716
720
|
std::vector<idx_t> list0, list1;
|
717
721
|
for (int i = 0; i < nlist; i++) {
|
718
722
|
idx_t list_no = list_nos[i];
|
719
|
-
if (list_no < 0)
|
723
|
+
if (list_no < 0) {
|
720
724
|
continue;
|
725
|
+
}
|
721
726
|
size_t sz = il0->list_size(list_no);
|
722
727
|
(sz ? list0 : list1).push_back(list_no);
|
723
728
|
}
|
@@ -782,8 +787,9 @@ void StopWordsInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
782
787
|
std::vector<idx_t> list0;
|
783
788
|
for (int i = 0; i < nlist; i++) {
|
784
789
|
idx_t list_no = list_nos[i];
|
785
|
-
if (list_no < 0)
|
790
|
+
if (list_no < 0) {
|
786
791
|
continue;
|
792
|
+
}
|
787
793
|
if (il0->list_size(list_no) < maxsize) {
|
788
794
|
list0.push_back(list_no);
|
789
795
|
}
|
@@ -148,8 +148,9 @@ struct OnDiskInvertedLists::OngoingPrefetch {
|
|
148
148
|
|
149
149
|
bool one_list() {
|
150
150
|
idx_t list_no = pf->get_next_list();
|
151
|
-
if (list_no == -1)
|
151
|
+
if (list_no == -1) {
|
152
152
|
return false;
|
153
|
+
}
|
153
154
|
const OnDiskInvertedLists* od = pf->od;
|
154
155
|
od->locks->lock_1(list_no);
|
155
156
|
size_t n = od->list_size(list_no);
|
@@ -195,8 +196,9 @@ struct OnDiskInvertedLists::OngoingPrefetch {
|
|
195
196
|
static void* prefetch_list(void* arg) {
|
196
197
|
Thread* th = static_cast<Thread*>(arg);
|
197
198
|
|
198
|
-
while (th->one_list())
|
199
|
+
while (th->one_list()) {
|
199
200
|
;
|
201
|
+
}
|
200
202
|
|
201
203
|
return nullptr;
|
202
204
|
}
|
@@ -404,8 +406,9 @@ void OnDiskInvertedLists::update_entries(
|
|
404
406
|
const idx_t* ids_in,
|
405
407
|
const uint8_t* codes_in) {
|
406
408
|
FAISS_THROW_IF_NOT(!read_only);
|
407
|
-
if (n_entry == 0)
|
409
|
+
if (n_entry == 0) {
|
408
410
|
return;
|
411
|
+
}
|
409
412
|
[[maybe_unused]] const List& l = lists[list_no];
|
410
413
|
assert(n_entry + offset <= l.size);
|
411
414
|
idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
|
@@ -515,8 +518,9 @@ size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
|
|
515
518
|
|
516
519
|
void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
|
517
520
|
// should hold lock2
|
518
|
-
if (capacity == 0)
|
521
|
+
if (capacity == 0) {
|
519
522
|
return;
|
523
|
+
}
|
520
524
|
|
521
525
|
auto it = slots.begin();
|
522
526
|
while (it != slots.end() && it->offset <= offset) {
|
@@ -17,21 +17,24 @@ namespace faiss {
|
|
17
17
|
template <typename C>
|
18
18
|
void HeapArray<C>::heapify() {
|
19
19
|
#pragma omp parallel for
|
20
|
-
for (int64_t j = 0; j < nh; j++)
|
20
|
+
for (int64_t j = 0; j < nh; j++) {
|
21
21
|
heap_heapify<C>(k, val + j * k, ids + j * k);
|
22
|
+
}
|
22
23
|
}
|
23
24
|
|
24
25
|
template <typename C>
|
25
26
|
void HeapArray<C>::reorder() {
|
26
27
|
#pragma omp parallel for
|
27
|
-
for (int64_t j = 0; j < nh; j++)
|
28
|
+
for (int64_t j = 0; j < nh; j++) {
|
28
29
|
heap_reorder<C>(k, val + j * k, ids + j * k);
|
30
|
+
}
|
29
31
|
}
|
30
32
|
|
31
33
|
template <typename C>
|
32
34
|
void HeapArray<C>::addn(size_t nj, const T* vin, TI j0, size_t i0, int64_t ni) {
|
33
|
-
if (ni == -1)
|
35
|
+
if (ni == -1) {
|
34
36
|
ni = nh;
|
37
|
+
}
|
35
38
|
assert(i0 >= 0 && i0 + ni <= nh);
|
36
39
|
#pragma omp parallel for if (ni * nj > 100000)
|
37
40
|
for (int64_t i = i0; i < i0 + ni; i++) {
|
@@ -60,8 +63,9 @@ void HeapArray<C>::addn_with_ids(
|
|
60
63
|
addn(nj, vin, 0, i0, ni);
|
61
64
|
return;
|
62
65
|
}
|
63
|
-
if (ni == -1)
|
66
|
+
if (ni == -1) {
|
64
67
|
ni = nh;
|
68
|
+
}
|
65
69
|
assert(i0 >= 0 && i0 + ni <= nh);
|
66
70
|
#pragma omp parallel for if (ni * nj > 100000)
|
67
71
|
for (int64_t i = i0; i < i0 + ni; i++) {
|
@@ -115,19 +119,22 @@ void HeapArray<C>::per_line_extrema(T* out_val, TI* out_ids) const {
|
|
115
119
|
int64_t imin = -1;
|
116
120
|
typename C::T xval = C::Crev::neutral();
|
117
121
|
const typename C::T* x_ = val + j * k;
|
118
|
-
for (size_t i = 0; i < k; i++)
|
122
|
+
for (size_t i = 0; i < k; i++) {
|
119
123
|
if (C::cmp(x_[i], xval)) {
|
120
124
|
xval = x_[i];
|
121
125
|
imin = i;
|
122
126
|
}
|
123
|
-
|
127
|
+
}
|
128
|
+
if (out_val) {
|
124
129
|
out_val[j] = xval;
|
130
|
+
}
|
125
131
|
|
126
132
|
if (out_ids) {
|
127
|
-
if (ids && imin != -1)
|
133
|
+
if (ids && imin != -1) {
|
128
134
|
out_ids[j] = ids[j * k + imin];
|
129
|
-
else
|
135
|
+
} else {
|
130
136
|
out_ids[j] = imin;
|
137
|
+
}
|
131
138
|
}
|
132
139
|
}
|
133
140
|
}
|
@@ -53,8 +53,9 @@ inline void heap_pop(size_t k, typename C::T* bh_val, typename C::TI* bh_ids) {
|
|
53
53
|
while (1) {
|
54
54
|
i1 = i << 1;
|
55
55
|
i2 = i1 + 1;
|
56
|
-
if (i1 > k)
|
56
|
+
if (i1 > k) {
|
57
57
|
break;
|
58
|
+
}
|
58
59
|
if ((i2 == k + 1) ||
|
59
60
|
C::cmp2(bh_val[i1], bh_val[i2], bh_ids[i1], bh_ids[i2])) {
|
60
61
|
if (C::cmp2(val, bh_val[i1], id, bh_ids[i1])) {
|
@@ -220,8 +221,9 @@ inline void heap_pop(size_t k, std::pair<typename C::T, typename C::TI>* bh) {
|
|
220
221
|
while (1) {
|
221
222
|
i1 = i << 1;
|
222
223
|
i2 = i1 + 1;
|
223
|
-
if (i1 > k)
|
224
|
+
if (i1 > k) {
|
224
225
|
break;
|
226
|
+
}
|
225
227
|
if ((i2 == k + 1) ||
|
226
228
|
C::cmp2(bh[i1].first, bh[i2].first, bh[i1].second, bh[i2].second)) {
|
227
229
|
if (C::cmp2(val, bh[i1].first, id, bh[i1].second)) {
|
@@ -320,15 +322,18 @@ inline void heap_heapify(
|
|
320
322
|
const typename C::T* x = nullptr,
|
321
323
|
const typename C::TI* ids = nullptr,
|
322
324
|
size_t k0 = 0) {
|
323
|
-
if (k0 > 0)
|
325
|
+
if (k0 > 0) {
|
324
326
|
assert(x);
|
327
|
+
}
|
325
328
|
|
326
329
|
if (ids) {
|
327
|
-
for (size_t i = 0; i < k0; i++)
|
330
|
+
for (size_t i = 0; i < k0; i++) {
|
328
331
|
heap_push<C>(i + 1, bh_val, bh_ids, x[i], ids[i]);
|
332
|
+
}
|
329
333
|
} else {
|
330
|
-
for (size_t i = 0; i < k0; i++)
|
334
|
+
for (size_t i = 0; i < k0; i++) {
|
331
335
|
heap_push<C>(i + 1, bh_val, bh_ids, x[i], i);
|
336
|
+
}
|
332
337
|
}
|
333
338
|
|
334
339
|
for (size_t i = k0; i < k; i++) {
|
@@ -373,18 +378,19 @@ inline void heap_addn(
|
|
373
378
|
const typename C::TI* ids,
|
374
379
|
size_t n) {
|
375
380
|
size_t i;
|
376
|
-
if (ids)
|
381
|
+
if (ids) {
|
377
382
|
for (i = 0; i < n; i++) {
|
378
383
|
if (C::cmp(bh_val[0], x[i])) {
|
379
384
|
heap_replace_top<C>(k, bh_val, bh_ids, x[i], ids[i]);
|
380
385
|
}
|
381
386
|
}
|
382
|
-
else
|
387
|
+
} else {
|
383
388
|
for (i = 0; i < n; i++) {
|
384
389
|
if (C::cmp(bh_val[0], x[i])) {
|
385
390
|
heap_replace_top<C>(k, bh_val, bh_ids, x[i], i);
|
386
391
|
}
|
387
392
|
}
|
393
|
+
}
|
388
394
|
}
|
389
395
|
|
390
396
|
/* Partial instanciation for heaps with TI = int64_t */
|
@@ -433,8 +439,9 @@ inline size_t heap_reorder(
|
|
433
439
|
heap_pop<C>(k - i, bh_val, bh_ids);
|
434
440
|
bh_val[k - ii - 1] = val;
|
435
441
|
bh_ids[k - ii - 1] = id;
|
436
|
-
if (id != -1)
|
442
|
+
if (id != -1) {
|
437
443
|
ii++;
|
444
|
+
}
|
438
445
|
}
|
439
446
|
/* Count the number of elements which are effectively returned */
|
440
447
|
size_t nel = ii;
|
@@ -573,17 +580,20 @@ inline void indirect_heap_pop(
|
|
573
580
|
while (1) {
|
574
581
|
size_t i1 = i << 1;
|
575
582
|
size_t i2 = i1 + 1;
|
576
|
-
if (i1 > k)
|
583
|
+
if (i1 > k) {
|
577
584
|
break;
|
585
|
+
}
|
578
586
|
typename C::TI id1 = bh_ids[i1], id2 = bh_ids[i2];
|
579
587
|
if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[id2])) {
|
580
|
-
if (C::cmp(val, bh_val[id1]))
|
588
|
+
if (C::cmp(val, bh_val[id1])) {
|
581
589
|
break;
|
590
|
+
}
|
582
591
|
bh_ids[i] = id1;
|
583
592
|
i = i1;
|
584
593
|
} else {
|
585
|
-
if (C::cmp(val, bh_val[id2]))
|
594
|
+
if (C::cmp(val, bh_val[id2])) {
|
586
595
|
break;
|
596
|
+
}
|
587
597
|
bh_ids[i] = id2;
|
588
598
|
i = i2;
|
589
599
|
}
|
@@ -602,8 +612,9 @@ inline void indirect_heap_push(
|
|
602
612
|
size_t i = k;
|
603
613
|
while (i > 1) {
|
604
614
|
size_t i_father = i >> 1;
|
605
|
-
if (!C::cmp(val, bh_val[bh_ids[i_father]]))
|
615
|
+
if (!C::cmp(val, bh_val[bh_ids[i_father]])) {
|
606
616
|
break;
|
617
|
+
}
|
607
618
|
bh_ids[i] = bh_ids[i_father];
|
608
619
|
i = i_father;
|
609
620
|
}
|