faiss 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// // // AVX-512 version. It is currently unused, but is kept for future
|
|
9
|
+
// // // needs.
|
|
10
|
+
// // template <class SearchResultType, typename T = PQDecoder>
|
|
11
|
+
// // typename std::enable_if<(std::is_same<T, PQDecoder8>::value), void>::
|
|
12
|
+
// // type distance_four_codes(
|
|
13
|
+
// // const uint8_t* __restrict code0,
|
|
14
|
+
// // const uint8_t* __restrict code1,
|
|
15
|
+
// // const uint8_t* __restrict code2,
|
|
16
|
+
// // const uint8_t* __restrict code3,
|
|
17
|
+
// // float& result0,
|
|
18
|
+
// // float& result1,
|
|
19
|
+
// // float& result2,
|
|
20
|
+
// // float& result3
|
|
21
|
+
// // ) const {
|
|
22
|
+
// // result0 = 0;
|
|
23
|
+
// // result1 = 0;
|
|
24
|
+
// // result2 = 0;
|
|
25
|
+
// // result3 = 0;
|
|
26
|
+
|
|
27
|
+
// // size_t m = 0;
|
|
28
|
+
// // const size_t pqM16 = pq.M / 16;
|
|
29
|
+
|
|
30
|
+
// // constexpr intptr_t N = 4;
|
|
31
|
+
|
|
32
|
+
// // const float* tab = sim_table;
|
|
33
|
+
|
|
34
|
+
// // if (pqM16 > 0) {
|
|
35
|
+
// // // process 16 values per loop
|
|
36
|
+
// // const __m512i ksub = _mm512_set1_epi32(pq.ksub);
|
|
37
|
+
// // __m512i offsets_0 = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
|
|
38
|
+
// // 8, 9, 10, 11, 12, 13, 14, 15);
|
|
39
|
+
// // offsets_0 = _mm512_mullo_epi32(offsets_0, ksub);
|
|
40
|
+
|
|
41
|
+
// // // accumulators of partial sums
|
|
42
|
+
// // __m512 partialSums[N];
|
|
43
|
+
// // for (intptr_t j = 0; j < N; j++) {
|
|
44
|
+
// // partialSums[j] = _mm512_setzero_ps();
|
|
45
|
+
// // }
|
|
46
|
+
|
|
47
|
+
// // // loop
|
|
48
|
+
// // for (m = 0; m < pqM16 * 16; m += 16) {
|
|
49
|
+
// // // load 16 uint8 values
|
|
50
|
+
// // __m128i mm1[N];
|
|
51
|
+
// // mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
|
|
52
|
+
// // mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
|
|
53
|
+
// // mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
|
|
54
|
+
// // mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
|
|
55
|
+
|
|
56
|
+
// // // process first 8 codes
|
|
57
|
+
// // for (intptr_t j = 0; j < N; j++) {
|
|
58
|
+
// // // convert uint8 values (low part of __m128i) to int32
|
|
59
|
+
// // // values
|
|
60
|
+
// // const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
|
|
61
|
+
|
|
62
|
+
// // // add offsets
|
|
63
|
+
// // const __m512i indices_to_read_from =
|
|
64
|
+
// // _mm512_add_epi32(idx1, offsets_0);
|
|
65
|
+
|
|
66
|
+
// // // gather 8 values, similar to 8 operations of
|
|
67
|
+
// // // tab[idx]
|
|
68
|
+
// // __m512 collected =
|
|
69
|
+
// // _mm512_i32gather_ps(
|
|
70
|
+
// // indices_to_read_from, tab, sizeof(float));
|
|
71
|
+
|
|
72
|
+
// // // collect partial sums
|
|
73
|
+
// // partialSums[j] = _mm512_add_ps(partialSums[j],
|
|
74
|
+
// // collected);
|
|
75
|
+
// // }
|
|
76
|
+
// // tab += pq.ksub * 16;
|
|
77
|
+
|
|
78
|
+
// // }
|
|
79
|
+
|
|
80
|
+
// // // horizontal sum for partialSum
|
|
81
|
+
// // result0 += _mm512_reduce_add_ps(partialSums[0]);
|
|
82
|
+
// // result1 += _mm512_reduce_add_ps(partialSums[1]);
|
|
83
|
+
// // result2 += _mm512_reduce_add_ps(partialSums[2]);
|
|
84
|
+
// // result3 += _mm512_reduce_add_ps(partialSums[3]);
|
|
85
|
+
// // }
|
|
86
|
+
|
|
87
|
+
// // //
|
|
88
|
+
// // if (m < pq.M) {
|
|
89
|
+
// // // process leftovers
|
|
90
|
+
// // PQDecoder decoder0(code0 + m, pq.nbits);
|
|
91
|
+
// // PQDecoder decoder1(code1 + m, pq.nbits);
|
|
92
|
+
// // PQDecoder decoder2(code2 + m, pq.nbits);
|
|
93
|
+
// // PQDecoder decoder3(code3 + m, pq.nbits);
|
|
94
|
+
// // for (; m < pq.M; m++) {
|
|
95
|
+
// // result0 += tab[decoder0.decode()];
|
|
96
|
+
// // result1 += tab[decoder1.decode()];
|
|
97
|
+
// // result2 += tab[decoder2.decode()];
|
|
98
|
+
// // result3 += tab[decoder3.decode()];
|
|
99
|
+
// // tab += pq.ksub;
|
|
100
|
+
// // }
|
|
101
|
+
// // }
|
|
102
|
+
// // }
|
|
@@ -65,7 +65,7 @@ namespace faiss {
|
|
|
65
65
|
static void read_index_header(Index* idx, IOReader* f) {
|
|
66
66
|
READ1(idx->d);
|
|
67
67
|
READ1(idx->ntotal);
|
|
68
|
-
|
|
68
|
+
idx_t dummy;
|
|
69
69
|
READ1(dummy);
|
|
70
70
|
READ1(dummy);
|
|
71
71
|
READ1(idx->is_trained);
|
|
@@ -279,6 +279,8 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
|
|
|
279
279
|
aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
|
|
280
280
|
aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) {
|
|
281
281
|
READXBVECTOR(aq->qnorm.codes);
|
|
282
|
+
aq->qnorm.ntotal = aq->qnorm.codes.size() / 4;
|
|
283
|
+
aq->qnorm.update_permutation();
|
|
282
284
|
}
|
|
283
285
|
|
|
284
286
|
if (aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
|
|
@@ -439,7 +441,6 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
|
|
|
439
441
|
dm->type = (DirectMap::Type)maintain_direct_map;
|
|
440
442
|
READVECTOR(dm->array);
|
|
441
443
|
if (dm->type == DirectMap::Hashtable) {
|
|
442
|
-
using idx_t = Index::idx_t;
|
|
443
444
|
std::vector<std::pair<idx_t, idx_t>> v;
|
|
444
445
|
READVECTOR(v);
|
|
445
446
|
std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
|
|
@@ -453,7 +454,7 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
|
|
|
453
454
|
static void read_ivf_header(
|
|
454
455
|
IndexIVF* ivf,
|
|
455
456
|
IOReader* f,
|
|
456
|
-
std::vector<std::vector<
|
|
457
|
+
std::vector<std::vector<idx_t>>* ids = nullptr) {
|
|
457
458
|
read_index_header(ivf, f);
|
|
458
459
|
READ1(ivf->nlist);
|
|
459
460
|
READ1(ivf->nprobe);
|
|
@@ -470,7 +471,7 @@ static void read_ivf_header(
|
|
|
470
471
|
// used for legacy formats
|
|
471
472
|
static ArrayInvertedLists* set_array_invlist(
|
|
472
473
|
IndexIVF* ivf,
|
|
473
|
-
std::vector<std::vector<
|
|
474
|
+
std::vector<std::vector<idx_t>>& ids) {
|
|
474
475
|
ArrayInvertedLists* ail =
|
|
475
476
|
new ArrayInvertedLists(ivf->nlist, ivf->code_size);
|
|
476
477
|
std::swap(ail->ids, ids);
|
|
@@ -487,7 +488,7 @@ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
|
|
|
487
488
|
: nullptr;
|
|
488
489
|
IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
|
|
489
490
|
|
|
490
|
-
std::vector<std::vector<
|
|
491
|
+
std::vector<std::vector<idx_t>> ids;
|
|
491
492
|
read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
|
|
492
493
|
READ1(ivpq->by_residual);
|
|
493
494
|
READ1(ivpq->code_size);
|
|
@@ -728,10 +729,11 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
728
729
|
READ1(ivaqfs->max_train_points);
|
|
729
730
|
|
|
730
731
|
read_InvertedLists(ivaqfs, f, io_flags);
|
|
732
|
+
ivaqfs->init_code_packer();
|
|
731
733
|
idx = ivaqfs;
|
|
732
734
|
} else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
|
|
733
735
|
IndexIVFFlat* ivfl = new IndexIVFFlat();
|
|
734
|
-
std::vector<std::vector<
|
|
736
|
+
std::vector<std::vector<idx_t>> ids;
|
|
735
737
|
read_ivf_header(ivfl, f, &ids);
|
|
736
738
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
737
739
|
ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
|
|
@@ -754,10 +756,10 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
754
756
|
read_ivf_header(ivfl, f);
|
|
755
757
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
756
758
|
{
|
|
757
|
-
std::vector<
|
|
759
|
+
std::vector<idx_t> tab;
|
|
758
760
|
READVECTOR(tab);
|
|
759
761
|
for (long i = 0; i < tab.size(); i += 2) {
|
|
760
|
-
std::pair<
|
|
762
|
+
std::pair<idx_t, idx_t> pair(tab[i], tab[i + 1]);
|
|
761
763
|
ivfl->instances.insert(pair);
|
|
762
764
|
}
|
|
763
765
|
}
|
|
@@ -788,7 +790,7 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
788
790
|
idx = idxl;
|
|
789
791
|
} else if (h == fourcc("IvSQ")) { // legacy
|
|
790
792
|
IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
|
|
791
|
-
std::vector<std::vector<
|
|
793
|
+
std::vector<std::vector<idx_t>> ids;
|
|
792
794
|
read_ivf_header(ivsc, f, &ids);
|
|
793
795
|
read_ScalarQuantizer(&ivsc->sq, f);
|
|
794
796
|
READ1(ivsc->code_size);
|
|
@@ -1002,6 +1004,7 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
1002
1004
|
ivpq->nbits = pq.nbits;
|
|
1003
1005
|
ivpq->ksub = (1 << pq.nbits);
|
|
1004
1006
|
ivpq->code_size = pq.code_size;
|
|
1007
|
+
ivpq->init_code_packer();
|
|
1005
1008
|
|
|
1006
1009
|
idx = ivpq;
|
|
1007
1010
|
} else if (h == fourcc("IRMf")) {
|
|
@@ -1072,7 +1075,7 @@ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
|
|
|
1072
1075
|
static void read_binary_ivf_header(
|
|
1073
1076
|
IndexBinaryIVF* ivf,
|
|
1074
1077
|
IOReader* f,
|
|
1075
|
-
std::vector<std::vector<
|
|
1078
|
+
std::vector<std::vector<idx_t>>* ids = nullptr) {
|
|
1076
1079
|
read_index_binary_header(ivf, f);
|
|
1077
1080
|
READ1(ivf->nlist);
|
|
1078
1081
|
READ1(ivf->nprobe);
|
|
@@ -84,7 +84,7 @@ namespace faiss {
|
|
|
84
84
|
static void write_index_header(const Index* idx, IOWriter* f) {
|
|
85
85
|
WRITE1(idx->d);
|
|
86
86
|
WRITE1(idx->ntotal);
|
|
87
|
-
|
|
87
|
+
idx_t dummy = 1 << 20;
|
|
88
88
|
WRITE1(dummy);
|
|
89
89
|
WRITE1(dummy);
|
|
90
90
|
WRITE1(idx->is_trained);
|
|
@@ -373,7 +373,6 @@ static void write_direct_map(const DirectMap* dm, IOWriter* f) {
|
|
|
373
373
|
WRITE1(maintain_direct_map);
|
|
374
374
|
WRITEVECTOR(dm->array);
|
|
375
375
|
if (dm->type == DirectMap::Hashtable) {
|
|
376
|
-
using idx_t = Index::idx_t;
|
|
377
376
|
std::vector<std::pair<idx_t, idx_t>> v;
|
|
378
377
|
const std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
|
|
379
378
|
v.resize(map.size());
|
|
@@ -615,7 +614,7 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
|
615
614
|
WRITE1(h);
|
|
616
615
|
write_ivf_header(ivfl, f);
|
|
617
616
|
{
|
|
618
|
-
std::vector<
|
|
617
|
+
std::vector<idx_t> tab(2 * ivfl->instances.size());
|
|
619
618
|
long i = 0;
|
|
620
619
|
for (auto it = ivfl->instances.begin(); it != ivfl->instances.end();
|
|
621
620
|
++it) {
|
|
@@ -900,7 +899,7 @@ static void write_binary_multi_hash_map(
|
|
|
900
899
|
size_t ntotal,
|
|
901
900
|
IOWriter* f) {
|
|
902
901
|
int id_bits = 0;
|
|
903
|
-
while ((ntotal > ((
|
|
902
|
+
while ((ntotal > ((idx_t)1 << id_bits))) {
|
|
904
903
|
id_bits++;
|
|
905
904
|
}
|
|
906
905
|
WRITE1(id_bits);
|
|
@@ -22,10 +22,10 @@ namespace faiss {
|
|
|
22
22
|
* @param argmins argmin of each row
|
|
23
23
|
*/
|
|
24
24
|
void smawk(
|
|
25
|
-
const
|
|
26
|
-
const
|
|
25
|
+
const idx_t nrows,
|
|
26
|
+
const idx_t ncols,
|
|
27
27
|
const float* x,
|
|
28
|
-
|
|
28
|
+
idx_t* argmins);
|
|
29
29
|
|
|
30
30
|
/** Exact 1D K-Means by dynamic programming
|
|
31
31
|
*
|
|
@@ -636,7 +636,7 @@ void ZnSphereCodecRec::decode(uint64_t code, float* c) const {
|
|
|
636
636
|
}
|
|
637
637
|
}
|
|
638
638
|
|
|
639
|
-
// if not use_rec,
|
|
639
|
+
// if not use_rec, instantiate an arbitrary harmless znc_rec
|
|
640
640
|
ZnSphereCodecAlt::ZnSphereCodecAlt(int dim, int r2)
|
|
641
641
|
: ZnSphereCodec(dim, r2),
|
|
642
642
|
use_rec((dim & (dim - 1)) == 0),
|
|
@@ -7,6 +7,10 @@
|
|
|
7
7
|
|
|
8
8
|
#pragma once
|
|
9
9
|
|
|
10
|
+
// basic int types and size_t
|
|
11
|
+
#include <cstdint>
|
|
12
|
+
#include <cstdio>
|
|
13
|
+
|
|
10
14
|
#ifdef _MSC_VER
|
|
11
15
|
|
|
12
16
|
/*******************************************************
|
|
@@ -19,6 +23,10 @@
|
|
|
19
23
|
#define FAISS_API __declspec(dllimport)
|
|
20
24
|
#endif // FAISS_MAIN_LIB
|
|
21
25
|
|
|
26
|
+
#ifdef _MSC_VER
|
|
27
|
+
#define strtok_r strtok_s
|
|
28
|
+
#endif // _MSC_VER
|
|
29
|
+
|
|
22
30
|
#define __PRETTY_FUNCTION__ __FUNCSIG__
|
|
23
31
|
|
|
24
32
|
#define posix_memalign(p, a, s) \
|
|
@@ -87,3 +95,56 @@ inline int __builtin_clzll(uint64_t x) {
|
|
|
87
95
|
#define ALIGNED(x) __attribute__((aligned(x)))
|
|
88
96
|
|
|
89
97
|
#endif // _MSC_VER
|
|
98
|
+
|
|
99
|
+
#if defined(__GNUC__) || defined(__clang__)
|
|
100
|
+
#define FAISS_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
|
101
|
+
#else
|
|
102
|
+
#define FAISS_DEPRECATED(msg)
|
|
103
|
+
#endif // GCC or Clang
|
|
104
|
+
|
|
105
|
+
// Localized enablement of imprecise floating point operations
|
|
106
|
+
// You need to use all 3 macros to cover all compilers.
|
|
107
|
+
#if defined(_MSC_VER)
|
|
108
|
+
#define FAISS_PRAGMA_IMPRECISE_LOOP
|
|
109
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
|
|
110
|
+
__pragma(float_control(precise, off, push))
|
|
111
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END __pragma(float_control(pop))
|
|
112
|
+
#elif defined(__clang__)
|
|
113
|
+
#define FAISS_PRAGMA_IMPRECISE_LOOP \
|
|
114
|
+
_Pragma("clang loop vectorize(enable) interleave(enable)")
|
|
115
|
+
|
|
116
|
+
// clang-format off
|
|
117
|
+
|
|
118
|
+
// the following ifdef is needed, because old versions of clang (prior to 14)
|
|
119
|
+
// do not generate FMAs on x86 unless this pragma is used. On the other hand,
|
|
120
|
+
// ARM does not support the following pragma flag.
|
|
121
|
+
// TODO: find out how to enable FMAs on clang 10 and earlier.
|
|
122
|
+
#if defined(__x86_64__) && (defined(__clang_major__) && (__clang_major__ > 10))
|
|
123
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
|
|
124
|
+
_Pragma("float_control(precise, off, push)")
|
|
125
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END _Pragma("float_control(pop)")
|
|
126
|
+
#else
|
|
127
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
128
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
129
|
+
#endif
|
|
130
|
+
#elif defined(__GNUC__)
|
|
131
|
+
// Unfortunately, GCC does not provide a unique macro for detecting it.
|
|
132
|
+
// So, we have to stick to GNUC, which is defined by MANY compilers.
|
|
133
|
+
// This is why clang/icc needs to be checked first.
|
|
134
|
+
|
|
135
|
+
// todo: add __INTEL_COMPILER check for the classic ICC
|
|
136
|
+
// todo: add __INTEL_LLVM_COMPILER for ICX
|
|
137
|
+
|
|
138
|
+
#define FAISS_PRAGMA_IMPRECISE_LOOP
|
|
139
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
|
|
140
|
+
_Pragma("GCC push_options") \
|
|
141
|
+
_Pragma("GCC optimize (\"unroll-loops,associative-math,no-signed-zeros\")")
|
|
142
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END \
|
|
143
|
+
_Pragma("GCC pop_options")
|
|
144
|
+
#else
|
|
145
|
+
#define FAISS_PRAGMA_IMPRECISE_LOOP
|
|
146
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
147
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
148
|
+
#endif
|
|
149
|
+
|
|
150
|
+
// clang-format on
|
|
@@ -88,7 +88,7 @@ void pq4_pack_codes_range(
|
|
|
88
88
|
size_t i0,
|
|
89
89
|
size_t i1,
|
|
90
90
|
size_t bbs,
|
|
91
|
-
size_t
|
|
91
|
+
size_t nsq,
|
|
92
92
|
uint8_t* blocks) {
|
|
93
93
|
const uint8_t perm0[16] = {
|
|
94
94
|
0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};
|
|
@@ -98,9 +98,9 @@ void pq4_pack_codes_range(
|
|
|
98
98
|
size_t block1 = ((i1 - 1) / bbs) + 1;
|
|
99
99
|
|
|
100
100
|
for (size_t b = block0; b < block1; b++) {
|
|
101
|
-
uint8_t* codes2 = blocks + b * bbs *
|
|
101
|
+
uint8_t* codes2 = blocks + b * bbs * nsq / 2;
|
|
102
102
|
int64_t i_base = b * bbs - i0;
|
|
103
|
-
for (int sq = 0; sq <
|
|
103
|
+
for (int sq = 0; sq < nsq; sq += 2) {
|
|
104
104
|
for (size_t i = 0; i < bbs; i += 32) {
|
|
105
105
|
std::array<uint8_t, 32> c, c0, c1;
|
|
106
106
|
get_matrix_column(
|
|
@@ -127,7 +127,7 @@ namespace {
|
|
|
127
127
|
// get the specific address of the vector inside a block
|
|
128
128
|
// shift is used to determine if the element is saved in bits 0..3 (false) or
|
|
129
129
|
// bits 4..7 (true)
|
|
130
|
-
|
|
130
|
+
size_t get_vector_specific_address(
|
|
131
131
|
size_t bbs,
|
|
132
132
|
size_t vector_id,
|
|
133
133
|
size_t sq,
|
|
@@ -189,6 +189,50 @@ void pq4_set_packed_element(
|
|
|
189
189
|
}
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
+
/***************************************************************
|
|
193
|
+
* CodePackerPQ4 implementation
|
|
194
|
+
***************************************************************/
|
|
195
|
+
|
|
196
|
+
CodePackerPQ4::CodePackerPQ4(size_t nsq, size_t bbs) {
|
|
197
|
+
this->nsq = nsq;
|
|
198
|
+
nvec = bbs;
|
|
199
|
+
code_size = (nsq * 4 + 7) / 8;
|
|
200
|
+
block_size = ((nsq + 1) / 2) * bbs;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
void CodePackerPQ4::pack_1(
|
|
204
|
+
const uint8_t* flat_code,
|
|
205
|
+
size_t offset,
|
|
206
|
+
uint8_t* block) const {
|
|
207
|
+
size_t bbs = nvec;
|
|
208
|
+
if (offset >= nvec) {
|
|
209
|
+
block += (offset / nvec) * block_size;
|
|
210
|
+
offset = offset % nvec;
|
|
211
|
+
}
|
|
212
|
+
for (size_t i = 0; i < code_size; i++) {
|
|
213
|
+
uint8_t code = flat_code[i];
|
|
214
|
+
pq4_set_packed_element(block, code & 15, bbs, nsq, offset, 2 * i);
|
|
215
|
+
pq4_set_packed_element(block, code >> 4, bbs, nsq, offset, 2 * i + 1);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
void CodePackerPQ4::unpack_1(
|
|
220
|
+
const uint8_t* block,
|
|
221
|
+
size_t offset,
|
|
222
|
+
uint8_t* flat_code) const {
|
|
223
|
+
size_t bbs = nvec;
|
|
224
|
+
if (offset >= nvec) {
|
|
225
|
+
block += (offset / nvec) * block_size;
|
|
226
|
+
offset = offset % nvec;
|
|
227
|
+
}
|
|
228
|
+
for (size_t i = 0; i < code_size; i++) {
|
|
229
|
+
uint8_t code0, code1;
|
|
230
|
+
code0 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i);
|
|
231
|
+
code1 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i + 1);
|
|
232
|
+
flat_code[i] = code0 | (code1 << 4);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
192
236
|
/***************************************************************
|
|
193
237
|
* Packing functions for Look-Up Tables (LUT)
|
|
194
238
|
***************************************************************/
|
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
#include <cstdint>
|
|
11
11
|
#include <cstdlib>
|
|
12
12
|
|
|
13
|
+
#include <faiss/impl/CodePacker.h>
|
|
14
|
+
|
|
13
15
|
/** PQ4 SIMD packing and accumulation functions
|
|
14
16
|
*
|
|
15
17
|
* The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
|
|
@@ -17,7 +19,7 @@
|
|
|
17
19
|
* otherwise register spilling becomes too large.
|
|
18
20
|
*
|
|
19
21
|
* The implementation of these functions is spread over 3 cpp files to reduce
|
|
20
|
-
* parallel compile times. Templates are
|
|
22
|
+
* parallel compile times. Templates are instantiated explicitly.
|
|
21
23
|
*/
|
|
22
24
|
|
|
23
25
|
namespace faiss {
|
|
@@ -29,7 +31,7 @@ namespace faiss {
|
|
|
29
31
|
* @param ntotal number of input codes
|
|
30
32
|
* @param nb output number of codes (ntotal rounded up to a multiple of
|
|
31
33
|
* bbs)
|
|
32
|
-
* @param
|
|
34
|
+
* @param nsq number of sub-quantizers (=M rounded up to a multiple of 2)
|
|
33
35
|
* @param bbs size of database blocks (multiple of 32)
|
|
34
36
|
* @param blocks output array, size nb * nsq / 2.
|
|
35
37
|
*/
|
|
@@ -39,7 +41,7 @@ void pq4_pack_codes(
|
|
|
39
41
|
size_t M,
|
|
40
42
|
size_t nb,
|
|
41
43
|
size_t bbs,
|
|
42
|
-
size_t
|
|
44
|
+
size_t nsq,
|
|
43
45
|
uint8_t* blocks);
|
|
44
46
|
|
|
45
47
|
/** Same as pack_codes but write in a given range of the output,
|
|
@@ -56,7 +58,7 @@ void pq4_pack_codes_range(
|
|
|
56
58
|
size_t i0,
|
|
57
59
|
size_t i1,
|
|
58
60
|
size_t bbs,
|
|
59
|
-
size_t
|
|
61
|
+
size_t nsq,
|
|
60
62
|
uint8_t* blocks);
|
|
61
63
|
|
|
62
64
|
/** get a single element from a packed codes table
|
|
@@ -84,6 +86,18 @@ void pq4_set_packed_element(
|
|
|
84
86
|
size_t vector_id,
|
|
85
87
|
size_t sq);
|
|
86
88
|
|
|
89
|
+
/** CodePacker API for the PQ4 fast-scan */
|
|
90
|
+
struct CodePackerPQ4 : CodePacker {
|
|
91
|
+
size_t nsq;
|
|
92
|
+
|
|
93
|
+
CodePackerPQ4(size_t nsq, size_t bbs);
|
|
94
|
+
|
|
95
|
+
void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
|
|
96
|
+
const final;
|
|
97
|
+
void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
|
|
98
|
+
const final;
|
|
99
|
+
};
|
|
100
|
+
|
|
87
101
|
/** Pack Look-up table for consumption by the kernel.
|
|
88
102
|
*
|
|
89
103
|
* @param nq number of queries
|
|
@@ -189,7 +189,7 @@ void accumulate(
|
|
|
189
189
|
DISPATCH(3);
|
|
190
190
|
DISPATCH(4);
|
|
191
191
|
}
|
|
192
|
-
FAISS_THROW_FMT("accumulate nq=%d not
|
|
192
|
+
FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
|
|
193
193
|
|
|
194
194
|
#undef DISPATCH
|
|
195
195
|
}
|
|
@@ -263,7 +263,7 @@ void pq4_accumulate_loop_qbs(
|
|
|
263
263
|
DISPATCH(4);
|
|
264
264
|
#undef DISPATCH
|
|
265
265
|
default:
|
|
266
|
-
FAISS_THROW_FMT("accumulate nq=%d not
|
|
266
|
+
FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
|
|
267
267
|
}
|
|
268
268
|
i0 += nq;
|
|
269
269
|
LUT += nq * nsq * 16;
|
|
@@ -10,8 +10,6 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
#include <faiss/index_factory.h>
|
|
13
|
-
#include "faiss/MetricType.h"
|
|
14
|
-
#include "faiss/impl/FaissAssert.h"
|
|
15
13
|
|
|
16
14
|
#include <cinttypes>
|
|
17
15
|
#include <cmath>
|
|
@@ -665,19 +663,19 @@ std::unique_ptr<Index> index_factory_sub(
|
|
|
665
663
|
re_match(description, "(.+),Refine\\((.+)\\)", sm)) {
|
|
666
664
|
std::unique_ptr<Index> filter_index =
|
|
667
665
|
index_factory_sub(d, sm[1].str(), metric);
|
|
668
|
-
std::unique_ptr<Index> refine_index;
|
|
669
666
|
|
|
667
|
+
IndexRefine* index_rf = nullptr;
|
|
670
668
|
if (sm.size() == 3) { // Refine
|
|
671
|
-
refine_index =
|
|
669
|
+
std::unique_ptr<Index> refine_index =
|
|
670
|
+
index_factory_sub(d, sm[2].str(), metric);
|
|
671
|
+
index_rf = new IndexRefine(
|
|
672
|
+
filter_index.release(), refine_index.release());
|
|
673
|
+
index_rf->own_refine_index = true;
|
|
672
674
|
} else { // RFlat
|
|
673
|
-
|
|
675
|
+
index_rf = new IndexRefineFlat(filter_index.release(), nullptr);
|
|
674
676
|
}
|
|
675
|
-
|
|
676
|
-
new IndexRefine(filter_index.get(), refine_index.get());
|
|
677
|
+
FAISS_ASSERT(index_rf != nullptr);
|
|
677
678
|
index_rf->own_fields = true;
|
|
678
|
-
filter_index.release();
|
|
679
|
-
refine_index.release();
|
|
680
|
-
index_rf->own_refine_index = true;
|
|
681
679
|
return std::unique_ptr<Index>(index_rf);
|
|
682
680
|
}
|
|
683
681
|
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
#include <faiss/invlists/BlockInvertedLists.h>
|
|
9
9
|
|
|
10
|
+
#include <faiss/impl/CodePacker.h>
|
|
10
11
|
#include <faiss/impl/FaissAssert.h>
|
|
11
12
|
|
|
12
13
|
#include <faiss/impl/io.h>
|
|
@@ -25,29 +26,43 @@ BlockInvertedLists::BlockInvertedLists(
|
|
|
25
26
|
codes.resize(nlist);
|
|
26
27
|
}
|
|
27
28
|
|
|
29
|
+
BlockInvertedLists::BlockInvertedLists(size_t nlist, const CodePacker* packer)
|
|
30
|
+
: InvertedLists(nlist, InvertedLists::INVALID_CODE_SIZE),
|
|
31
|
+
n_per_block(packer->nvec),
|
|
32
|
+
block_size(packer->block_size),
|
|
33
|
+
packer(packer) {
|
|
34
|
+
ids.resize(nlist);
|
|
35
|
+
codes.resize(nlist);
|
|
36
|
+
}
|
|
37
|
+
|
|
28
38
|
BlockInvertedLists::BlockInvertedLists()
|
|
29
|
-
: InvertedLists(0, InvertedLists::INVALID_CODE_SIZE),
|
|
30
|
-
n_per_block(0),
|
|
31
|
-
block_size(0) {}
|
|
39
|
+
: InvertedLists(0, InvertedLists::INVALID_CODE_SIZE) {}
|
|
32
40
|
|
|
33
41
|
size_t BlockInvertedLists::add_entries(
|
|
34
42
|
size_t list_no,
|
|
35
43
|
size_t n_entry,
|
|
36
44
|
const idx_t* ids_in,
|
|
37
45
|
const uint8_t* code) {
|
|
38
|
-
if (n_entry == 0)
|
|
46
|
+
if (n_entry == 0) {
|
|
39
47
|
return 0;
|
|
48
|
+
}
|
|
40
49
|
FAISS_THROW_IF_NOT(list_no < nlist);
|
|
41
50
|
size_t o = ids[list_no].size();
|
|
42
|
-
FAISS_THROW_IF_NOT(
|
|
43
|
-
o == 0); // not clear how we should handle subsequent adds
|
|
44
51
|
ids[list_no].resize(o + n_entry);
|
|
45
52
|
memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
|
|
46
|
-
|
|
47
|
-
// copy whole blocks
|
|
48
|
-
size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
|
|
53
|
+
size_t n_block = (o + n_entry + n_per_block - 1) / n_per_block;
|
|
49
54
|
codes[list_no].resize(n_block * block_size);
|
|
50
|
-
|
|
55
|
+
if (o % block_size == 0) {
|
|
56
|
+
// copy whole blocks
|
|
57
|
+
memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
|
|
58
|
+
} else {
|
|
59
|
+
FAISS_THROW_IF_NOT_MSG(packer, "missing code packer");
|
|
60
|
+
std::vector<uint8_t> buffer(packer->code_size);
|
|
61
|
+
for (size_t i = 0; i < n_entry; i++) {
|
|
62
|
+
packer->unpack_1(code, i, buffer.data());
|
|
63
|
+
packer->pack_1(buffer.data(), i + o, codes[list_no].data());
|
|
64
|
+
}
|
|
65
|
+
}
|
|
51
66
|
return o;
|
|
52
67
|
}
|
|
53
68
|
|
|
@@ -61,7 +76,7 @@ const uint8_t* BlockInvertedLists::get_codes(size_t list_no) const {
|
|
|
61
76
|
return codes[list_no].get();
|
|
62
77
|
}
|
|
63
78
|
|
|
64
|
-
const
|
|
79
|
+
const idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
|
|
65
80
|
assert(list_no < nlist);
|
|
66
81
|
return ids[list_no].data();
|
|
67
82
|
}
|
|
@@ -95,7 +110,9 @@ void BlockInvertedLists::update_entries(
|
|
|
95
110
|
*/
|
|
96
111
|
}
|
|
97
112
|
|
|
98
|
-
BlockInvertedLists::~BlockInvertedLists() {}
|
|
113
|
+
BlockInvertedLists::~BlockInvertedLists() {
|
|
114
|
+
delete packer;
|
|
115
|
+
}
|
|
99
116
|
|
|
100
117
|
/**************************************************
|
|
101
118
|
* IO hook implementation
|
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
namespace faiss {
|
|
16
16
|
|
|
17
|
+
struct CodePacker;
|
|
18
|
+
|
|
17
19
|
/** Inverted Lists that are organized by blocks.
|
|
18
20
|
*
|
|
19
21
|
* Different from the regular inverted lists, the codes are organized by blocks
|
|
@@ -28,13 +30,17 @@ namespace faiss {
|
|
|
28
30
|
* data.
|
|
29
31
|
*/
|
|
30
32
|
struct BlockInvertedLists : InvertedLists {
|
|
31
|
-
size_t n_per_block; // nb of vectors stored per block
|
|
32
|
-
size_t block_size; // nb bytes per block
|
|
33
|
+
size_t n_per_block = 0; // nb of vectors stored per block
|
|
34
|
+
size_t block_size = 0; // nb bytes per block
|
|
35
|
+
|
|
36
|
+
// required to interpret the content of the blocks (owned by this)
|
|
37
|
+
const CodePacker* packer = nullptr;
|
|
33
38
|
|
|
34
39
|
std::vector<AlignedTable<uint8_t>> codes;
|
|
35
40
|
std::vector<std::vector<idx_t>> ids;
|
|
36
41
|
|
|
37
42
|
BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
|
|
43
|
+
BlockInvertedLists(size_t nlist, const CodePacker* packer);
|
|
38
44
|
|
|
39
45
|
BlockInvertedLists();
|
|
40
46
|
|
|
@@ -68,7 +68,7 @@ void DirectMap::clear() {
|
|
|
68
68
|
hashtable.clear();
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
-
|
|
71
|
+
idx_t DirectMap::get(idx_t key) const {
|
|
72
72
|
if (type == Array) {
|
|
73
73
|
FAISS_THROW_IF_NOT_MSG(key >= 0 && key < array.size(), "invalid key");
|
|
74
74
|
idx_t lo = array[key];
|