faiss 0.1.5 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +6 -2
- data/ext/faiss/index.cpp +114 -43
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss.rb +0 -5
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +24 -10
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#pragma once
|
|
11
9
|
|
|
12
10
|
#include <faiss/Index.h>
|
|
@@ -16,10 +14,11 @@ namespace faiss {
|
|
|
16
14
|
|
|
17
15
|
/** Build and index with the sequence of processing steps described in
|
|
18
16
|
* the string. */
|
|
19
|
-
Index
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
17
|
+
Index* index_factory(
|
|
18
|
+
int d,
|
|
19
|
+
const char* description,
|
|
20
|
+
MetricType metric = METRIC_L2);
|
|
23
21
|
|
|
22
|
+
IndexBinary* index_binary_factory(int d, const char* description);
|
|
24
23
|
|
|
25
|
-
}
|
|
24
|
+
} // namespace faiss
|
|
@@ -12,10 +12,9 @@
|
|
|
12
12
|
#ifndef FAISS_INDEX_IO_H
|
|
13
13
|
#define FAISS_INDEX_IO_H
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
#include <cstdio>
|
|
17
|
-
#include <typeinfo>
|
|
18
16
|
#include <string>
|
|
17
|
+
#include <typeinfo>
|
|
19
18
|
#include <vector>
|
|
20
19
|
|
|
21
20
|
/** I/O functions can read/write to a filename, a file handle or to an
|
|
@@ -36,13 +35,13 @@ struct IOReader;
|
|
|
36
35
|
struct IOWriter;
|
|
37
36
|
struct InvertedLists;
|
|
38
37
|
|
|
39
|
-
void write_index
|
|
40
|
-
void write_index
|
|
41
|
-
void write_index
|
|
38
|
+
void write_index(const Index* idx, const char* fname);
|
|
39
|
+
void write_index(const Index* idx, FILE* f);
|
|
40
|
+
void write_index(const Index* idx, IOWriter* writer);
|
|
42
41
|
|
|
43
|
-
void write_index_binary
|
|
44
|
-
void write_index_binary
|
|
45
|
-
void write_index_binary
|
|
42
|
+
void write_index_binary(const IndexBinary* idx, const char* fname);
|
|
43
|
+
void write_index_binary(const IndexBinary* idx, FILE* f);
|
|
44
|
+
void write_index_binary(const IndexBinary* idx, IOWriter* writer);
|
|
46
45
|
|
|
47
46
|
// The read_index flags are implemented only for a subset of index types.
|
|
48
47
|
const int IO_FLAG_READ_ONLY = 2;
|
|
@@ -51,32 +50,30 @@ const int IO_FLAG_READ_ONLY = 2;
|
|
|
51
50
|
const int IO_FLAG_ONDISK_SAME_DIR = 4;
|
|
52
51
|
// don't load IVF data to RAM, only list sizes
|
|
53
52
|
const int IO_FLAG_SKIP_IVF_DATA = 8;
|
|
54
|
-
// try to memmap data (useful to load an ArrayInvertedLists as an
|
|
53
|
+
// try to memmap data (useful to load an ArrayInvertedLists as an
|
|
54
|
+
// OnDiskInvertedLists)
|
|
55
55
|
const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
|
|
56
56
|
|
|
57
|
+
Index* read_index(const char* fname, int io_flags = 0);
|
|
58
|
+
Index* read_index(FILE* f, int io_flags = 0);
|
|
59
|
+
Index* read_index(IOReader* reader, int io_flags = 0);
|
|
57
60
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
+
IndexBinary* read_index_binary(const char* fname, int io_flags = 0);
|
|
62
|
+
IndexBinary* read_index_binary(FILE* f, int io_flags = 0);
|
|
63
|
+
IndexBinary* read_index_binary(IOReader* reader, int io_flags = 0);
|
|
61
64
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
IndexBinary *read_index_binary (IOReader *reader, int io_flags = 0);
|
|
65
|
+
void write_VectorTransform(const VectorTransform* vt, const char* fname);
|
|
66
|
+
VectorTransform* read_VectorTransform(const char* fname);
|
|
65
67
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
+
ProductQuantizer* read_ProductQuantizer(const char* fname);
|
|
69
|
+
ProductQuantizer* read_ProductQuantizer(IOReader* reader);
|
|
68
70
|
|
|
69
|
-
ProductQuantizer
|
|
70
|
-
ProductQuantizer
|
|
71
|
-
|
|
72
|
-
void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname);
|
|
73
|
-
void write_ProductQuantizer (const ProductQuantizer*pq, IOWriter *f);
|
|
74
|
-
|
|
75
|
-
void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
|
|
76
|
-
InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
|
|
71
|
+
void write_ProductQuantizer(const ProductQuantizer* pq, const char* fname);
|
|
72
|
+
void write_ProductQuantizer(const ProductQuantizer* pq, IOWriter* f);
|
|
77
73
|
|
|
74
|
+
void write_InvertedLists(const InvertedLists* ils, IOWriter* f);
|
|
75
|
+
InvertedLists* read_InvertedLists(IOReader* reader, int io_flags = 0);
|
|
78
76
|
|
|
79
77
|
} // namespace faiss
|
|
80
78
|
|
|
81
|
-
|
|
82
79
|
#endif
|
|
@@ -12,81 +12,80 @@
|
|
|
12
12
|
#include <faiss/impl/io.h>
|
|
13
13
|
#include <faiss/impl/io_macros.h>
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
namespace faiss {
|
|
17
16
|
|
|
18
|
-
BlockInvertedLists::BlockInvertedLists
|
|
19
|
-
size_t nlist,
|
|
20
|
-
size_t
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
17
|
+
BlockInvertedLists::BlockInvertedLists(
|
|
18
|
+
size_t nlist,
|
|
19
|
+
size_t n_per_block,
|
|
20
|
+
size_t block_size)
|
|
21
|
+
: InvertedLists(nlist, InvertedLists::INVALID_CODE_SIZE),
|
|
22
|
+
n_per_block(n_per_block),
|
|
23
|
+
block_size(block_size) {
|
|
24
|
+
ids.resize(nlist);
|
|
25
|
+
codes.resize(nlist);
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
-
BlockInvertedLists::BlockInvertedLists
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
{}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
size_t
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
{
|
|
38
|
-
if (n_entry == 0)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
28
|
+
BlockInvertedLists::BlockInvertedLists()
|
|
29
|
+
: InvertedLists(0, InvertedLists::INVALID_CODE_SIZE),
|
|
30
|
+
n_per_block(0),
|
|
31
|
+
block_size(0) {}
|
|
32
|
+
|
|
33
|
+
size_t BlockInvertedLists::add_entries(
|
|
34
|
+
size_t list_no,
|
|
35
|
+
size_t n_entry,
|
|
36
|
+
const idx_t* ids_in,
|
|
37
|
+
const uint8_t* code) {
|
|
38
|
+
if (n_entry == 0)
|
|
39
|
+
return 0;
|
|
40
|
+
FAISS_THROW_IF_NOT(list_no < nlist);
|
|
41
|
+
size_t o = ids[list_no].size();
|
|
42
|
+
FAISS_THROW_IF_NOT(
|
|
43
|
+
o == 0); // not clear how we should handle subsequent adds
|
|
44
|
+
ids[list_no].resize(o + n_entry);
|
|
45
|
+
memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
|
|
44
46
|
|
|
45
47
|
// copy whole blocks
|
|
46
48
|
size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
|
|
47
|
-
codes
|
|
48
|
-
memcpy
|
|
49
|
+
codes[list_no].resize(n_block * block_size);
|
|
50
|
+
memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
|
|
49
51
|
return o;
|
|
50
52
|
}
|
|
51
53
|
|
|
52
|
-
size_t BlockInvertedLists::list_size(size_t list_no) const
|
|
53
|
-
|
|
54
|
-
assert (list_no < nlist);
|
|
54
|
+
size_t BlockInvertedLists::list_size(size_t list_no) const {
|
|
55
|
+
assert(list_no < nlist);
|
|
55
56
|
return ids[list_no].size();
|
|
56
57
|
}
|
|
57
58
|
|
|
58
|
-
const uint8_t
|
|
59
|
-
|
|
60
|
-
assert (list_no < nlist);
|
|
59
|
+
const uint8_t* BlockInvertedLists::get_codes(size_t list_no) const {
|
|
60
|
+
assert(list_no < nlist);
|
|
61
61
|
return codes[list_no].get();
|
|
62
62
|
}
|
|
63
63
|
|
|
64
|
-
const InvertedLists::idx_t
|
|
65
|
-
|
|
66
|
-
assert (list_no < nlist);
|
|
64
|
+
const InvertedLists::idx_t* BlockInvertedLists::get_ids(size_t list_no) const {
|
|
65
|
+
assert(list_no < nlist);
|
|
67
66
|
return ids[list_no].data();
|
|
68
67
|
}
|
|
69
68
|
|
|
70
|
-
void BlockInvertedLists::resize
|
|
71
|
-
|
|
72
|
-
ids[list_no].resize (new_size);
|
|
69
|
+
void BlockInvertedLists::resize(size_t list_no, size_t new_size) {
|
|
70
|
+
ids[list_no].resize(new_size);
|
|
73
71
|
size_t prev_nbytes = codes[list_no].size();
|
|
74
72
|
size_t n_block = (new_size + n_per_block - 1) / n_per_block;
|
|
75
73
|
size_t new_nbytes = n_block * block_size;
|
|
76
|
-
codes[list_no].resize
|
|
74
|
+
codes[list_no].resize(new_nbytes);
|
|
77
75
|
if (prev_nbytes < new_nbytes) {
|
|
78
76
|
// set new elements to 0
|
|
79
|
-
memset(
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
);
|
|
77
|
+
memset(codes[list_no].data() + prev_nbytes,
|
|
78
|
+
0,
|
|
79
|
+
new_nbytes - prev_nbytes);
|
|
83
80
|
}
|
|
84
81
|
}
|
|
85
82
|
|
|
86
|
-
void BlockInvertedLists::update_entries
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
83
|
+
void BlockInvertedLists::update_entries(
|
|
84
|
+
size_t,
|
|
85
|
+
size_t,
|
|
86
|
+
size_t,
|
|
87
|
+
const idx_t*,
|
|
88
|
+
const uint8_t*) {
|
|
90
89
|
FAISS_THROW_MSG("not impemented");
|
|
91
90
|
/*
|
|
92
91
|
assert (list_no < nlist);
|
|
@@ -96,29 +95,25 @@ void BlockInvertedLists::update_entries (
|
|
|
96
95
|
*/
|
|
97
96
|
}
|
|
98
97
|
|
|
99
|
-
|
|
100
|
-
BlockInvertedLists::~BlockInvertedLists ()
|
|
101
|
-
{}
|
|
98
|
+
BlockInvertedLists::~BlockInvertedLists() {}
|
|
102
99
|
|
|
103
100
|
/**************************************************
|
|
104
101
|
* IO hook implementation
|
|
105
102
|
**************************************************/
|
|
106
103
|
|
|
107
|
-
BlockInvertedListsIOHook::BlockInvertedListsIOHook()
|
|
108
|
-
|
|
109
|
-
{}
|
|
110
|
-
|
|
104
|
+
BlockInvertedListsIOHook::BlockInvertedListsIOHook()
|
|
105
|
+
: InvertedListsIOHook("ilbl", typeid(BlockInvertedLists).name()) {}
|
|
111
106
|
|
|
112
|
-
void BlockInvertedListsIOHook::write(const InvertedLists
|
|
113
|
-
{
|
|
114
|
-
uint32_t h = fourcc
|
|
115
|
-
WRITE1
|
|
116
|
-
const BlockInvertedLists
|
|
117
|
-
|
|
118
|
-
WRITE1
|
|
119
|
-
WRITE1
|
|
120
|
-
WRITE1
|
|
121
|
-
WRITE1
|
|
107
|
+
void BlockInvertedListsIOHook::write(const InvertedLists* ils_in, IOWriter* f)
|
|
108
|
+
const {
|
|
109
|
+
uint32_t h = fourcc("ilbl");
|
|
110
|
+
WRITE1(h);
|
|
111
|
+
const BlockInvertedLists* il =
|
|
112
|
+
dynamic_cast<const BlockInvertedLists*>(ils_in);
|
|
113
|
+
WRITE1(il->nlist);
|
|
114
|
+
WRITE1(il->code_size);
|
|
115
|
+
WRITE1(il->n_per_block);
|
|
116
|
+
WRITE1(il->block_size);
|
|
122
117
|
|
|
123
118
|
for (size_t i = 0; i < il->nlist; i++) {
|
|
124
119
|
WRITEVECTOR(il->ids[i]);
|
|
@@ -126,13 +121,13 @@ void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) c
|
|
|
126
121
|
}
|
|
127
122
|
}
|
|
128
123
|
|
|
129
|
-
InvertedLists
|
|
130
|
-
{
|
|
131
|
-
BlockInvertedLists
|
|
132
|
-
READ1
|
|
133
|
-
READ1
|
|
134
|
-
READ1
|
|
135
|
-
READ1
|
|
124
|
+
InvertedLists* BlockInvertedListsIOHook::read(IOReader* f, int /* io_flags */)
|
|
125
|
+
const {
|
|
126
|
+
BlockInvertedLists* il = new BlockInvertedLists();
|
|
127
|
+
READ1(il->nlist);
|
|
128
|
+
READ1(il->code_size);
|
|
129
|
+
READ1(il->n_per_block);
|
|
130
|
+
READ1(il->block_size);
|
|
136
131
|
|
|
137
132
|
il->ids.resize(il->nlist);
|
|
138
133
|
il->codes.resize(il->nlist);
|
|
@@ -145,7 +140,4 @@ InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */)
|
|
|
145
140
|
return il;
|
|
146
141
|
}
|
|
147
142
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
143
|
} // namespace faiss
|
|
@@ -5,13 +5,12 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
10
|
+
#include <faiss/index_io.h>
|
|
11
11
|
#include <faiss/invlists/InvertedLists.h>
|
|
12
12
|
#include <faiss/invlists/InvertedListsIOHook.h>
|
|
13
13
|
#include <faiss/utils/AlignedTable.h>
|
|
14
|
-
#include <faiss/index_io.h>
|
|
15
14
|
|
|
16
15
|
namespace faiss {
|
|
17
16
|
|
|
@@ -28,49 +27,48 @@ namespace faiss {
|
|
|
28
27
|
* The writing functions add_entries and update_entries operate on block-aligned
|
|
29
28
|
* data.
|
|
30
29
|
*/
|
|
31
|
-
struct BlockInvertedLists: InvertedLists {
|
|
32
|
-
|
|
33
|
-
size_t
|
|
34
|
-
size_t block_size; // nb bytes per block
|
|
30
|
+
struct BlockInvertedLists : InvertedLists {
|
|
31
|
+
size_t n_per_block; // nb of vectors stored per block
|
|
32
|
+
size_t block_size; // nb bytes per block
|
|
35
33
|
|
|
36
34
|
std::vector<AlignedTable<uint8_t>> codes;
|
|
37
35
|
std::vector<std::vector<idx_t>> ids;
|
|
38
36
|
|
|
39
|
-
|
|
40
|
-
BlockInvertedLists (
|
|
41
|
-
size_t nlist, size_t vec_per_block,
|
|
42
|
-
size_t block_size
|
|
43
|
-
);
|
|
37
|
+
BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
|
|
44
38
|
|
|
45
39
|
BlockInvertedLists();
|
|
46
40
|
|
|
47
41
|
size_t list_size(size_t list_no) const override;
|
|
48
|
-
const uint8_t
|
|
49
|
-
const idx_t
|
|
42
|
+
const uint8_t* get_codes(size_t list_no) const override;
|
|
43
|
+
const idx_t* get_ids(size_t list_no) const override;
|
|
50
44
|
|
|
51
45
|
// works only on empty BlockInvertedLists
|
|
52
46
|
// the codes should be of size ceil(n_entry / n_per_block) * block_size
|
|
53
47
|
// and padded with 0s
|
|
54
|
-
size_t add_entries
|
|
55
|
-
|
|
56
|
-
|
|
48
|
+
size_t add_entries(
|
|
49
|
+
size_t list_no,
|
|
50
|
+
size_t n_entry,
|
|
51
|
+
const idx_t* ids,
|
|
52
|
+
const uint8_t* code) override;
|
|
57
53
|
|
|
58
54
|
/// not implemented
|
|
59
|
-
void update_entries
|
|
60
|
-
|
|
55
|
+
void update_entries(
|
|
56
|
+
size_t list_no,
|
|
57
|
+
size_t offset,
|
|
58
|
+
size_t n_entry,
|
|
59
|
+
const idx_t* ids,
|
|
60
|
+
const uint8_t* code) override;
|
|
61
61
|
|
|
62
62
|
// also pads new data with 0s
|
|
63
|
-
void resize
|
|
64
|
-
|
|
65
|
-
~BlockInvertedLists () override;
|
|
63
|
+
void resize(size_t list_no, size_t new_size) override;
|
|
66
64
|
|
|
65
|
+
~BlockInvertedLists() override;
|
|
67
66
|
};
|
|
68
67
|
|
|
69
68
|
struct BlockInvertedListsIOHook : InvertedListsIOHook {
|
|
70
69
|
BlockInvertedListsIOHook();
|
|
71
|
-
void write(const InvertedLists
|
|
72
|
-
InvertedLists
|
|
70
|
+
void write(const InvertedLists* ils, IOWriter* f) const override;
|
|
71
|
+
InvertedLists* read(IOReader* f, int io_flags) const override;
|
|
73
72
|
};
|
|
74
73
|
|
|
75
|
-
|
|
76
74
|
} // namespace faiss
|
|
@@ -9,142 +9,130 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/invlists/DirectMap.h>
|
|
11
11
|
|
|
12
|
-
#include <cstdio>
|
|
13
12
|
#include <cassert>
|
|
13
|
+
#include <cstdio>
|
|
14
14
|
|
|
15
|
-
#include <faiss/impl/FaissAssert.h>
|
|
16
15
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
16
|
+
#include <faiss/impl/FaissAssert.h>
|
|
17
17
|
|
|
18
18
|
namespace faiss {
|
|
19
19
|
|
|
20
|
-
DirectMap::DirectMap(): type(NoMap)
|
|
21
|
-
{}
|
|
20
|
+
DirectMap::DirectMap() : type(NoMap) {}
|
|
22
21
|
|
|
23
|
-
void DirectMap::set_type
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
void DirectMap::set_type(
|
|
23
|
+
Type new_type,
|
|
24
|
+
const InvertedLists* invlists,
|
|
25
|
+
size_t ntotal) {
|
|
26
|
+
FAISS_THROW_IF_NOT(
|
|
27
|
+
new_type == NoMap || new_type == Array || new_type == Hashtable);
|
|
27
28
|
|
|
28
29
|
if (new_type == type) {
|
|
29
30
|
// nothing to do
|
|
30
31
|
return;
|
|
31
32
|
}
|
|
32
33
|
|
|
33
|
-
array.clear
|
|
34
|
-
hashtable.clear
|
|
34
|
+
array.clear();
|
|
35
|
+
hashtable.clear();
|
|
35
36
|
type = new_type;
|
|
36
37
|
|
|
37
38
|
if (new_type == NoMap) {
|
|
38
39
|
return;
|
|
39
40
|
} else if (new_type == Array) {
|
|
40
|
-
array.resize
|
|
41
|
+
array.resize(ntotal, -1);
|
|
41
42
|
} else if (new_type == Hashtable) {
|
|
42
|
-
hashtable.reserve
|
|
43
|
+
hashtable.reserve(ntotal);
|
|
43
44
|
}
|
|
44
45
|
|
|
45
46
|
for (size_t key = 0; key < invlists->nlist; key++) {
|
|
46
|
-
size_t list_size = invlists->list_size
|
|
47
|
-
InvertedLists::ScopedIds idlist
|
|
47
|
+
size_t list_size = invlists->list_size(key);
|
|
48
|
+
InvertedLists::ScopedIds idlist(invlists, key);
|
|
48
49
|
|
|
49
50
|
if (new_type == Array) {
|
|
50
51
|
for (long ofs = 0; ofs < list_size; ofs++) {
|
|
51
|
-
FAISS_THROW_IF_NOT_MSG
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
array
|
|
52
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
53
|
+
0 <= idlist[ofs] && idlist[ofs] < ntotal,
|
|
54
|
+
"direct map supported only for seuquential ids");
|
|
55
|
+
array[idlist[ofs]] = lo_build(key, ofs);
|
|
55
56
|
}
|
|
56
57
|
} else if (new_type == Hashtable) {
|
|
57
58
|
for (long ofs = 0; ofs < list_size; ofs++) {
|
|
58
|
-
hashtable
|
|
59
|
+
hashtable[idlist[ofs]] = lo_build(key, ofs);
|
|
59
60
|
}
|
|
60
61
|
}
|
|
61
62
|
}
|
|
62
63
|
}
|
|
63
64
|
|
|
64
|
-
void DirectMap::clear()
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
hashtable.clear ();
|
|
65
|
+
void DirectMap::clear() {
|
|
66
|
+
array.clear();
|
|
67
|
+
hashtable.clear();
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
|
|
71
|
-
DirectMap::idx_t DirectMap::get (idx_t key) const
|
|
72
|
-
{
|
|
70
|
+
DirectMap::idx_t DirectMap::get(idx_t key) const {
|
|
73
71
|
if (type == Array) {
|
|
74
|
-
FAISS_THROW_IF_NOT_MSG (
|
|
75
|
-
key >= 0 && key < array.size(), "invalid key"
|
|
76
|
-
);
|
|
72
|
+
FAISS_THROW_IF_NOT_MSG(key >= 0 && key < array.size(), "invalid key");
|
|
77
73
|
idx_t lo = array[key];
|
|
78
74
|
FAISS_THROW_IF_NOT_MSG(lo >= 0, "-1 entry in direct_map");
|
|
79
75
|
return lo;
|
|
80
76
|
} else if (type == Hashtable) {
|
|
81
|
-
auto res = hashtable.find
|
|
82
|
-
FAISS_THROW_IF_NOT_MSG
|
|
77
|
+
auto res = hashtable.find(key);
|
|
78
|
+
FAISS_THROW_IF_NOT_MSG(res != hashtable.end(), "key not found");
|
|
83
79
|
return res->second;
|
|
84
80
|
} else {
|
|
85
|
-
FAISS_THROW_MSG
|
|
81
|
+
FAISS_THROW_MSG("direct map not initialized");
|
|
86
82
|
}
|
|
87
83
|
}
|
|
88
84
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
{
|
|
93
|
-
if (type == NoMap) return;
|
|
85
|
+
void DirectMap::add_single_id(idx_t id, idx_t list_no, size_t offset) {
|
|
86
|
+
if (type == NoMap)
|
|
87
|
+
return;
|
|
94
88
|
|
|
95
89
|
if (type == Array) {
|
|
96
|
-
assert
|
|
90
|
+
assert(id == array.size());
|
|
97
91
|
if (list_no >= 0) {
|
|
98
|
-
array.push_back
|
|
92
|
+
array.push_back(lo_build(list_no, offset));
|
|
99
93
|
} else {
|
|
100
|
-
array.push_back
|
|
94
|
+
array.push_back(-1);
|
|
101
95
|
}
|
|
102
96
|
} else if (type == Hashtable) {
|
|
103
97
|
if (list_no >= 0) {
|
|
104
|
-
hashtable[id] = lo_build
|
|
98
|
+
hashtable[id] = lo_build(list_no, offset);
|
|
105
99
|
}
|
|
106
100
|
}
|
|
107
|
-
|
|
108
101
|
}
|
|
109
102
|
|
|
110
|
-
void DirectMap::check_can_add
|
|
103
|
+
void DirectMap::check_can_add(const idx_t* ids) {
|
|
111
104
|
if (type == Array && ids) {
|
|
112
|
-
FAISS_THROW_MSG
|
|
105
|
+
FAISS_THROW_MSG("cannot have array direct map and add with ids");
|
|
113
106
|
}
|
|
114
107
|
}
|
|
115
108
|
|
|
116
109
|
/********************* DirectMapAdd implementation */
|
|
117
110
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if (type == DirectMap::Array) {
|
|
123
|
-
FAISS_THROW_IF_NOT (xids == nullptr);
|
|
111
|
+
DirectMapAdd::DirectMapAdd(DirectMap& direct_map, size_t n, const idx_t* xids)
|
|
112
|
+
: direct_map(direct_map), type(direct_map.type), n(n), xids(xids) {
|
|
113
|
+
if (type == DirectMap::Array) {
|
|
114
|
+
FAISS_THROW_IF_NOT(xids == nullptr);
|
|
124
115
|
ntotal = direct_map.array.size();
|
|
125
|
-
direct_map.array.resize
|
|
116
|
+
direct_map.array.resize(ntotal + n, -1);
|
|
126
117
|
} else if (type == DirectMap::Hashtable) {
|
|
127
118
|
// can't parallel update hashtable so use temp array
|
|
128
|
-
all_ofs.resize
|
|
119
|
+
all_ofs.resize(n, -1);
|
|
129
120
|
}
|
|
130
121
|
}
|
|
131
122
|
|
|
132
|
-
|
|
133
|
-
void DirectMapAdd::add (size_t i, idx_t list_no, size_t ofs)
|
|
134
|
-
{
|
|
123
|
+
void DirectMapAdd::add(size_t i, idx_t list_no, size_t ofs) {
|
|
135
124
|
if (type == DirectMap::Array) {
|
|
136
|
-
direct_map.array
|
|
125
|
+
direct_map.array[ntotal + i] = lo_build(list_no, ofs);
|
|
137
126
|
} else if (type == DirectMap::Hashtable) {
|
|
138
|
-
all_ofs
|
|
127
|
+
all_ofs[i] = lo_build(list_no, ofs);
|
|
139
128
|
}
|
|
140
129
|
}
|
|
141
130
|
|
|
142
|
-
DirectMapAdd::~DirectMapAdd
|
|
143
|
-
{
|
|
131
|
+
DirectMapAdd::~DirectMapAdd() {
|
|
144
132
|
if (type == DirectMap::Hashtable) {
|
|
145
133
|
for (int i = 0; i < n; i++) {
|
|
146
134
|
idx_t id = xids ? xids[i] : ntotal + i;
|
|
147
|
-
direct_map.hashtable
|
|
135
|
+
direct_map.hashtable[id] = all_ofs[i];
|
|
148
136
|
}
|
|
149
137
|
}
|
|
150
138
|
}
|
|
@@ -154,9 +142,7 @@ DirectMapAdd::~DirectMapAdd ()
|
|
|
154
142
|
using ScopedCodes = InvertedLists::ScopedCodes;
|
|
155
143
|
using ScopedIds = InvertedLists::ScopedIds;
|
|
156
144
|
|
|
157
|
-
|
|
158
|
-
size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
|
|
159
|
-
{
|
|
145
|
+
size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists* invlists) {
|
|
160
146
|
size_t nlist = invlists->nlist;
|
|
161
147
|
std::vector<idx_t> toremove(nlist);
|
|
162
148
|
|
|
@@ -166,16 +152,16 @@ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
|
|
|
166
152
|
// exhaustive scan of IVF
|
|
167
153
|
#pragma omp parallel for
|
|
168
154
|
for (idx_t i = 0; i < nlist; i++) {
|
|
169
|
-
idx_t l0 = invlists->list_size
|
|
170
|
-
ScopedIds idsi
|
|
155
|
+
idx_t l0 = invlists->list_size(i), l = l0, j = 0;
|
|
156
|
+
ScopedIds idsi(invlists, i);
|
|
171
157
|
while (j < l) {
|
|
172
|
-
if (sel.is_member
|
|
158
|
+
if (sel.is_member(idsi[j])) {
|
|
173
159
|
l--;
|
|
174
|
-
invlists->update_entry
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
160
|
+
invlists->update_entry(
|
|
161
|
+
i,
|
|
162
|
+
j,
|
|
163
|
+
invlists->get_single_id(i, l),
|
|
164
|
+
ScopedCodes(invlists, i, l).get());
|
|
179
165
|
} else {
|
|
180
166
|
j++;
|
|
181
167
|
}
|
|
@@ -191,30 +177,28 @@ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
|
|
|
191
177
|
}
|
|
192
178
|
}
|
|
193
179
|
} else if (type == Hashtable) {
|
|
194
|
-
const IDSelectorArray
|
|
195
|
-
|
|
196
|
-
FAISS_THROW_IF_NOT_MSG
|
|
197
|
-
|
|
198
|
-
"remove with hashtable works only with IDSelectorArray"
|
|
199
|
-
);
|
|
180
|
+
const IDSelectorArray* sela =
|
|
181
|
+
dynamic_cast<const IDSelectorArray*>(&sel);
|
|
182
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
183
|
+
sela, "remove with hashtable works only with IDSelectorArray");
|
|
200
184
|
|
|
201
185
|
for (idx_t i = 0; i < sela->n; i++) {
|
|
202
186
|
idx_t id = sela->ids[i];
|
|
203
|
-
auto res = hashtable.find
|
|
187
|
+
auto res = hashtable.find(id);
|
|
204
188
|
if (res != hashtable.end()) {
|
|
205
|
-
size_t list_no = lo_listno
|
|
206
|
-
size_t offset = lo_offset
|
|
207
|
-
idx_t last = invlists->list_size
|
|
208
|
-
hashtable.erase
|
|
189
|
+
size_t list_no = lo_listno(res->second);
|
|
190
|
+
size_t offset = lo_offset(res->second);
|
|
191
|
+
idx_t last = invlists->list_size(list_no) - 1;
|
|
192
|
+
hashtable.erase(res);
|
|
209
193
|
if (offset < last) {
|
|
210
|
-
idx_t last_id = invlists->get_single_id
|
|
211
|
-
invlists->update_entry
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
194
|
+
idx_t last_id = invlists->get_single_id(list_no, last);
|
|
195
|
+
invlists->update_entry(
|
|
196
|
+
list_no,
|
|
197
|
+
offset,
|
|
198
|
+
last_id,
|
|
199
|
+
ScopedCodes(invlists, list_no, last).get());
|
|
216
200
|
// update hash entry for last element
|
|
217
|
-
hashtable
|
|
201
|
+
hashtable[last_id] = list_no << 32 | offset;
|
|
218
202
|
}
|
|
219
203
|
invlists->resize(list_no, last);
|
|
220
204
|
nremove++;
|
|
@@ -227,41 +211,41 @@ size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
|
|
|
227
211
|
return nremove;
|
|
228
212
|
}
|
|
229
213
|
|
|
230
|
-
void DirectMap::update_codes
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
214
|
+
void DirectMap::update_codes(
|
|
215
|
+
InvertedLists* invlists,
|
|
216
|
+
int n,
|
|
217
|
+
const idx_t* ids,
|
|
218
|
+
const idx_t* assign,
|
|
219
|
+
const uint8_t* codes) {
|
|
220
|
+
FAISS_THROW_IF_NOT(type == Array);
|
|
236
221
|
|
|
237
222
|
size_t code_size = invlists->code_size;
|
|
238
223
|
|
|
239
224
|
for (size_t i = 0; i < n; i++) {
|
|
240
225
|
idx_t id = ids[i];
|
|
241
|
-
FAISS_THROW_IF_NOT_MSG
|
|
242
|
-
|
|
226
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
227
|
+
0 <= id && id < array.size(), "id to update out of range");
|
|
243
228
|
{ // remove old one
|
|
244
|
-
idx_t dm = array
|
|
245
|
-
int64_t ofs = lo_offset
|
|
246
|
-
int64_t il = lo_listno
|
|
247
|
-
size_t l = invlists->list_size
|
|
229
|
+
idx_t dm = array[id];
|
|
230
|
+
int64_t ofs = lo_offset(dm);
|
|
231
|
+
int64_t il = lo_listno(dm);
|
|
232
|
+
size_t l = invlists->list_size(il);
|
|
248
233
|
if (ofs != l - 1) { // move l - 1 to ofs
|
|
249
|
-
int64_t id2 = invlists->get_single_id
|
|
250
|
-
array[id2] = lo_build
|
|
251
|
-
invlists->update_entry
|
|
252
|
-
|
|
234
|
+
int64_t id2 = invlists->get_single_id(il, l - 1);
|
|
235
|
+
array[id2] = lo_build(il, ofs);
|
|
236
|
+
invlists->update_entry(
|
|
237
|
+
il, ofs, id2, invlists->get_single_code(il, l - 1));
|
|
253
238
|
}
|
|
254
|
-
invlists->resize
|
|
239
|
+
invlists->resize(il, l - 1);
|
|
255
240
|
}
|
|
256
241
|
{ // insert new one
|
|
257
242
|
int64_t il = assign[i];
|
|
258
|
-
size_t l = invlists->list_size
|
|
259
|
-
idx_t dm = lo_build
|
|
260
|
-
array
|
|
261
|
-
invlists->add_entry
|
|
243
|
+
size_t l = invlists->list_size(il);
|
|
244
|
+
idx_t dm = lo_build(il, l);
|
|
245
|
+
array[id] = dm;
|
|
246
|
+
invlists->add_entry(il, id, codes + i * code_size);
|
|
262
247
|
}
|
|
263
248
|
}
|
|
264
249
|
}
|
|
265
250
|
|
|
266
|
-
|
|
267
|
-
}
|
|
251
|
+
} // namespace faiss
|