faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -10,12 +10,10 @@
|
|
|
10
10
|
#ifndef FAISS_INDEX_IVFSH_H
|
|
11
11
|
#define FAISS_INDEX_IVFSH_H
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
#include <vector>
|
|
15
14
|
|
|
16
15
|
#include <faiss/IndexIVF.h>
|
|
17
16
|
|
|
18
|
-
|
|
19
17
|
namespace faiss {
|
|
20
18
|
|
|
21
19
|
struct VectorTransform;
|
|
@@ -28,9 +26,8 @@ struct VectorTransform;
|
|
|
28
26
|
* threshold_type, and split into intervals of size period. Half of
|
|
29
27
|
* the interval is a 0 bit, the other half a 1.
|
|
30
28
|
*/
|
|
31
|
-
struct IndexIVFSpectralHash: IndexIVF {
|
|
32
|
-
|
|
33
|
-
VectorTransform *vt; // transformation from d to nbit dim
|
|
29
|
+
struct IndexIVFSpectralHash : IndexIVF {
|
|
30
|
+
VectorTransform* vt; // transformation from d to nbit dim
|
|
34
31
|
bool own_fields;
|
|
35
32
|
|
|
36
33
|
int nbit;
|
|
@@ -47,29 +44,30 @@ struct IndexIVFSpectralHash: IndexIVF {
|
|
|
47
44
|
// size nlist * nbit or 0 if Thresh_global
|
|
48
45
|
std::vector<float> trained;
|
|
49
46
|
|
|
50
|
-
IndexIVFSpectralHash
|
|
51
|
-
|
|
47
|
+
IndexIVFSpectralHash(
|
|
48
|
+
Index* quantizer,
|
|
49
|
+
size_t d,
|
|
50
|
+
size_t nlist,
|
|
51
|
+
int nbit,
|
|
52
|
+
float period);
|
|
52
53
|
|
|
53
|
-
IndexIVFSpectralHash
|
|
54
|
+
IndexIVFSpectralHash();
|
|
54
55
|
|
|
55
56
|
void train_residual(idx_t n, const float* x) override;
|
|
56
57
|
|
|
57
|
-
void encode_vectors(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const override;
|
|
58
|
+
void encode_vectors(
|
|
59
|
+
idx_t n,
|
|
60
|
+
const float* x,
|
|
61
|
+
const idx_t* list_nos,
|
|
62
|
+
uint8_t* codes,
|
|
63
|
+
bool include_listnos = false) const override;
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
InvertedListScanner* get_InvertedListScanner(
|
|
66
|
+
bool store_pairs) const override;
|
|
66
67
|
|
|
68
|
+
~IndexIVFSpectralHash() override;
|
|
67
69
|
};
|
|
68
70
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
}; // namespace faiss
|
|
73
|
-
|
|
71
|
+
} // namespace faiss
|
|
74
72
|
|
|
75
73
|
#endif
|
|
@@ -14,10 +14,9 @@
|
|
|
14
14
|
|
|
15
15
|
#include <algorithm>
|
|
16
16
|
|
|
17
|
-
#include <faiss/utils/utils.h>
|
|
18
|
-
#include <faiss/utils/hamming.h>
|
|
19
17
|
#include <faiss/impl/FaissAssert.h>
|
|
20
|
-
|
|
18
|
+
#include <faiss/utils/hamming.h>
|
|
19
|
+
#include <faiss/utils/utils.h>
|
|
21
20
|
|
|
22
21
|
namespace faiss {
|
|
23
22
|
|
|
@@ -25,11 +24,12 @@ namespace faiss {
|
|
|
25
24
|
* IndexLSH
|
|
26
25
|
***************************************************************/
|
|
27
26
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
IndexLSH::IndexLSH(idx_t d, int nbits, bool rotate_data, bool train_thresholds)
|
|
28
|
+
: Index(d),
|
|
29
|
+
nbits(nbits),
|
|
30
|
+
rotate_data(rotate_data),
|
|
31
|
+
train_thresholds(train_thresholds),
|
|
32
|
+
rrot(d, nbits) {
|
|
33
33
|
is_trained = !train_thresholds;
|
|
34
34
|
|
|
35
35
|
bytes_per_vec = (nbits + 7) / 8;
|
|
@@ -37,131 +37,119 @@ IndexLSH::IndexLSH (idx_t d, int nbits, bool rotate_data, bool train_thresholds)
|
|
|
37
37
|
if (rotate_data) {
|
|
38
38
|
rrot.init(5);
|
|
39
39
|
} else {
|
|
40
|
-
FAISS_THROW_IF_NOT
|
|
40
|
+
FAISS_THROW_IF_NOT(d >= nbits);
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
IndexLSH::IndexLSH
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
44
|
+
IndexLSH::IndexLSH()
|
|
45
|
+
: nbits(0),
|
|
46
|
+
bytes_per_vec(0),
|
|
47
|
+
rotate_data(false),
|
|
48
|
+
train_thresholds(false) {}
|
|
49
49
|
|
|
50
|
-
const float
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
float *xt = nullptr;
|
|
50
|
+
const float* IndexLSH::apply_preprocess(idx_t n, const float* x) const {
|
|
51
|
+
float* xt = nullptr;
|
|
54
52
|
if (rotate_data) {
|
|
55
53
|
// also applies bias if exists
|
|
56
|
-
xt = rrot.apply
|
|
54
|
+
xt = rrot.apply(n, x);
|
|
57
55
|
} else if (d != nbits) {
|
|
58
|
-
assert
|
|
59
|
-
xt = new float
|
|
60
|
-
float
|
|
56
|
+
assert(nbits < d);
|
|
57
|
+
xt = new float[nbits * n];
|
|
58
|
+
float* xp = xt;
|
|
61
59
|
for (idx_t i = 0; i < n; i++) {
|
|
62
|
-
const float
|
|
60
|
+
const float* xl = x + i * d;
|
|
63
61
|
for (int j = 0; j < nbits; j++)
|
|
64
|
-
*xp++ = xl
|
|
62
|
+
*xp++ = xl[j];
|
|
65
63
|
}
|
|
66
64
|
}
|
|
67
65
|
|
|
68
66
|
if (train_thresholds) {
|
|
69
|
-
|
|
70
67
|
if (xt == NULL) {
|
|
71
|
-
xt = new float
|
|
72
|
-
memcpy
|
|
68
|
+
xt = new float[nbits * n];
|
|
69
|
+
memcpy(xt, x, sizeof(*x) * n * nbits);
|
|
73
70
|
}
|
|
74
71
|
|
|
75
|
-
float
|
|
72
|
+
float* xp = xt;
|
|
76
73
|
for (idx_t i = 0; i < n; i++)
|
|
77
74
|
for (int j = 0; j < nbits; j++)
|
|
78
|
-
*xp++ -= thresholds
|
|
75
|
+
*xp++ -= thresholds[j];
|
|
79
76
|
}
|
|
80
77
|
|
|
81
78
|
return xt ? xt : x;
|
|
82
79
|
}
|
|
83
80
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
void IndexLSH::train (idx_t n, const float *x)
|
|
87
|
-
{
|
|
81
|
+
void IndexLSH::train(idx_t n, const float* x) {
|
|
88
82
|
if (train_thresholds) {
|
|
89
|
-
thresholds.resize
|
|
83
|
+
thresholds.resize(nbits);
|
|
90
84
|
train_thresholds = false;
|
|
91
|
-
const float
|
|
92
|
-
ScopeDeleter<float> del
|
|
85
|
+
const float* xt = apply_preprocess(n, x);
|
|
86
|
+
ScopeDeleter<float> del(xt == x ? nullptr : xt);
|
|
93
87
|
train_thresholds = true;
|
|
94
88
|
|
|
95
|
-
float
|
|
96
|
-
ScopeDeleter<float> del2
|
|
89
|
+
float* transposed_x = new float[n * nbits];
|
|
90
|
+
ScopeDeleter<float> del2(transposed_x);
|
|
97
91
|
|
|
98
92
|
for (idx_t i = 0; i < n; i++)
|
|
99
93
|
for (idx_t j = 0; j < nbits; j++)
|
|
100
|
-
transposed_x
|
|
94
|
+
transposed_x[j * n + i] = xt[i * nbits + j];
|
|
101
95
|
|
|
102
96
|
for (idx_t i = 0; i < nbits; i++) {
|
|
103
|
-
float
|
|
97
|
+
float* xi = transposed_x + i * n;
|
|
104
98
|
// std::nth_element
|
|
105
|
-
std::sort
|
|
99
|
+
std::sort(xi, xi + n);
|
|
106
100
|
if (n % 2 == 1)
|
|
107
|
-
thresholds
|
|
101
|
+
thresholds[i] = xi[n / 2];
|
|
108
102
|
else
|
|
109
|
-
thresholds
|
|
110
|
-
|
|
103
|
+
thresholds[i] = (xi[n / 2 - 1] + xi[n / 2]) / 2;
|
|
111
104
|
}
|
|
112
105
|
}
|
|
113
106
|
is_trained = true;
|
|
114
107
|
}
|
|
115
108
|
|
|
109
|
+
void IndexLSH::add(idx_t n, const float* x) {
|
|
110
|
+
FAISS_THROW_IF_NOT(is_trained);
|
|
111
|
+
codes.resize((ntotal + n) * bytes_per_vec);
|
|
116
112
|
|
|
117
|
-
|
|
118
|
-
{
|
|
119
|
-
FAISS_THROW_IF_NOT (is_trained);
|
|
120
|
-
codes.resize ((ntotal + n) * bytes_per_vec);
|
|
121
|
-
|
|
122
|
-
sa_encode (n, x, &codes[ntotal * bytes_per_vec]);
|
|
113
|
+
sa_encode(n, x, &codes[ntotal * bytes_per_vec]);
|
|
123
114
|
|
|
124
115
|
ntotal += n;
|
|
125
116
|
}
|
|
126
117
|
|
|
127
|
-
|
|
128
|
-
void IndexLSH::search (
|
|
118
|
+
void IndexLSH::search(
|
|
129
119
|
idx_t n,
|
|
130
|
-
const float
|
|
120
|
+
const float* x,
|
|
131
121
|
idx_t k,
|
|
132
|
-
float
|
|
133
|
-
idx_t
|
|
134
|
-
|
|
135
|
-
FAISS_THROW_IF_NOT (is_trained);
|
|
136
|
-
const float *xt = apply_preprocess (n, x);
|
|
137
|
-
ScopeDeleter<float> del (xt == x ? nullptr : xt);
|
|
122
|
+
float* distances,
|
|
123
|
+
idx_t* labels) const {
|
|
124
|
+
FAISS_THROW_IF_NOT(k > 0);
|
|
138
125
|
|
|
139
|
-
|
|
140
|
-
|
|
126
|
+
FAISS_THROW_IF_NOT(is_trained);
|
|
127
|
+
const float* xt = apply_preprocess(n, x);
|
|
128
|
+
ScopeDeleter<float> del(xt == x ? nullptr : xt);
|
|
141
129
|
|
|
142
|
-
|
|
130
|
+
uint8_t* qcodes = new uint8_t[n * bytes_per_vec];
|
|
131
|
+
ScopeDeleter<uint8_t> del2(qcodes);
|
|
143
132
|
|
|
144
|
-
|
|
145
|
-
ScopeDeleter<int> del3 (idistances);
|
|
133
|
+
fvecs2bitvecs(xt, qcodes, nbits, n);
|
|
146
134
|
|
|
147
|
-
|
|
135
|
+
int* idistances = new int[n * k];
|
|
136
|
+
ScopeDeleter<int> del3(idistances);
|
|
148
137
|
|
|
149
|
-
|
|
150
|
-
ntotal, bytes_per_vec, true);
|
|
138
|
+
int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances};
|
|
151
139
|
|
|
140
|
+
hammings_knn_hc(&res, qcodes, codes.data(), ntotal, bytes_per_vec, true);
|
|
152
141
|
|
|
153
142
|
// convert distances to floats
|
|
154
143
|
for (int i = 0; i < k * n; i++)
|
|
155
144
|
distances[i] = idistances[i];
|
|
156
|
-
|
|
157
145
|
}
|
|
158
146
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
FAISS_THROW_IF_NOT
|
|
147
|
+
void IndexLSH::transfer_thresholds(LinearTransform* vt) {
|
|
148
|
+
if (!train_thresholds)
|
|
149
|
+
return;
|
|
150
|
+
FAISS_THROW_IF_NOT(nbits == vt->d_out);
|
|
163
151
|
if (!vt->have_bias) {
|
|
164
|
-
vt->b.resize
|
|
152
|
+
vt->b.resize(nbits, 0);
|
|
165
153
|
vt->have_bias = true;
|
|
166
154
|
}
|
|
167
155
|
for (int i = 0; i < nbits; i++)
|
|
@@ -175,51 +163,42 @@ void IndexLSH::reset() {
|
|
|
175
163
|
ntotal = 0;
|
|
176
164
|
}
|
|
177
165
|
|
|
178
|
-
|
|
179
|
-
size_t IndexLSH::sa_code_size () const
|
|
180
|
-
{
|
|
166
|
+
size_t IndexLSH::sa_code_size() const {
|
|
181
167
|
return bytes_per_vec;
|
|
182
168
|
}
|
|
183
169
|
|
|
184
|
-
void IndexLSH::sa_encode
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
ScopeDeleter<float> del (xt == x ? nullptr : xt);
|
|
190
|
-
fvecs2bitvecs (xt, bytes, nbits, n);
|
|
170
|
+
void IndexLSH::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
|
|
171
|
+
FAISS_THROW_IF_NOT(is_trained);
|
|
172
|
+
const float* xt = apply_preprocess(n, x);
|
|
173
|
+
ScopeDeleter<float> del(xt == x ? nullptr : xt);
|
|
174
|
+
fvecs2bitvecs(xt, bytes, nbits, n);
|
|
191
175
|
}
|
|
192
176
|
|
|
193
|
-
void IndexLSH::sa_decode
|
|
194
|
-
|
|
195
|
-
{
|
|
196
|
-
float *xt = x;
|
|
177
|
+
void IndexLSH::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
|
|
178
|
+
float* xt = x;
|
|
197
179
|
ScopeDeleter<float> del;
|
|
198
180
|
if (rotate_data || nbits != d) {
|
|
199
|
-
xt = new float
|
|
181
|
+
xt = new float[n * nbits];
|
|
200
182
|
del.set(xt);
|
|
201
183
|
}
|
|
202
|
-
bitvecs2fvecs
|
|
184
|
+
bitvecs2fvecs(bytes, xt, nbits, n);
|
|
203
185
|
|
|
204
186
|
if (train_thresholds) {
|
|
205
|
-
float
|
|
187
|
+
float* xp = xt;
|
|
206
188
|
for (idx_t i = 0; i < n; i++) {
|
|
207
189
|
for (int j = 0; j < nbits; j++) {
|
|
208
|
-
*xp++ += thresholds
|
|
190
|
+
*xp++ += thresholds[j];
|
|
209
191
|
}
|
|
210
192
|
}
|
|
211
193
|
}
|
|
212
194
|
|
|
213
195
|
if (rotate_data) {
|
|
214
|
-
rrot.reverse_transform
|
|
196
|
+
rrot.reverse_transform(n, xt, x);
|
|
215
197
|
} else if (nbits != d) {
|
|
216
198
|
for (idx_t i = 0; i < n; i++) {
|
|
217
|
-
memcpy
|
|
218
|
-
nbits * sizeof(xt[0]));
|
|
199
|
+
memcpy(x + i * d, xt + i * nbits, nbits * sizeof(xt[0]));
|
|
219
200
|
}
|
|
220
201
|
}
|
|
221
202
|
}
|
|
222
203
|
|
|
223
|
-
|
|
224
|
-
|
|
225
204
|
} // namespace faiss
|
|
@@ -17,25 +17,25 @@
|
|
|
17
17
|
|
|
18
18
|
namespace faiss {
|
|
19
19
|
|
|
20
|
-
|
|
21
20
|
/** The sign of each vector component is put in a binary signature */
|
|
22
|
-
struct IndexLSH:Index {
|
|
21
|
+
struct IndexLSH : Index {
|
|
23
22
|
typedef unsigned char uint8_t;
|
|
24
23
|
|
|
25
|
-
int nbits;
|
|
26
|
-
int bytes_per_vec;
|
|
27
|
-
bool rotate_data;
|
|
28
|
-
bool train_thresholds;
|
|
24
|
+
int nbits; ///< nb of bits per vector
|
|
25
|
+
int bytes_per_vec; ///< nb of 8-bits per encoded vector
|
|
26
|
+
bool rotate_data; ///< whether to apply a random rotation to input
|
|
27
|
+
bool train_thresholds; ///< whether we train thresholds or use 0
|
|
29
28
|
|
|
30
29
|
RandomRotationMatrix rrot; ///< optional random rotation
|
|
31
30
|
|
|
32
|
-
std::vector
|
|
31
|
+
std::vector<float> thresholds; ///< thresholds to compare with
|
|
33
32
|
|
|
34
33
|
/// encoded dataset
|
|
35
34
|
std::vector<uint8_t> codes;
|
|
36
35
|
|
|
37
|
-
IndexLSH
|
|
38
|
-
idx_t d,
|
|
36
|
+
IndexLSH(
|
|
37
|
+
idx_t d,
|
|
38
|
+
int nbits,
|
|
39
39
|
bool rotate_data = true,
|
|
40
40
|
bool train_thresholds = false);
|
|
41
41
|
|
|
@@ -46,45 +46,40 @@ struct IndexLSH:Index {
|
|
|
46
46
|
* @return output vectors, size n * bits. May be the same pointer
|
|
47
47
|
* as x, otherwise it should be deleted by the caller
|
|
48
48
|
*/
|
|
49
|
-
const float
|
|
49
|
+
const float* apply_preprocess(idx_t n, const float* x) const;
|
|
50
50
|
|
|
51
51
|
void train(idx_t n, const float* x) override;
|
|
52
52
|
|
|
53
53
|
void add(idx_t n, const float* x) override;
|
|
54
54
|
|
|
55
55
|
void search(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
56
|
+
idx_t n,
|
|
57
|
+
const float* x,
|
|
58
|
+
idx_t k,
|
|
59
|
+
float* distances,
|
|
60
|
+
idx_t* labels) const override;
|
|
61
61
|
|
|
62
62
|
void reset() override;
|
|
63
63
|
|
|
64
64
|
/// transfer the thresholds to a pre-processing stage (and unset
|
|
65
65
|
/// train_thresholds)
|
|
66
|
-
void transfer_thresholds
|
|
66
|
+
void transfer_thresholds(LinearTransform* vt);
|
|
67
67
|
|
|
68
68
|
~IndexLSH() override {}
|
|
69
69
|
|
|
70
|
-
IndexLSH
|
|
70
|
+
IndexLSH();
|
|
71
71
|
|
|
72
72
|
/* standalone codec interface.
|
|
73
73
|
*
|
|
74
74
|
* The vectors are decoded to +/- 1 (not 0, 1) */
|
|
75
75
|
|
|
76
|
-
size_t sa_code_size
|
|
77
|
-
|
|
78
|
-
void sa_encode (idx_t n, const float *x,
|
|
79
|
-
uint8_t *bytes) const override;
|
|
76
|
+
size_t sa_code_size() const override;
|
|
80
77
|
|
|
81
|
-
void
|
|
82
|
-
float *x) const override;
|
|
78
|
+
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
|
|
83
79
|
|
|
80
|
+
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
|
|
84
81
|
};
|
|
85
82
|
|
|
86
|
-
|
|
87
|
-
}
|
|
88
|
-
|
|
83
|
+
} // namespace faiss
|
|
89
84
|
|
|
90
85
|
#endif
|
|
@@ -7,26 +7,23 @@
|
|
|
7
7
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
|
-
|
|
11
10
|
#include <faiss/IndexLattice.h>
|
|
12
|
-
#include <faiss/utils/hamming.h> // for the bitstring routines
|
|
13
11
|
#include <faiss/impl/FaissAssert.h>
|
|
14
12
|
#include <faiss/utils/distances.h>
|
|
13
|
+
#include <faiss/utils/hamming.h> // for the bitstring routines
|
|
15
14
|
|
|
16
15
|
namespace faiss {
|
|
17
16
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
{
|
|
26
|
-
FAISS_THROW_IF_NOT (d % nsq == 0);
|
|
17
|
+
IndexLattice::IndexLattice(idx_t d, int nsq, int scale_nbit, int r2)
|
|
18
|
+
: Index(d),
|
|
19
|
+
nsq(nsq),
|
|
20
|
+
dsq(d / nsq),
|
|
21
|
+
zn_sphere_codec(dsq, r2),
|
|
22
|
+
scale_nbit(scale_nbit) {
|
|
23
|
+
FAISS_THROW_IF_NOT(d % nsq == 0);
|
|
27
24
|
|
|
28
25
|
lattice_nbit = 0;
|
|
29
|
-
while (!(
|
|
26
|
+
while (!(((uint64_t)1 << lattice_nbit) >= zn_sphere_codec.nv)) {
|
|
30
27
|
lattice_nbit++;
|
|
31
28
|
}
|
|
32
29
|
|
|
@@ -37,12 +34,11 @@ IndexLattice::IndexLattice (idx_t d, int nsq, int scale_nbit, int r2):
|
|
|
37
34
|
is_trained = false;
|
|
38
35
|
}
|
|
39
36
|
|
|
40
|
-
void IndexLattice::train(idx_t n, const float* x)
|
|
41
|
-
{
|
|
37
|
+
void IndexLattice::train(idx_t n, const float* x) {
|
|
42
38
|
// compute ranges per sub-block
|
|
43
|
-
trained.resize
|
|
44
|
-
float
|
|
45
|
-
float
|
|
39
|
+
trained.resize(nsq * 2);
|
|
40
|
+
float* mins = trained.data();
|
|
41
|
+
float* maxs = trained.data() + nsq;
|
|
46
42
|
for (int sq = 0; sq < nsq; sq++) {
|
|
47
43
|
mins[sq] = HUGE_VAL;
|
|
48
44
|
maxs[sq] = -1;
|
|
@@ -50,45 +46,43 @@ void IndexLattice::train(idx_t n, const float* x)
|
|
|
50
46
|
|
|
51
47
|
for (idx_t i = 0; i < n; i++) {
|
|
52
48
|
for (int sq = 0; sq < nsq; sq++) {
|
|
53
|
-
float norm2 = fvec_norm_L2sqr
|
|
54
|
-
if (norm2 > maxs[sq])
|
|
55
|
-
|
|
49
|
+
float norm2 = fvec_norm_L2sqr(x + i * d + sq * dsq, dsq);
|
|
50
|
+
if (norm2 > maxs[sq])
|
|
51
|
+
maxs[sq] = norm2;
|
|
52
|
+
if (norm2 < mins[sq])
|
|
53
|
+
mins[sq] = norm2;
|
|
56
54
|
}
|
|
57
55
|
}
|
|
58
56
|
|
|
59
57
|
for (int sq = 0; sq < nsq; sq++) {
|
|
60
|
-
mins[sq] = sqrtf
|
|
61
|
-
maxs[sq] = sqrtf
|
|
58
|
+
mins[sq] = sqrtf(mins[sq]);
|
|
59
|
+
maxs[sq] = sqrtf(maxs[sq]);
|
|
62
60
|
}
|
|
63
61
|
|
|
64
62
|
is_trained = true;
|
|
65
63
|
}
|
|
66
64
|
|
|
67
65
|
/* The standalone codec interface */
|
|
68
|
-
size_t IndexLattice::sa_code_size
|
|
69
|
-
{
|
|
66
|
+
size_t IndexLattice::sa_code_size() const {
|
|
70
67
|
return code_size;
|
|
71
68
|
}
|
|
72
69
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
{
|
|
77
|
-
|
|
78
|
-
const float * mins = trained.data();
|
|
79
|
-
const float * maxs = mins + nsq;
|
|
70
|
+
void IndexLattice::sa_encode(idx_t n, const float* x, uint8_t* codes) const {
|
|
71
|
+
const float* mins = trained.data();
|
|
72
|
+
const float* maxs = mins + nsq;
|
|
80
73
|
int64_t sc = int64_t(1) << scale_nbit;
|
|
81
74
|
|
|
82
75
|
#pragma omp parallel for
|
|
83
76
|
for (idx_t i = 0; i < n; i++) {
|
|
84
77
|
BitstringWriter wr(codes + i * code_size, code_size);
|
|
85
|
-
const float
|
|
78
|
+
const float* xi = x + i * d;
|
|
86
79
|
for (int j = 0; j < nsq; j++) {
|
|
87
|
-
float nj =
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if (nj >= sc)
|
|
80
|
+
float nj = (sqrtf(fvec_norm_L2sqr(xi, dsq)) - mins[j]) * sc /
|
|
81
|
+
(maxs[j] - mins[j]);
|
|
82
|
+
if (nj < 0)
|
|
83
|
+
nj = 0;
|
|
84
|
+
if (nj >= sc)
|
|
85
|
+
nj = sc - 1;
|
|
92
86
|
wr.write((int64_t)nj, scale_nbit);
|
|
93
87
|
wr.write(zn_sphere_codec.encode(xi), lattice_nbit);
|
|
94
88
|
xi += dsq;
|
|
@@ -96,23 +90,22 @@ void IndexLattice::sa_encode (idx_t n, const float *x, uint8_t *codes) const
|
|
|
96
90
|
}
|
|
97
91
|
}
|
|
98
92
|
|
|
99
|
-
void IndexLattice::sa_decode
|
|
100
|
-
|
|
101
|
-
const float
|
|
102
|
-
const float * maxs = mins + nsq;
|
|
93
|
+
void IndexLattice::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
|
|
94
|
+
const float* mins = trained.data();
|
|
95
|
+
const float* maxs = mins + nsq;
|
|
103
96
|
float sc = int64_t(1) << scale_nbit;
|
|
104
97
|
float r = sqrtf(zn_sphere_codec.r2);
|
|
105
98
|
|
|
106
99
|
#pragma omp parallel for
|
|
107
100
|
for (idx_t i = 0; i < n; i++) {
|
|
108
101
|
BitstringReader rd(codes + i * code_size, code_size);
|
|
109
|
-
float
|
|
102
|
+
float* xi = x + i * d;
|
|
110
103
|
for (int j = 0; j < nsq; j++) {
|
|
111
104
|
float norm =
|
|
112
|
-
|
|
113
|
-
|
|
105
|
+
(rd.read(scale_nbit) + 0.5) * (maxs[j] - mins[j]) / sc +
|
|
106
|
+
mins[j];
|
|
114
107
|
norm /= r;
|
|
115
|
-
zn_sphere_codec.decode
|
|
108
|
+
zn_sphere_codec.decode(rd.read(lattice_nbit), xi);
|
|
116
109
|
for (int l = 0; l < dsq; l++) {
|
|
117
110
|
xi[l] *= norm;
|
|
118
111
|
}
|
|
@@ -121,23 +114,16 @@ void IndexLattice::sa_decode (idx_t n, const uint8_t *codes, float *x) const
|
|
|
121
114
|
}
|
|
122
115
|
}
|
|
123
116
|
|
|
124
|
-
void IndexLattice::add(idx_t
|
|
125
|
-
{
|
|
117
|
+
void IndexLattice::add(idx_t, const float*) {
|
|
126
118
|
FAISS_THROW_MSG("not implemented");
|
|
127
119
|
}
|
|
128
120
|
|
|
129
|
-
|
|
130
|
-
void IndexLattice::search(idx_t , const float* , idx_t ,
|
|
131
|
-
float* , idx_t* ) const
|
|
132
|
-
{
|
|
121
|
+
void IndexLattice::search(idx_t, const float*, idx_t, float*, idx_t*) const {
|
|
133
122
|
FAISS_THROW_MSG("not implemented");
|
|
134
123
|
}
|
|
135
124
|
|
|
136
|
-
|
|
137
|
-
void IndexLattice::reset()
|
|
138
|
-
{
|
|
125
|
+
void IndexLattice::reset() {
|
|
139
126
|
FAISS_THROW_MSG("not implemented");
|
|
140
127
|
}
|
|
141
128
|
|
|
142
|
-
|
|
143
|
-
} // namespace faiss
|
|
129
|
+
} // namespace faiss
|