faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -7,43 +7,48 @@
|
|
|
7
7
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
|
-
|
|
11
10
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
12
11
|
|
|
13
|
-
#include <memory>
|
|
14
|
-
#include <algorithm>
|
|
15
12
|
#include <stdint.h>
|
|
13
|
+
#include <algorithm>
|
|
14
|
+
#include <memory>
|
|
16
15
|
|
|
16
|
+
#include <faiss/IndexLSH.h>
|
|
17
|
+
#include <faiss/IndexPreTransform.h>
|
|
18
|
+
#include <faiss/VectorTransform.h>
|
|
19
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
20
|
+
#include <faiss/impl/FaissAssert.h>
|
|
17
21
|
#include <faiss/utils/hamming.h>
|
|
18
22
|
#include <faiss/utils/utils.h>
|
|
19
|
-
#include <faiss/impl/FaissAssert.h>
|
|
20
|
-
#include <faiss/impl/AuxIndexStructures.h>
|
|
21
|
-
#include <faiss/VectorTransform.h>
|
|
22
23
|
|
|
23
24
|
namespace faiss {
|
|
24
25
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
26
|
+
IndexIVFSpectralHash::IndexIVFSpectralHash(
|
|
27
|
+
Index* quantizer,
|
|
28
|
+
size_t d,
|
|
29
|
+
size_t nlist,
|
|
30
|
+
int nbit,
|
|
31
|
+
float period)
|
|
32
|
+
: IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
|
|
33
|
+
nbit(nbit),
|
|
34
|
+
period(period),
|
|
35
|
+
threshold_type(Thresh_global) {
|
|
36
|
+
RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
|
|
37
|
+
rr->init(1234);
|
|
35
38
|
vt = rr;
|
|
36
39
|
own_fields = true;
|
|
37
40
|
is_trained = false;
|
|
38
41
|
}
|
|
39
42
|
|
|
40
|
-
IndexIVFSpectralHash::IndexIVFSpectralHash()
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
43
|
+
IndexIVFSpectralHash::IndexIVFSpectralHash()
|
|
44
|
+
: IndexIVF(),
|
|
45
|
+
vt(nullptr),
|
|
46
|
+
own_fields(false),
|
|
47
|
+
nbit(0),
|
|
48
|
+
period(0),
|
|
49
|
+
threshold_type(Thresh_global) {}
|
|
44
50
|
|
|
45
|
-
IndexIVFSpectralHash::~IndexIVFSpectralHash
|
|
46
|
-
{
|
|
51
|
+
IndexIVFSpectralHash::~IndexIVFSpectralHash() {
|
|
47
52
|
if (own_fields) {
|
|
48
53
|
delete vt;
|
|
49
54
|
}
|
|
@@ -51,35 +56,33 @@ IndexIVFSpectralHash::~IndexIVFSpectralHash ()
|
|
|
51
56
|
|
|
52
57
|
namespace {
|
|
53
58
|
|
|
54
|
-
|
|
55
|
-
float median (size_t n, float *x) {
|
|
59
|
+
float median(size_t n, float* x) {
|
|
56
60
|
std::sort(x, x + n);
|
|
57
61
|
if (n % 2 == 1) {
|
|
58
|
-
return x
|
|
62
|
+
return x[n / 2];
|
|
59
63
|
} else {
|
|
60
|
-
return (x
|
|
64
|
+
return (x[n / 2 - 1] + x[n / 2]) / 2;
|
|
61
65
|
}
|
|
62
66
|
}
|
|
63
67
|
|
|
64
|
-
}
|
|
65
|
-
|
|
68
|
+
} // namespace
|
|
66
69
|
|
|
67
|
-
void IndexIVFSpectralHash::train_residual
|
|
68
|
-
{
|
|
70
|
+
void IndexIVFSpectralHash::train_residual(idx_t n, const float* x) {
|
|
69
71
|
if (!vt->is_trained) {
|
|
70
|
-
vt->train
|
|
72
|
+
vt->train(n, x);
|
|
71
73
|
}
|
|
72
74
|
|
|
73
75
|
if (threshold_type == Thresh_global) {
|
|
74
76
|
// nothing to do
|
|
75
77
|
return;
|
|
76
|
-
} else if (
|
|
77
|
-
|
|
78
|
+
} else if (
|
|
79
|
+
threshold_type == Thresh_centroid ||
|
|
80
|
+
threshold_type == Thresh_centroid_half) {
|
|
78
81
|
// convert all centroids with vt
|
|
79
|
-
std::vector<float> centroids
|
|
80
|
-
quantizer->reconstruct_n
|
|
82
|
+
std::vector<float> centroids(nlist * d);
|
|
83
|
+
quantizer->reconstruct_n(0, nlist, centroids.data());
|
|
81
84
|
trained.resize(nlist * nbit);
|
|
82
|
-
vt->apply_noalloc
|
|
85
|
+
vt->apply_noalloc(nlist, centroids.data(), trained.data());
|
|
83
86
|
if (threshold_type == Thresh_centroid_half) {
|
|
84
87
|
for (size_t i = 0; i < nlist * nbit; i++) {
|
|
85
88
|
trained[i] -= 0.25 * period;
|
|
@@ -90,12 +93,12 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
|
|
|
90
93
|
// otherwise train medians
|
|
91
94
|
|
|
92
95
|
// assign
|
|
93
|
-
std::unique_ptr<idx_t
|
|
94
|
-
quantizer->assign
|
|
96
|
+
std::unique_ptr<idx_t[]> idx(new idx_t[n]);
|
|
97
|
+
quantizer->assign(n, x, idx.get());
|
|
95
98
|
|
|
96
99
|
std::vector<size_t> sizes(nlist + 1);
|
|
97
100
|
for (size_t i = 0; i < n; i++) {
|
|
98
|
-
FAISS_THROW_IF_NOT
|
|
101
|
+
FAISS_THROW_IF_NOT(idx[i] >= 0);
|
|
99
102
|
sizes[idx[i]]++;
|
|
100
103
|
}
|
|
101
104
|
|
|
@@ -107,10 +110,10 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
|
|
|
107
110
|
}
|
|
108
111
|
|
|
109
112
|
// transform
|
|
110
|
-
std::unique_ptr<float
|
|
113
|
+
std::unique_ptr<float[]> xt(vt->apply(n, x));
|
|
111
114
|
|
|
112
115
|
// transpose + reorder
|
|
113
|
-
std::unique_ptr<float
|
|
116
|
+
std::unique_ptr<float[]> xo(new float[n * nbit]);
|
|
114
117
|
|
|
115
118
|
for (size_t i = 0; i < n; i++) {
|
|
116
119
|
size_t idest = sizes[idx[i]]++;
|
|
@@ -119,14 +122,14 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
|
|
|
119
122
|
}
|
|
120
123
|
}
|
|
121
124
|
|
|
122
|
-
trained.resize
|
|
125
|
+
trained.resize(n * nbit);
|
|
123
126
|
// compute medians
|
|
124
127
|
#pragma omp for
|
|
125
128
|
for (int i = 0; i < nlist; i++) {
|
|
126
129
|
size_t i0 = i == 0 ? 0 : sizes[i - 1];
|
|
127
130
|
size_t i1 = sizes[i];
|
|
128
131
|
for (int j = 0; j < nbit; j++) {
|
|
129
|
-
float
|
|
132
|
+
float* xoi = xo.get() + i0 + n * j;
|
|
130
133
|
if (i0 == i1) { // nothing to train
|
|
131
134
|
trained[i * nbit + j] = 0.0;
|
|
132
135
|
} else if (i1 == i0 + 1) {
|
|
@@ -138,75 +141,71 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
|
|
|
138
141
|
}
|
|
139
142
|
}
|
|
140
143
|
|
|
141
|
-
|
|
142
144
|
namespace {
|
|
143
145
|
|
|
144
|
-
void binarize_with_freq(
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
146
|
+
void binarize_with_freq(
|
|
147
|
+
size_t nbit,
|
|
148
|
+
float freq,
|
|
149
|
+
const float* x,
|
|
150
|
+
const float* c,
|
|
151
|
+
uint8_t* codes) {
|
|
152
|
+
memset(codes, 0, (nbit + 7) / 8);
|
|
149
153
|
for (size_t i = 0; i < nbit; i++) {
|
|
150
154
|
float xf = (x[i] - c[i]);
|
|
151
|
-
|
|
152
|
-
|
|
155
|
+
int64_t xi = int64_t(floor(xf * freq));
|
|
156
|
+
int64_t bit = xi & 1;
|
|
153
157
|
codes[i >> 3] |= bit << (i & 7);
|
|
154
158
|
}
|
|
155
159
|
}
|
|
156
160
|
|
|
161
|
+
}; // namespace
|
|
157
162
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
bool include_listnos) const
|
|
166
|
-
{
|
|
167
|
-
FAISS_THROW_IF_NOT (is_trained);
|
|
163
|
+
void IndexIVFSpectralHash::encode_vectors(
|
|
164
|
+
idx_t n,
|
|
165
|
+
const float* x_in,
|
|
166
|
+
const idx_t* list_nos,
|
|
167
|
+
uint8_t* codes,
|
|
168
|
+
bool include_listnos) const {
|
|
169
|
+
FAISS_THROW_IF_NOT(is_trained);
|
|
168
170
|
float freq = 2.0 / period;
|
|
169
|
-
|
|
170
|
-
FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported");
|
|
171
|
+
size_t coarse_size = include_listnos ? coarse_code_size() : 0;
|
|
171
172
|
|
|
172
173
|
// transform with vt
|
|
173
|
-
std::unique_ptr<float
|
|
174
|
+
std::unique_ptr<float[]> x(vt->apply(n, x_in));
|
|
174
175
|
|
|
175
|
-
|
|
176
|
-
{
|
|
177
|
-
std::vector<float> zero (nbit);
|
|
176
|
+
std::vector<float> zero(nbit);
|
|
178
177
|
|
|
179
|
-
// each thread takes care of a subset of lists
|
|
180
178
|
#pragma omp for
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
179
|
+
for (idx_t i = 0; i < n; i++) {
|
|
180
|
+
int64_t list_no = list_nos[i];
|
|
181
|
+
uint8_t* code = codes + i * (code_size + coarse_size);
|
|
182
|
+
|
|
183
|
+
if (list_no >= 0) {
|
|
184
|
+
if (coarse_size) {
|
|
185
|
+
encode_listno(list_no, code);
|
|
186
|
+
}
|
|
187
|
+
const float* c;
|
|
188
|
+
|
|
189
|
+
if (threshold_type == Thresh_global) {
|
|
190
|
+
c = zero.data();
|
|
191
|
+
} else {
|
|
192
|
+
c = trained.data() + list_no * nbit;
|
|
194
193
|
}
|
|
194
|
+
binarize_with_freq(
|
|
195
|
+
nbit, freq, x.get() + i * nbit, c, code + coarse_size);
|
|
196
|
+
} else {
|
|
197
|
+
memset(code, 0, code_size + coarse_size);
|
|
195
198
|
}
|
|
196
199
|
}
|
|
197
200
|
}
|
|
198
201
|
|
|
199
202
|
namespace {
|
|
200
203
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
struct IVFScanner: InvertedListScanner {
|
|
204
|
-
|
|
204
|
+
template <class HammingComputer>
|
|
205
|
+
struct IVFScanner : InvertedListScanner {
|
|
205
206
|
// copied from index structure
|
|
206
|
-
const IndexIVFSpectralHash
|
|
207
|
-
size_t code_size;
|
|
207
|
+
const IndexIVFSpectralHash* index;
|
|
208
208
|
size_t nbit;
|
|
209
|
-
bool store_pairs;
|
|
210
209
|
|
|
211
210
|
float period, freq;
|
|
212
211
|
std::vector<float> q;
|
|
@@ -216,61 +215,57 @@ struct IVFScanner: InvertedListScanner {
|
|
|
216
215
|
|
|
217
216
|
using idx_t = Index::idx_t;
|
|
218
217
|
|
|
219
|
-
IVFScanner
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
218
|
+
IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
|
|
219
|
+
: index(index),
|
|
220
|
+
nbit(index->nbit),
|
|
221
|
+
period(index->period),
|
|
222
|
+
freq(2.0 / index->period),
|
|
223
|
+
q(nbit),
|
|
224
|
+
zero(nbit),
|
|
225
|
+
qcode(index->code_size),
|
|
226
|
+
hc(qcode.data(), index->code_size) {
|
|
227
|
+
this->store_pairs = store_pairs;
|
|
228
|
+
this->code_size = index->code_size;
|
|
229
229
|
}
|
|
230
230
|
|
|
231
|
-
|
|
232
|
-
void set_query (const float *query) override {
|
|
231
|
+
void set_query(const float* query) override {
|
|
233
232
|
FAISS_THROW_IF_NOT(query);
|
|
234
233
|
FAISS_THROW_IF_NOT(q.size() == nbit);
|
|
235
|
-
index->vt->apply_noalloc
|
|
234
|
+
index->vt->apply_noalloc(1, query, q.data());
|
|
236
235
|
|
|
237
|
-
if (index->threshold_type ==
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
(nbit, freq, q.data(), zero.data(), qcode.data());
|
|
241
|
-
hc.set (qcode.data(), code_size);
|
|
236
|
+
if (index->threshold_type == IndexIVFSpectralHash::Thresh_global) {
|
|
237
|
+
binarize_with_freq(nbit, freq, q.data(), zero.data(), qcode.data());
|
|
238
|
+
hc.set(qcode.data(), code_size);
|
|
242
239
|
}
|
|
243
240
|
}
|
|
244
241
|
|
|
245
|
-
idx_t list_no
|
|
246
|
-
|
|
247
|
-
void set_list (idx_t list_no, float /*coarse_dis*/) override {
|
|
242
|
+
void set_list(idx_t list_no, float /*coarse_dis*/) override {
|
|
248
243
|
this->list_no = list_no;
|
|
249
244
|
if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
|
|
250
|
-
const float
|
|
251
|
-
binarize_with_freq
|
|
252
|
-
hc.set
|
|
245
|
+
const float* c = index->trained.data() + list_no * nbit;
|
|
246
|
+
binarize_with_freq(nbit, freq, q.data(), c, qcode.data());
|
|
247
|
+
hc.set(qcode.data(), code_size);
|
|
253
248
|
}
|
|
254
249
|
}
|
|
255
250
|
|
|
256
|
-
float distance_to_code
|
|
257
|
-
return hc.hamming
|
|
251
|
+
float distance_to_code(const uint8_t* code) const final {
|
|
252
|
+
return hc.hamming(code);
|
|
258
253
|
}
|
|
259
254
|
|
|
260
|
-
size_t scan_codes
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
255
|
+
size_t scan_codes(
|
|
256
|
+
size_t list_size,
|
|
257
|
+
const uint8_t* codes,
|
|
258
|
+
const idx_t* ids,
|
|
259
|
+
float* simi,
|
|
260
|
+
idx_t* idxi,
|
|
261
|
+
size_t k) const override {
|
|
266
262
|
size_t nup = 0;
|
|
267
263
|
for (size_t j = 0; j < list_size; j++) {
|
|
264
|
+
float dis = hc.hamming(codes);
|
|
268
265
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
|
|
273
|
-
maxheap_replace_top (k, simi, idxi, dis, id);
|
|
266
|
+
if (dis < simi[0]) {
|
|
267
|
+
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
|
268
|
+
maxheap_replace_top(k, simi, idxi, dis, id);
|
|
274
269
|
nup++;
|
|
275
270
|
}
|
|
276
271
|
codes += code_size;
|
|
@@ -278,34 +273,31 @@ struct IVFScanner: InvertedListScanner {
|
|
|
278
273
|
return nup;
|
|
279
274
|
}
|
|
280
275
|
|
|
281
|
-
void scan_codes_range
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
276
|
+
void scan_codes_range(
|
|
277
|
+
size_t list_size,
|
|
278
|
+
const uint8_t* codes,
|
|
279
|
+
const idx_t* ids,
|
|
280
|
+
float radius,
|
|
281
|
+
RangeQueryResult& res) const override {
|
|
287
282
|
for (size_t j = 0; j < list_size; j++) {
|
|
288
|
-
float dis = hc.hamming
|
|
283
|
+
float dis = hc.hamming(codes);
|
|
289
284
|
if (dis < radius) {
|
|
290
|
-
int64_t id = store_pairs ? lo_build
|
|
291
|
-
res.add
|
|
285
|
+
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
|
286
|
+
res.add(dis, id);
|
|
292
287
|
}
|
|
293
288
|
codes += code_size;
|
|
294
289
|
}
|
|
295
290
|
}
|
|
296
|
-
|
|
297
|
-
|
|
298
291
|
};
|
|
299
292
|
|
|
300
293
|
} // anonymous namespace
|
|
301
294
|
|
|
302
|
-
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
|
|
303
|
-
|
|
304
|
-
{
|
|
295
|
+
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
|
|
296
|
+
bool store_pairs) const {
|
|
305
297
|
switch (code_size) {
|
|
306
298
|
#define HANDLE_CODE_SIZE(cs) \
|
|
307
|
-
case cs:
|
|
308
|
-
return new IVFScanner<HammingComputer
|
|
299
|
+
case cs: \
|
|
300
|
+
return new IVFScanner<HammingComputer##cs>(this, store_pairs)
|
|
309
301
|
HANDLE_CODE_SIZE(4);
|
|
310
302
|
HANDLE_CODE_SIZE(8);
|
|
311
303
|
HANDLE_CODE_SIZE(16);
|
|
@@ -314,17 +306,38 @@ InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
|
|
|
314
306
|
HANDLE_CODE_SIZE(64);
|
|
315
307
|
#undef HANDLE_CODE_SIZE
|
|
316
308
|
default:
|
|
317
|
-
|
|
318
|
-
return new IVFScanner<HammingComputerM8>(this, store_pairs);
|
|
319
|
-
} else if (code_size % 4 == 0) {
|
|
320
|
-
return new IVFScanner<HammingComputerM4>(this, store_pairs);
|
|
321
|
-
} else {
|
|
322
|
-
FAISS_THROW_MSG("not supported");
|
|
323
|
-
}
|
|
309
|
+
return new IVFScanner<HammingComputerDefault>(this, store_pairs);
|
|
324
310
|
}
|
|
325
|
-
|
|
326
311
|
}
|
|
327
312
|
|
|
313
|
+
void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
|
|
314
|
+
FAISS_THROW_IF_NOT(vt_in->d_out == nbit);
|
|
315
|
+
FAISS_THROW_IF_NOT(vt_in->d_in == d);
|
|
316
|
+
if (own_fields) {
|
|
317
|
+
delete vt;
|
|
318
|
+
}
|
|
319
|
+
vt = vt_in;
|
|
320
|
+
threshold_type = Thresh_global;
|
|
321
|
+
is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
|
|
322
|
+
vt->is_trained;
|
|
323
|
+
own_fields = own;
|
|
324
|
+
}
|
|
328
325
|
|
|
326
|
+
/*
|
|
327
|
+
Check that the encoder is a single vector transform followed by a LSH
|
|
328
|
+
that just does thresholding.
|
|
329
|
+
If this is not the case, the linear transform + thresholds of the IndexLSH
|
|
330
|
+
should be merged into the VectorTransform (which is feasible).
|
|
331
|
+
*/
|
|
332
|
+
|
|
333
|
+
void IndexIVFSpectralHash::replace_vt(IndexPreTransform* encoder, bool own) {
|
|
334
|
+
FAISS_THROW_IF_NOT(encoder->chain.size() == 1);
|
|
335
|
+
auto sub_index = dynamic_cast<IndexLSH*>(encoder->index);
|
|
336
|
+
FAISS_THROW_IF_NOT_MSG(sub_index, "final index should be LSH");
|
|
337
|
+
FAISS_THROW_IF_NOT(sub_index->nbits == nbit);
|
|
338
|
+
FAISS_THROW_IF_NOT(!sub_index->rotate_data);
|
|
339
|
+
FAISS_THROW_IF_NOT(!sub_index->train_thresholds);
|
|
340
|
+
replace_vt(encoder->chain[0], own);
|
|
341
|
+
}
|
|
329
342
|
|
|
330
|
-
}
|
|
343
|
+
} // namespace faiss
|
|
@@ -10,15 +10,14 @@
|
|
|
10
10
|
#ifndef FAISS_INDEX_IVFSH_H
|
|
11
11
|
#define FAISS_INDEX_IVFSH_H
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
#include <vector>
|
|
15
14
|
|
|
16
15
|
#include <faiss/IndexIVF.h>
|
|
17
16
|
|
|
18
|
-
|
|
19
17
|
namespace faiss {
|
|
20
18
|
|
|
21
19
|
struct VectorTransform;
|
|
20
|
+
struct IndexPreTransform;
|
|
22
21
|
|
|
23
22
|
/** Inverted list that stores binary codes of size nbit. Before the
|
|
24
23
|
* binary conversion, the dimension of the vectors is transformed from
|
|
@@ -27,49 +26,63 @@ struct VectorTransform;
|
|
|
27
26
|
* Each coordinate is subtracted from a value determined by
|
|
28
27
|
* threshold_type, and split into intervals of size period. Half of
|
|
29
28
|
* the interval is a 0 bit, the other half a 1.
|
|
29
|
+
*
|
|
30
30
|
*/
|
|
31
|
-
struct IndexIVFSpectralHash: IndexIVF {
|
|
32
|
-
|
|
33
|
-
VectorTransform
|
|
31
|
+
struct IndexIVFSpectralHash : IndexIVF {
|
|
32
|
+
/// transformation from d to nbit dim
|
|
33
|
+
VectorTransform* vt;
|
|
34
|
+
/// own the vt
|
|
34
35
|
bool own_fields;
|
|
35
36
|
|
|
37
|
+
/// nb of bits of the binary signature
|
|
36
38
|
int nbit;
|
|
39
|
+
/// interval size for 0s and 1s
|
|
37
40
|
float period;
|
|
38
41
|
|
|
39
42
|
enum ThresholdType {
|
|
40
|
-
Thresh_global,
|
|
41
|
-
Thresh_centroid,
|
|
42
|
-
Thresh_centroid_half,
|
|
43
|
-
Thresh_median
|
|
43
|
+
Thresh_global, ///< global threshold at 0
|
|
44
|
+
Thresh_centroid, ///< compare to centroid
|
|
45
|
+
Thresh_centroid_half, ///< central interval around centroid
|
|
46
|
+
Thresh_median ///< median of training set
|
|
44
47
|
};
|
|
45
48
|
ThresholdType threshold_type;
|
|
46
49
|
|
|
47
|
-
|
|
50
|
+
/// Trained threshold.
|
|
51
|
+
/// size nlist * nbit or 0 if Thresh_global
|
|
48
52
|
std::vector<float> trained;
|
|
49
53
|
|
|
50
|
-
IndexIVFSpectralHash
|
|
51
|
-
|
|
54
|
+
IndexIVFSpectralHash(
|
|
55
|
+
Index* quantizer,
|
|
56
|
+
size_t d,
|
|
57
|
+
size_t nlist,
|
|
58
|
+
int nbit,
|
|
59
|
+
float period);
|
|
52
60
|
|
|
53
|
-
IndexIVFSpectralHash
|
|
61
|
+
IndexIVFSpectralHash();
|
|
54
62
|
|
|
55
63
|
void train_residual(idx_t n, const float* x) override;
|
|
56
64
|
|
|
57
|
-
void encode_vectors(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
65
|
+
void encode_vectors(
|
|
66
|
+
idx_t n,
|
|
67
|
+
const float* x,
|
|
68
|
+
const idx_t* list_nos,
|
|
69
|
+
uint8_t* codes,
|
|
70
|
+
bool include_listnos = false) const override;
|
|
61
71
|
|
|
62
|
-
InvertedListScanner
|
|
63
|
-
|
|
72
|
+
InvertedListScanner* get_InvertedListScanner(
|
|
73
|
+
bool store_pairs) const override;
|
|
64
74
|
|
|
65
|
-
|
|
75
|
+
/** replace the vector transform for an empty (and possibly untrained) index
|
|
76
|
+
*/
|
|
77
|
+
void replace_vt(VectorTransform* vt, bool own = false);
|
|
66
78
|
|
|
67
|
-
|
|
79
|
+
/** convenience function to get the VT from an index constructed by an
|
|
80
|
+
* index_factory (should end in "LSH") */
|
|
81
|
+
void replace_vt(IndexPreTransform* index, bool own = false);
|
|
68
82
|
|
|
83
|
+
~IndexIVFSpectralHash() override;
|
|
84
|
+
};
|
|
69
85
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
}; // namespace faiss
|
|
73
|
-
|
|
86
|
+
} // namespace faiss
|
|
74
87
|
|
|
75
88
|
#endif
|