faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
8
|
#include <cuda_profiler_api.h>
|
|
11
9
|
#include <faiss/IndexFlat.h>
|
|
12
10
|
#include <faiss/IndexIVFPQ.h>
|
|
@@ -32,108 +30,131 @@ DEFINE_bool(per_batch_time, false, "print per-batch times");
|
|
|
32
30
|
DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
|
|
33
31
|
|
|
34
32
|
int main(int argc, char** argv) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
cudaProfilerStop();
|
|
38
|
-
|
|
39
|
-
int dim = FLAGS_dim;
|
|
40
|
-
int numCentroids = FLAGS_centroids;
|
|
41
|
-
int bytesPerVec = FLAGS_bytes_per_vec;
|
|
42
|
-
int bitsPerCode = FLAGS_bits_per_code;
|
|
33
|
+
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
|
43
34
|
|
|
44
|
-
|
|
35
|
+
cudaProfilerStop();
|
|
45
36
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
37
|
+
int dim = FLAGS_dim;
|
|
38
|
+
int numCentroids = FLAGS_centroids;
|
|
39
|
+
int bytesPerVec = FLAGS_bytes_per_vec;
|
|
40
|
+
int bitsPerCode = FLAGS_bits_per_code;
|
|
49
41
|
|
|
50
|
-
|
|
51
|
-
faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
|
|
52
|
-
bytesPerVec, bitsPerCode);
|
|
53
|
-
if (FLAGS_time_cpu) {
|
|
54
|
-
cpuIndex.train(numTrain, trainVecs.data());
|
|
55
|
-
}
|
|
42
|
+
faiss::gpu::StandardGpuResources res;
|
|
56
43
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
44
|
+
// IndexIVFPQ will complain, but just give us enough to get through this
|
|
45
|
+
int numTrain = 4 * numCentroids;
|
|
46
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
|
|
60
47
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
gpuIndex.train(numTrain, trainVecs.data());
|
|
67
|
-
if (FLAGS_reserve_memory) {
|
|
68
|
-
size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
|
|
69
|
-
gpuIndex.reserveMemory(numVecs);
|
|
48
|
+
faiss::IndexFlatL2 coarseQuantizer(dim);
|
|
49
|
+
faiss::IndexIVFPQ cpuIndex(
|
|
50
|
+
&coarseQuantizer, dim, numCentroids, bytesPerVec, bitsPerCode);
|
|
51
|
+
if (FLAGS_time_cpu) {
|
|
52
|
+
cpuIndex.train(numTrain, trainVecs.data());
|
|
70
53
|
}
|
|
71
|
-
}
|
|
72
54
|
|
|
73
|
-
|
|
74
|
-
|
|
55
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
|
56
|
+
config.device = 0;
|
|
57
|
+
config.indicesOptions = (faiss::gpu::IndicesOptions)FLAGS_index;
|
|
75
58
|
|
|
76
|
-
|
|
77
|
-
|
|
59
|
+
faiss::gpu::GpuIndexIVFPQ gpuIndex(
|
|
60
|
+
&res,
|
|
61
|
+
dim,
|
|
62
|
+
numCentroids,
|
|
63
|
+
bytesPerVec,
|
|
64
|
+
bitsPerCode,
|
|
65
|
+
faiss::METRIC_L2,
|
|
66
|
+
config);
|
|
78
67
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
68
|
+
if (FLAGS_time_gpu) {
|
|
69
|
+
gpuIndex.train(numTrain, trainVecs.data());
|
|
70
|
+
if (FLAGS_reserve_memory) {
|
|
71
|
+
size_t numVecs = (size_t)FLAGS_batches * (size_t)FLAGS_batch_size;
|
|
72
|
+
gpuIndex.reserveMemory(numVecs);
|
|
73
|
+
}
|
|
84
74
|
}
|
|
85
75
|
|
|
86
|
-
|
|
76
|
+
cudaDeviceSynchronize();
|
|
77
|
+
CUDA_VERIFY(cudaProfilerStart());
|
|
78
|
+
|
|
79
|
+
float totalGpuTime = 0.0f;
|
|
80
|
+
float totalCpuTime = 0.0f;
|
|
81
|
+
|
|
82
|
+
for (int i = 0; i < FLAGS_batches; ++i) {
|
|
83
|
+
if (!FLAGS_per_batch_time) {
|
|
84
|
+
if (i % 10 == 0) {
|
|
85
|
+
printf("Adding batch %d\n", i + 1);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
|
|
90
|
+
|
|
91
|
+
if (FLAGS_time_gpu) {
|
|
92
|
+
faiss::gpu::CpuTimer timer;
|
|
93
|
+
gpuIndex.add(FLAGS_batch_size, addVecs.data());
|
|
94
|
+
CUDA_VERIFY(cudaDeviceSynchronize());
|
|
95
|
+
auto time = timer.elapsedMilliseconds();
|
|
96
|
+
|
|
97
|
+
totalGpuTime += time;
|
|
98
|
+
|
|
99
|
+
if (FLAGS_per_batch_time) {
|
|
100
|
+
printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
|
|
101
|
+
i + 1,
|
|
102
|
+
FLAGS_batch_size,
|
|
103
|
+
time,
|
|
104
|
+
time / (float)FLAGS_batch_size);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if (FLAGS_time_cpu) {
|
|
109
|
+
faiss::gpu::CpuTimer timer;
|
|
110
|
+
cpuIndex.add(FLAGS_batch_size, addVecs.data());
|
|
111
|
+
auto time = timer.elapsedMilliseconds();
|
|
112
|
+
|
|
113
|
+
totalCpuTime += time;
|
|
114
|
+
|
|
115
|
+
if (FLAGS_per_batch_time) {
|
|
116
|
+
printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
|
|
117
|
+
i + 1,
|
|
118
|
+
FLAGS_batch_size,
|
|
119
|
+
time,
|
|
120
|
+
time / (float)FLAGS_batch_size);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
87
124
|
|
|
88
|
-
|
|
89
|
-
faiss::gpu::CpuTimer timer;
|
|
90
|
-
gpuIndex.add(FLAGS_batch_size, addVecs.data());
|
|
91
|
-
CUDA_VERIFY(cudaDeviceSynchronize());
|
|
92
|
-
auto time = timer.elapsedMilliseconds();
|
|
125
|
+
CUDA_VERIFY(cudaProfilerStop());
|
|
93
126
|
|
|
94
|
-
|
|
127
|
+
int total = FLAGS_batch_size * FLAGS_batches;
|
|
95
128
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
129
|
+
if (FLAGS_time_gpu) {
|
|
130
|
+
printf("%d dim, %d centroids, %d x %d encoding\n"
|
|
131
|
+
"GPU time to add %d vectors (%d batches, %d per batch): "
|
|
132
|
+
"%.3f ms (%.3f us per)\n",
|
|
133
|
+
dim,
|
|
134
|
+
numCentroids,
|
|
135
|
+
bytesPerVec,
|
|
136
|
+
bitsPerCode,
|
|
137
|
+
total,
|
|
138
|
+
FLAGS_batches,
|
|
139
|
+
FLAGS_batch_size,
|
|
140
|
+
totalGpuTime,
|
|
141
|
+
totalGpuTime * 1000.0f / (float)total);
|
|
100
142
|
}
|
|
101
143
|
|
|
102
144
|
if (FLAGS_time_cpu) {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
145
|
+
printf("%d dim, %d centroids, %d x %d encoding\n"
|
|
146
|
+
"CPU time to add %d vectors (%d batches, %d per batch): "
|
|
147
|
+
"%.3f ms (%.3f us per)\n",
|
|
148
|
+
dim,
|
|
149
|
+
numCentroids,
|
|
150
|
+
bytesPerVec,
|
|
151
|
+
bitsPerCode,
|
|
152
|
+
total,
|
|
153
|
+
FLAGS_batches,
|
|
154
|
+
FLAGS_batch_size,
|
|
155
|
+
totalCpuTime,
|
|
156
|
+
totalCpuTime * 1000.0f / (float)total);
|
|
113
157
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
CUDA_VERIFY(cudaProfilerStop());
|
|
117
|
-
|
|
118
|
-
int total = FLAGS_batch_size * FLAGS_batches;
|
|
119
|
-
|
|
120
|
-
if (FLAGS_time_gpu) {
|
|
121
|
-
printf("%d dim, %d centroids, %d x %d encoding\n"
|
|
122
|
-
"GPU time to add %d vectors (%d batches, %d per batch): "
|
|
123
|
-
"%.3f ms (%.3f us per)\n",
|
|
124
|
-
dim, numCentroids, bytesPerVec, bitsPerCode,
|
|
125
|
-
total, FLAGS_batches, FLAGS_batch_size,
|
|
126
|
-
totalGpuTime, totalGpuTime * 1000.0f / (float) total);
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
if (FLAGS_time_cpu) {
|
|
130
|
-
printf("%d dim, %d centroids, %d x %d encoding\n"
|
|
131
|
-
"CPU time to add %d vectors (%d batches, %d per batch): "
|
|
132
|
-
"%.3f ms (%.3f us per)\n",
|
|
133
|
-
dim, numCentroids, bytesPerVec, bitsPerCode,
|
|
134
|
-
total, FLAGS_batches, FLAGS_batch_size,
|
|
135
|
-
totalCpuTime, totalCpuTime * 1000.0f / (float) total);
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
return 0;
|
|
158
|
+
|
|
159
|
+
return 0;
|
|
139
160
|
}
|
|
@@ -5,14 +5,13 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
#include <faiss/IndexFlat.h>
|
|
9
9
|
#include <faiss/IndexIVFFlat.h>
|
|
10
10
|
#include <faiss/IndexIVFPQ.h>
|
|
11
|
-
#include <faiss/IndexFlat.h>
|
|
12
|
-
#include <faiss/index_io.h>
|
|
13
11
|
#include <faiss/gpu/test/TestUtils.h>
|
|
14
|
-
#include <
|
|
12
|
+
#include <faiss/index_io.h>
|
|
15
13
|
#include <gflags/gflags.h>
|
|
14
|
+
#include <vector>
|
|
16
15
|
|
|
17
16
|
// For IVFPQ:
|
|
18
17
|
DEFINE_bool(ivfpq, false, "use IVFPQ encoding");
|
|
@@ -32,71 +31,83 @@ DEFINE_int32(num_train, -1, "number of database vecs to train on");
|
|
|
32
31
|
|
|
33
32
|
template <typename T>
|
|
34
33
|
void fillAndSave(T& index, int numTrain, int num, int dim) {
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
auto trainVecs = faiss::gpu::randVecs(numTrain, dim);
|
|
35
|
+
index.train(numTrain, trainVecs.data());
|
|
37
36
|
|
|
38
|
-
|
|
37
|
+
constexpr int kAddChunk = 1000000;
|
|
39
38
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
for (int i = 0; i < num; i += kAddChunk) {
|
|
40
|
+
int numRemaining = (num - i) < kAddChunk ? (num - i) : kAddChunk;
|
|
41
|
+
auto vecs = faiss::gpu::randVecs(numRemaining, dim);
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
43
|
+
printf("adding at %d: %d\n", i, numRemaining);
|
|
44
|
+
index.add(numRemaining, vecs.data());
|
|
45
|
+
}
|
|
47
46
|
|
|
48
|
-
|
|
47
|
+
faiss::write_index(&index, FLAGS_out.c_str());
|
|
49
48
|
}
|
|
50
49
|
|
|
51
50
|
int main(int argc, char** argv) {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
51
|
+
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
|
52
|
+
|
|
53
|
+
// Either ivfpq or ivfflat must be set
|
|
54
|
+
if ((FLAGS_ivfpq && FLAGS_ivfflat) || (!FLAGS_ivfpq && !FLAGS_ivfflat)) {
|
|
55
|
+
printf("must specify either ivfpq or ivfflat\n");
|
|
56
|
+
return 1;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
auto dim = FLAGS_dim;
|
|
60
|
+
auto numCentroids = FLAGS_num_coarse;
|
|
61
|
+
auto num = FLAGS_num;
|
|
62
|
+
auto numTrain = FLAGS_num_train;
|
|
63
|
+
numTrain = numTrain == -1 ? std::max((num / 4), 1) : numTrain;
|
|
64
|
+
numTrain = std::min(num, numTrain);
|
|
65
|
+
|
|
66
|
+
if (FLAGS_ivfpq) {
|
|
67
|
+
faiss::IndexFlatL2 quantizer(dim);
|
|
68
|
+
faiss::IndexIVFPQ index(
|
|
69
|
+
&quantizer,
|
|
70
|
+
dim,
|
|
71
|
+
numCentroids,
|
|
72
|
+
FLAGS_codes,
|
|
73
|
+
FLAGS_bits_per_code);
|
|
74
|
+
index.verbose = true;
|
|
75
|
+
|
|
76
|
+
printf("IVFPQ: codes %d bits per code %d\n",
|
|
77
|
+
FLAGS_codes,
|
|
78
|
+
FLAGS_bits_per_code);
|
|
79
|
+
printf("Lists: %d\n", numCentroids);
|
|
80
|
+
printf("Database: dim %d num vecs %d trained on %d\n",
|
|
81
|
+
dim,
|
|
82
|
+
num,
|
|
83
|
+
numTrain);
|
|
84
|
+
printf("output file: %s\n", FLAGS_out.c_str());
|
|
85
|
+
|
|
86
|
+
fillAndSave(index, numTrain, num, dim);
|
|
87
|
+
} else if (FLAGS_ivfflat) {
|
|
88
|
+
faiss::IndexFlatL2 quantizerL2(dim);
|
|
89
|
+
faiss::IndexFlatIP quantizerIP(dim);
|
|
90
|
+
|
|
91
|
+
faiss::IndexFlat* quantizer = FLAGS_l2
|
|
92
|
+
? (faiss::IndexFlat*)&quantizerL2
|
|
93
|
+
: (faiss::IndexFlat*)&quantizerIP;
|
|
94
|
+
|
|
95
|
+
faiss::IndexIVFFlat index(
|
|
96
|
+
quantizer,
|
|
97
|
+
dim,
|
|
98
|
+
numCentroids,
|
|
99
|
+
FLAGS_l2 ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT);
|
|
100
|
+
|
|
101
|
+
printf("IVFFlat: metric %s\n", FLAGS_l2 ? "L2" : "IP");
|
|
102
|
+
printf("Lists: %d\n", numCentroids);
|
|
103
|
+
printf("Database: dim %d num vecs %d trained on %d\n",
|
|
104
|
+
dim,
|
|
105
|
+
num,
|
|
106
|
+
numTrain);
|
|
107
|
+
printf("output file: %s\n", FLAGS_out.c_str());
|
|
108
|
+
|
|
109
|
+
fillAndSave(index, numTrain, num, dim);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return 0;
|
|
102
113
|
}
|
|
@@ -5,270 +5,285 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/gpu/impl/InterleavedCodes.h>
|
|
10
|
-
#include <faiss/gpu/utils/StaticUtils.h>
|
|
11
9
|
#include <faiss/gpu/test/TestUtils.h>
|
|
12
|
-
#include <
|
|
10
|
+
#include <faiss/gpu/utils/StaticUtils.h>
|
|
13
11
|
#include <gtest/gtest.h>
|
|
12
|
+
#include <cmath>
|
|
14
13
|
#include <random>
|
|
15
14
|
#include <sstream>
|
|
16
15
|
#include <vector>
|
|
17
16
|
|
|
18
17
|
TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
|
|
19
|
-
|
|
18
|
+
using namespace faiss::gpu;
|
|
20
19
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
20
|
+
// We are fine using non-fixed seeds here, the results should be fully
|
|
21
|
+
// deterministic
|
|
22
|
+
auto seed = std::random_device()();
|
|
23
|
+
std::mt19937 gen(seed);
|
|
24
|
+
std::uniform_int_distribution<uint8_t> dist;
|
|
26
25
|
|
|
27
|
-
|
|
26
|
+
std::cout << "seed " << seed << "\n";
|
|
28
27
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
|
|
29
|
+
for (auto dims : {1, 7, 8, 31, 32}) {
|
|
30
|
+
for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
|
|
31
|
+
std::cout << bitsPerCode << " " << dims << " " << numVecs
|
|
32
|
+
<< "\n";
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
|
|
35
|
+
std::vector<uint8_t> data(numVecs * srcVecSize);
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
for (auto& v : data) {
|
|
38
|
+
v = dist(gen);
|
|
39
|
+
}
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
// currently unimplemented
|
|
42
|
+
EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
44
|
+
// Due to bit packing, mask out bits that should be zero based
|
|
45
|
+
// on dimensions we shouldn't have present
|
|
46
|
+
int vectorSizeBits = dims * bitsPerCode;
|
|
47
|
+
int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
|
|
48
|
+
int remainder = vectorSizeBits % 8;
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
if (remainder > 0) {
|
|
51
|
+
uint8_t mask = 0xff >> (8 - remainder);
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
53
|
+
for (int i = 0; i < numVecs; ++i) {
|
|
54
|
+
int lastVecByte = (i + 1) * vectorSizeBytes - 1;
|
|
55
|
+
data[lastVecByte] &= mask;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
auto up =
|
|
60
|
+
unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
|
|
61
|
+
auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
|
|
61
62
|
|
|
62
|
-
|
|
63
|
-
|
|
63
|
+
EXPECT_EQ(data, p);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
64
66
|
}
|
|
65
|
-
}
|
|
66
67
|
}
|
|
67
68
|
|
|
68
69
|
TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
|
|
69
|
-
|
|
70
|
+
using namespace faiss::gpu;
|
|
70
71
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
// We are fine using non-fixed seeds here, the results should be fully
|
|
73
|
+
// deterministic
|
|
74
|
+
std::random_device rd;
|
|
75
|
+
std::mt19937 gen(rd());
|
|
76
|
+
std::uniform_int_distribution<uint8_t> dist;
|
|
76
77
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
78
|
+
for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
|
|
79
|
+
for (auto dims : {1, 7, 8, 31, 32}) {
|
|
80
|
+
for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
|
|
81
|
+
std::cout << bitsPerCode << " " << dims << " " << numVecs
|
|
82
|
+
<< "\n";
|
|
81
83
|
|
|
82
|
-
|
|
84
|
+
std::vector<uint8_t> data(
|
|
85
|
+
numVecs * dims * utils::divUp(bitsPerCode, 8));
|
|
83
86
|
|
|
84
|
-
|
|
85
|
-
|
|
87
|
+
// currently unimplemented
|
|
88
|
+
EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
|
|
86
89
|
|
|
87
|
-
|
|
88
|
-
|
|
90
|
+
// Mask out high bits we shouldn't have based on code size
|
|
91
|
+
uint8_t mask =
|
|
92
|
+
bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
|
|
89
93
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
94
|
+
for (auto& v : data) {
|
|
95
|
+
v = dist(gen) & mask;
|
|
96
|
+
}
|
|
93
97
|
|
|
94
|
-
|
|
95
|
-
|
|
98
|
+
auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
|
|
99
|
+
auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
|
|
96
100
|
|
|
97
|
-
|
|
98
|
-
|
|
101
|
+
EXPECT_EQ(data, up);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
99
104
|
}
|
|
100
|
-
}
|
|
101
105
|
}
|
|
102
106
|
|
|
103
107
|
TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
108
|
+
using namespace faiss::gpu;
|
|
109
|
+
|
|
110
|
+
// We are fine using non-fixed seeds here, the results should be fully
|
|
111
|
+
// deterministic
|
|
112
|
+
std::random_device rd;
|
|
113
|
+
std::mt19937 gen(rd());
|
|
114
|
+
std::uniform_int_distribution<uint8_t> dist;
|
|
115
|
+
|
|
116
|
+
for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
|
|
117
|
+
for (auto dims : {1, 7, 8, 31, 32}) {
|
|
118
|
+
for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
|
|
119
|
+
std::cout << bitsPerCode << " " << dims << " " << numVecs
|
|
120
|
+
<< "\n";
|
|
121
|
+
|
|
122
|
+
int blocks = utils::divUp(numVecs, 32);
|
|
123
|
+
int bytesPerDimBlock = 32 * bitsPerCode / 8;
|
|
124
|
+
int bytesPerBlock = bytesPerDimBlock * dims;
|
|
125
|
+
int size = blocks * bytesPerBlock;
|
|
126
|
+
|
|
127
|
+
std::vector<uint8_t> data(size);
|
|
128
|
+
|
|
129
|
+
if (bitsPerCode == 8 || bitsPerCode == 16 ||
|
|
130
|
+
bitsPerCode == 32) {
|
|
131
|
+
int bytesPerCode = bitsPerCode / 8;
|
|
132
|
+
|
|
133
|
+
for (int i = 0; i < blocks; ++i) {
|
|
134
|
+
for (int j = 0; j < dims; ++j) {
|
|
135
|
+
for (int k = 0; k < 32; ++k) {
|
|
136
|
+
for (int l = 0; l < bytesPerCode; ++l) {
|
|
137
|
+
int vec = i * 32 + k;
|
|
138
|
+
if (vec < numVecs) {
|
|
139
|
+
data[i * bytesPerBlock +
|
|
140
|
+
j * bytesPerDimBlock +
|
|
141
|
+
k * bytesPerCode + l] = dist(gen);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
} else if (bitsPerCode < 8) {
|
|
148
|
+
for (int i = 0; i < blocks; ++i) {
|
|
149
|
+
for (int j = 0; j < dims; ++j) {
|
|
150
|
+
for (int k = 0; k < bytesPerDimBlock; ++k) {
|
|
151
|
+
int loVec = i * 32 + (k * 8) / bitsPerCode;
|
|
152
|
+
int hiVec = loVec + 1;
|
|
153
|
+
int hiVec2 = hiVec + 1;
|
|
154
|
+
|
|
155
|
+
uint8_t lo = loVec < numVecs ? dist(gen) &
|
|
156
|
+
(0xff >> (8 - bitsPerCode))
|
|
157
|
+
: 0;
|
|
158
|
+
uint8_t hi = hiVec < numVecs ? dist(gen) &
|
|
159
|
+
(0xff >> (8 - bitsPerCode))
|
|
160
|
+
: 0;
|
|
161
|
+
uint8_t hi2 = hiVec2 < numVecs ? dist(gen) &
|
|
162
|
+
(0xff >> (8 - bitsPerCode))
|
|
163
|
+
: 0;
|
|
164
|
+
|
|
165
|
+
uint8_t v = 0;
|
|
166
|
+
if (bitsPerCode == 4) {
|
|
167
|
+
v = lo | (hi << 4);
|
|
168
|
+
} else if (bitsPerCode == 5) {
|
|
169
|
+
switch (k % 5) {
|
|
170
|
+
case 0:
|
|
171
|
+
// 5 msbs of lower as vOut lsbs
|
|
172
|
+
// 3 lsbs of upper as vOut msbs
|
|
173
|
+
v = (lo & 0x1f) | (hi << 5);
|
|
174
|
+
break;
|
|
175
|
+
case 1:
|
|
176
|
+
// 2 msbs of lower as vOut lsbs
|
|
177
|
+
// 5 lsbs of upper as vOut msbs
|
|
178
|
+
// 1 lsbs of upper2 as vOut msb
|
|
179
|
+
v = (lo >> 3) | (hi << 2) |
|
|
180
|
+
(hi2 << 7);
|
|
181
|
+
break;
|
|
182
|
+
case 2:
|
|
183
|
+
// 4 msbs of lower as vOut lsbs
|
|
184
|
+
// 4 lsbs of upper as vOut msbs
|
|
185
|
+
v = (lo >> 1) | (hi << 4);
|
|
186
|
+
break;
|
|
187
|
+
case 3:
|
|
188
|
+
// 1 msbs of lower as vOut lsbs
|
|
189
|
+
// 5 lsbs of upper as vOut msbs
|
|
190
|
+
// 2 lsbs of upper2 as vOut msb
|
|
191
|
+
v = (lo >> 4) | (hi << 1) |
|
|
192
|
+
(hi2 << 6);
|
|
193
|
+
break;
|
|
194
|
+
case 4:
|
|
195
|
+
// 3 msbs of lower as vOut lsbs
|
|
196
|
+
// 5 lsbs of upper as vOut msbs
|
|
197
|
+
v = (lo >> 2) | (hi << 3);
|
|
198
|
+
break;
|
|
199
|
+
}
|
|
200
|
+
} else if (bitsPerCode == 6) {
|
|
201
|
+
switch (k % 3) {
|
|
202
|
+
case 0:
|
|
203
|
+
// 6 msbs of lower as vOut lsbs
|
|
204
|
+
// 2 lsbs of upper as vOut msbs
|
|
205
|
+
v = (lo & 0x3f) | (hi << 6);
|
|
206
|
+
break;
|
|
207
|
+
case 1:
|
|
208
|
+
// 4 msbs of lower as vOut lsbs
|
|
209
|
+
// 4 lsbs of upper as vOut msbs
|
|
210
|
+
v = (lo >> 2) | (hi << 4);
|
|
211
|
+
break;
|
|
212
|
+
case 2:
|
|
213
|
+
// 2 msbs of lower as vOut lsbs
|
|
214
|
+
// 6 lsbs of upper as vOut msbs
|
|
215
|
+
v = (lo >> 4) | (hi << 2);
|
|
216
|
+
break;
|
|
217
|
+
}
|
|
218
|
+
} else {
|
|
219
|
+
// unimplemented
|
|
220
|
+
EXPECT_TRUE(false);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
data[i * bytesPerBlock + j * bytesPerDimBlock +
|
|
224
|
+
k] = v;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
207
228
|
} else {
|
|
208
|
-
|
|
209
|
-
|
|
229
|
+
// unimplemented
|
|
230
|
+
EXPECT_TRUE(false);
|
|
210
231
|
}
|
|
211
232
|
|
|
212
|
-
|
|
213
|
-
|
|
233
|
+
auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
|
|
234
|
+
auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
|
|
235
|
+
|
|
236
|
+
EXPECT_EQ(data, p);
|
|
214
237
|
}
|
|
215
|
-
}
|
|
216
|
-
} else {
|
|
217
|
-
// unimplemented
|
|
218
|
-
EXPECT_TRUE(false);
|
|
219
238
|
}
|
|
220
|
-
|
|
221
|
-
auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
|
|
222
|
-
auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
|
|
223
|
-
|
|
224
|
-
EXPECT_EQ(data, p);
|
|
225
|
-
}
|
|
226
239
|
}
|
|
227
|
-
}
|
|
228
240
|
}
|
|
229
241
|
|
|
230
242
|
TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
243
|
+
using namespace faiss::gpu;
|
|
244
|
+
|
|
245
|
+
// We are fine using non-fixed seeds here, the results should be fully
|
|
246
|
+
// deterministic
|
|
247
|
+
std::random_device rd;
|
|
248
|
+
std::mt19937 gen(rd());
|
|
249
|
+
std::uniform_int_distribution<uint8_t> dist;
|
|
250
|
+
|
|
251
|
+
for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
|
|
252
|
+
for (auto dims : {1, 7, 8, 31, 32}) {
|
|
253
|
+
for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
|
|
254
|
+
std::cout << bitsPerCode << " " << dims << " " << numVecs
|
|
255
|
+
<< "\n";
|
|
256
|
+
|
|
257
|
+
std::vector<uint8_t> data(
|
|
258
|
+
numVecs * dims * utils::divUp(bitsPerCode, 8));
|
|
259
|
+
|
|
260
|
+
if (bitsPerCode == 8 || bitsPerCode == 16 ||
|
|
261
|
+
bitsPerCode == 32) {
|
|
262
|
+
for (auto& v : data) {
|
|
263
|
+
v = dist(gen);
|
|
264
|
+
}
|
|
265
|
+
} else if (bitsPerCode < 8) {
|
|
266
|
+
uint8_t mask = 0xff >> (8 - bitsPerCode);
|
|
267
|
+
|
|
268
|
+
for (auto& v : data) {
|
|
269
|
+
v = dist(gen) & mask;
|
|
270
|
+
}
|
|
271
|
+
} else {
|
|
272
|
+
// unimplemented
|
|
273
|
+
EXPECT_TRUE(false);
|
|
274
|
+
}
|
|
260
275
|
|
|
261
|
-
|
|
262
|
-
|
|
276
|
+
auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
|
|
277
|
+
auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
|
|
263
278
|
|
|
264
|
-
|
|
265
|
-
|
|
279
|
+
EXPECT_EQ(data, up);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
266
282
|
}
|
|
267
|
-
}
|
|
268
283
|
}
|
|
269
284
|
|
|
270
285
|
int main(int argc, char** argv) {
|
|
271
|
-
|
|
286
|
+
testing::InitGoogleTest(&argc, argv);
|
|
272
287
|
|
|
273
|
-
|
|
288
|
+
return RUN_ALL_TESTS();
|
|
274
289
|
}
|