faiss 0.2.5 → 0.2.7
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/ext/faiss/index.cpp +13 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +30 -4
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h

@@ -1,4 +1,10 @@
-
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #ifndef LEVEL2_NEON_INL_H
 #define LEVEL2_NEON_INL_H
 
@@ -1940,9 +1946,15 @@ struct Index2LevelDecoderImpl<
 } // namespace
 
 // Suitable for IVF256,PQ[1]x8
+// Suitable for IVF256,PQ[1]x10 (such as IVF256,PQ16x10np)
+// Suitable for IVF256,PQ[1]x12 (such as IVF256,PQ16x12np)
+// Suitable for IVF256,PQ[1]x16 (such as IVF256,PQ16x16np)
 // Suitable for Residual[1]x8,PQ[2]x8
-// Suitable for IVF[9-16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
-// Suitable for Residual[1]x[9-16 bit],PQ[2]x8 (such as Residual2x9,PQ8)
+// Suitable for IVF[2^9-2^16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
+// Suitable for IVF[2^9-2^16 bit],PQ[1]x10 (such as IVF1024,PQ16x10np)
+// Suitable for IVF[2^9-2^16 bit],PQ[1]x12 (such as IVF1024,PQ16x12np)
+// Suitable for IVF[2^9-2^16 bit],PQ[1]x16 (such as IVF1024,PQ16x16np)
+// Suitable for Residual[1]x[9-16 bit],PQ[2]x[3] (such as Residual2x9,PQ8)
 template <
         intptr_t DIM,
         intptr_t COARSE_SIZE,
@@ -1951,11 +1963,13 @@ template <
         intptr_t FINE_BITS = 8>
 struct Index2LevelDecoder {
     static_assert(
-            COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 16,
-            "Only 8, 10 or 16 bits are currently supported for COARSE_BITS");
+            COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 12 ||
+                    COARSE_BITS == 16,
+            "Only 8, 10, 12 or 16 bits are currently supported for COARSE_BITS");
     static_assert(
-            FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
-            "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
+            FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
+                    FINE_BITS == 16,
+            "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
 
     static constexpr intptr_t dim = DIM;
     static constexpr intptr_t coarseSize = COARSE_SIZE;

data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h

@@ -1,4 +1,9 @@
-
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
 
 #ifndef PQ_AVX2_INL_H
 #define PQ_AVX2_INL_H
 
@@ -1488,12 +1493,14 @@ struct IndexPQDecoderImpl<
 
 // Suitable for PQ[1]x8
 // Suitable for PQ[1]x10
+// Suitable for PQ[1]x12
 // Suitable for PQ[1]x16
 template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
 struct IndexPQDecoder {
     static_assert(
-            FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
-            "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
+            FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
+                    FINE_BITS == 16,
+            "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
 
     static constexpr intptr_t dim = DIM;
     static constexpr intptr_t fineSize = FINE_SIZE;

data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h

@@ -1,4 +1,10 @@
-
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #ifndef PQ_INL_H
 #define PQ_INL_H
 
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h

@@ -1,4 +1,10 @@
-
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #ifndef PQ_NEON_INL_H
 #define PQ_NEON_INL_H
 
@@ -1322,12 +1328,14 @@ struct IndexPQDecoderImpl<
 
 // Suitable for PQ[1]x8
 // Suitable for PQ[1]x10
+// Suitable for PQ[1]x12
 // Suitable for PQ[1]x16
 template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
 struct IndexPQDecoder {
     static_assert(
-            FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
-            "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
+            FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
+                    FINE_BITS == 16,
+            "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
 
     static constexpr intptr_t dim = DIM;
    static constexpr intptr_t fineSize = FINE_SIZE;

data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp

@@ -11,6 +11,8 @@
 #include <faiss/IndexPreTransform.h>
 #include <faiss/IndexReplicas.h>
 #include <faiss/IndexShards.h>
+#include <faiss/IndexShardsIVF.h>
+
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
@@ -33,7 +35,12 @@ using namespace ::faiss;
 
 void GpuParameterSpace::initialize(const Index* index) {
     if (DC(IndexPreTransform)) {
-        index = ix->index;
+        initialize(ix->index);
+        return;
+    }
+    if (DC(IndexShardsIVF)) {
+        ParameterSpace::initialize(index);
+        return;
     }
     if (DC(IndexReplicas)) {
         if (ix->count() == 0)
@@ -53,6 +60,14 @@ void GpuParameterSpace::initialize(const Index* index) {
                 break;
             pr.values.push_back(nprobe);
         }
+
+        ParameterSpace ivf_pspace;
+        ivf_pspace.initialize(ix->quantizer);
+
+        for (const ParameterRange& p : ivf_pspace.parameter_ranges) {
+            ParameterRange& pr = add_range("quantizer_" + p.name);
+            pr.values = p.values;
+        }
     }
     // not sure we should call the parent initializer
 }
@@ -72,7 +87,7 @@ void GpuParameterSpace::set_index_parameter(
     }
     if (name == "nprobe") {
         if (DC(GpuIndexIVF)) {
-            ix->setNumProbes(int(val));
+            ix->nprobe = size_t(val);
             return;
         }
     }
@@ -83,6 +98,14 @@ void GpuParameterSpace::set_index_parameter(
         }
     }
 
+    if (name.find("quantizer_") == 0) {
+        if (DC(GpuIndexIVF)) {
+            std::string sub_name = name.substr(strlen("quantizer_"));
+            set_index_parameter(ix->quantizer, sub_name, val);
+            return;
+        }
+    }
+
     // maybe normal index parameters apply?
     ParameterSpace::set_index_parameter(index, name, val);
 }

data/vendor/faiss/faiss/gpu/GpuCloner.cpp

@@ -18,6 +18,7 @@
 #include <faiss/IndexPreTransform.h>
 #include <faiss/IndexReplicas.h>
 #include <faiss/IndexScalarQuantizer.h>
+#include <faiss/IndexShardsIVF.h>
 #include <faiss/MetaIndexes.h>
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndexFlat.h>
@@ -116,7 +117,6 @@ ToGpuCloner::ToGpuCloner(
         : GpuClonerOptions(options), provider(prov), device(device) {}
 
 Index* ToGpuCloner::clone_Index(const Index* index) {
-    using idx_t = Index::idx_t;
     if (auto ifl = dynamic_cast<const IndexFlat*>(index)) {
         GpuIndexFlatConfig config;
         config.device = device;
@@ -227,8 +227,8 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
         std::vector<int>& devices,
         const GpuMultipleClonerOptions& options)
         : GpuMultipleClonerOptions(options) {
-    FAISS_ASSERT(provider.size() == devices.size());
-    for (int i = 0; i < provider.size(); i++) {
+    FAISS_THROW_IF_NOT(provider.size() == devices.size());
+    for (size_t i = 0; i < provider.size(); i++) {
         sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
     }
 }
@@ -241,28 +241,43 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
 void ToGpuClonerMultiple::copy_ivf_shard(
         const IndexIVF* index_ivf,
         IndexIVF* idx2,
-        long n,
-        long i) {
+        idx_t n,
+        idx_t i) {
     if (shard_type == 2) {
-        long i0 = i * index_ivf->ntotal / n;
-        long i1 = (i + 1) * index_ivf->ntotal / n;
+        idx_t i0 = i * index_ivf->ntotal / n;
+        idx_t i1 = (i + 1) * index_ivf->ntotal / n;
 
         if (verbose)
             printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
-        index_ivf->copy_subset_to(*idx2, 2, i0, i1);
+        index_ivf->copy_subset_to(
+                *idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
         FAISS_ASSERT(idx2->ntotal == i1 - i0);
     } else if (shard_type == 1) {
         if (verbose)
             printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
-        index_ivf->copy_subset_to(*idx2, 1, n, i);
+        index_ivf->copy_subset_to(
+                *idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
+    } else if (shard_type == 4) {
+        idx_t i0 = i * index_ivf->nlist / n;
+        idx_t i1 = (i + 1) * index_ivf->nlist / n;
+        if (verbose) {
+            printf("IndexShards %ld/%ld select lists %d:%d\n",
+                   i,
+                   n,
+                   int(i0),
+                   int(i1));
+        }
+        index_ivf->copy_subset_to(
+                *idx2, InvertedLists::SUBSET_TYPE_INVLIST, i0, i1);
     } else {
         FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
     }
 }
 
 Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
-    long n = sub_cloners.size();
+    idx_t n = sub_cloners.size();
 
+    auto index_ivf = dynamic_cast<const faiss::IndexIVF*>(index);
     auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ*>(index);
     auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat*>(index);
     auto index_ivfsq =
@@ -274,16 +289,36 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
             "IndexIVFFlat, IndexIVFScalarQuantizer, "
             "IndexFlat and IndexIVFPQ");
 
+    // decide what coarse quantizer the sub-indexes are going to have
+    const Index* quantizer = nullptr;
+    std::unique_ptr<Index> new_quantizer;
+    if (index_ivf) {
+        quantizer = index_ivf->quantizer;
+        if (common_ivf_quantizer &&
+            !dynamic_cast<const IndexFlat*>(quantizer)) {
+            // then we flatten the coarse quantizer so that everything remains
+            // on GPU
+            new_quantizer.reset(
+                    new IndexFlat(quantizer->d, quantizer->metric_type));
+            std::vector<float> centroids(quantizer->d * quantizer->ntotal);
+            quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
+            new_quantizer->add(quantizer->ntotal, centroids.data());
+            quantizer = new_quantizer.get();
+        }
+    }
+
     std::vector<faiss::Index*> shards(n);
 
-    for (long i = 0; i < n; i++) {
+    for (idx_t i = 0; i < n; i++) {
         // make a shallow copy
-        if (reserveVecs)
+        if (reserveVecs) {
             sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
-
+        }
+        // note: const_casts here are harmless because the indexes build here
+        // are short-lived, translated immediately to GPU indexes.
         if (index_ivfpq) {
             faiss::IndexIVFPQ idx2(
-                    index_ivfpq->quantizer,
+                    const_cast<Index*>(quantizer),
                     index_ivfpq->d,
                     index_ivfpq->nlist,
                     index_ivfpq->code_size,
@@ -297,7 +332,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_ivfflat) {
             faiss::IndexIVFFlat idx2(
-                    index_ivfflat->quantizer,
+                    const_cast<Index*>(quantizer),
                     index->d,
                     index_ivfflat->nlist,
                     index_ivfflat->metric_type);
@@ -307,7 +342,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_ivfsq) {
             faiss::IndexIVFScalarQuantizer idx2(
-                    index_ivfsq->quantizer,
+                    const_cast<Index*>(quantizer),
                     index->d,
                     index_ivfsq->nlist,
                     index_ivfsq->sq.qtype,
@@ -323,40 +358,52 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
             faiss::IndexFlat idx2(index->d, index->metric_type);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
             if (index->ntotal > 0) {
-                long i0 = index->ntotal * i / n;
-                long i1 = index->ntotal * (i + 1) / n;
+                idx_t i0 = index->ntotal * i / n;
+                idx_t i1 = index->ntotal * (i + 1) / n;
                 shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
             }
         }
     }
 
     bool successive_ids = index_flat != nullptr;
-    faiss::IndexShards* res =
-            new faiss::IndexShards(index->d, true, successive_ids);
+    faiss::IndexShards* res;
+    if (common_ivf_quantizer && index_ivf) {
+        this->shard = false;
+        Index* common_quantizer = clone_Index(index_ivf->quantizer);
+        this->shard = true;
+        IndexShardsIVF* idx = new faiss::IndexShardsIVF(
+                common_quantizer, index_ivf->nlist, true, false);
+        idx->own_fields = true;
+        idx->own_indices = true;
+        res = idx;
+    } else {
+        res = new faiss::IndexShards(index->d, true, successive_ids);
+        res->own_indices = true;
+    }
 
     for (int i = 0; i < n; i++) {
         res->add_shard(shards[i]);
     }
-    res->own_fields = true;
     FAISS_ASSERT(index->ntotal == res->ntotal);
     return res;
 }
 
 Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
-    long n = sub_cloners.size();
-    if (n == 1)
+    idx_t n = sub_cloners.size();
+    if (n == 1) {
         return sub_cloners[0].clone_Index(index);
+    }
 
     if (dynamic_cast<const IndexFlat*>(index) ||
-        dynamic_cast<const faiss::IndexIVFFlat*>(index) ||
-        dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(index) ||
-        dynamic_cast<const faiss::IndexIVFPQ*>(index)) {
+        dynamic_cast<const IndexIVFFlat*>(index) ||
+        dynamic_cast<const IndexIVFScalarQuantizer*>(index) ||
+        dynamic_cast<const IndexIVFPQ*>(index)) {
         if (!shard) {
             IndexReplicas* res = new IndexReplicas();
             for (auto& sub_cloner : sub_cloners) {
                 res->addIndex(sub_cloner.clone_Index(index));
             }
-            res->own_fields = true;
+            res->own_indices = true;
             return res;
         } else {
            return clone_Index_to_shards(index);
@@ -373,8 +420,8 @@ Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
         for (int m = 0; m < pq.M; m++) {
             // which GPU(s) will be assigned to this sub-quantizer
 
-            long i0 = m * n / pq.M;
-            long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
+            idx_t i0 = m * n / pq.M;
+            idx_t i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
             std::vector<ToGpuCloner> sub_cloners_2;
             sub_cloners_2.insert(
                     sub_cloners_2.begin(),

data/vendor/faiss/faiss/gpu/GpuClonerOptions.h

@@ -14,41 +14,42 @@ namespace gpu {
 
 /// set some options on how to copy to GPU
 struct GpuClonerOptions {
-    GpuClonerOptions();
-
     /// how should indices be stored on index types that support indices
     /// (anything but GpuIndexFlat*)?
-    IndicesOptions indicesOptions;
+    IndicesOptions indicesOptions = INDICES_64_BIT;
 
     /// is the coarse quantizer in float16?
-    bool useFloat16CoarseQuantizer;
+    bool useFloat16CoarseQuantizer = false;
 
     /// for GpuIndexIVFFlat, is storage in float16?
     /// for GpuIndexIVFPQ, are intermediate calculations in float16?
-    bool useFloat16;
+    bool useFloat16 = false;
 
     /// use precomputed tables?
-    bool usePrecomputed;
+    bool usePrecomputed = false;
 
     /// reserve vectors in the invfiles?
-    long reserveVecs;
+    long reserveVecs = 0;
 
     /// For GpuIndexFlat, store data in transposed layout?
-    bool storeTransposed;
+    bool storeTransposed = false;
 
     /// Set verbose options on the index
-    bool verbose;
+    bool verbose = false;
 };
 
 struct GpuMultipleClonerOptions : public GpuClonerOptions {
-    GpuMultipleClonerOptions();
-
     /// Whether to shard the index across GPUs, versus replication
     /// across GPUs
-    bool shard;
+    bool shard = false;
 
     /// IndexIVF::copy_subset_to subset type
-    int shard_type;
+    int shard_type = 1;
+
+    /// set to true if an IndexIVF is to be dispatched to multiple GPUs with a
+    /// single common IVF quantizer, ie. only the inverted lists are sharded on
+    /// the sub-indexes (uses an IndexShardsIVF)
+    bool common_ivf_quantizer = false;
 };
 
 } // namespace gpu

data/vendor/faiss/faiss/gpu/GpuDistance.h

@@ -45,7 +45,8 @@ struct GpuDistanceParams
               outDistances(nullptr),
               ignoreOutDistances(false),
               outIndicesType(IndicesDataType::I64),
-              outIndices(nullptr) {}
+              outIndices(nullptr),
+              device(-1) {}
 
     //
     // Search parameters
@@ -76,7 +77,7 @@ struct GpuDistanceParams
     const void* vectors;
     DistanceDataType vectorType;
     bool vectorsRowMajor;
-    int numVectors;
+    idx_t numVectors;
 
     /// Precomputed L2 norms for each vector in `vectors`, which can be
     /// optionally provided in advance to speed computation for METRIC_L2
@@ -93,7 +94,7 @@ struct GpuDistanceParams
     const void* queries;
     DistanceDataType queryType;
     bool queriesRowMajor;
-    int numQueries;
+    idx_t numQueries;
 
     //
     // Output results
@@ -112,6 +113,17 @@ struct GpuDistanceParams
     /// innermost (row major). Not used if k == -1 (all pairwise distances)
     IndicesDataType outIndicesType;
     void* outIndices;
+
+    //
+    // Execution information
+    //
+
+    /// On which GPU device should the search run?
+    /// -1 indicates that the current CUDA thread-local device
+    /// (via cudaGetDevice/cudaSetDevice) is used
+    /// Otherwise, an integer 0 <= device < numDevices indicates the device for
+    /// execution
+    int device;
 };
 
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
@@ -137,13 +149,13 @@ void bruteForceKnn(
         // dims x numVectors, with numVectors innermost
         const float* vectors,
         bool vectorsRowMajor,
-        int numVectors,
+        idx_t numVectors,
         // If queriesRowMajor is true, this is
         // numQueries x dims, with dims innermost; otherwise,
        // dims x numQueries, with numQueries innermost
         const float* queries,
         bool queriesRowMajor,
-        int numQueries,
+        idx_t numQueries,
         int dims,
         int k,
         // A region of memory size numQueries x k, with k
@@ -151,7 +163,7 @@ void bruteForceKnn(
         float* outDistances,
         // A region of memory size numQueries x k, with k
         // innermost (row major)
-        Index::idx_t* outIndices);
+        idx_t* outIndices);
 
 } // namespace gpu
 } // namespace faiss

data/vendor/faiss/faiss/gpu/GpuIndex.h

@@ -51,30 +51,31 @@ class GpuIndex : public faiss::Index {
     /// `x` can be resident on the CPU or any GPU; copies are performed
     /// as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
-    void add(Index::idx_t, const float* x) override;
+    void add(idx_t, const float* x) override;
 
     /// `x` and `ids` can be resident on the CPU or any GPU; copies are
     /// performed as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
-    void add_with_ids(Index::idx_t n, const float* x, const Index::idx_t* ids)
-            override;
+    void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
 
     /// `x` and `labels` can be resident on the CPU or any GPU; copies are
     /// performed as needed
     void assign(
-            Index::idx_t n,
+            idx_t n,
             const float* x,
-            Index::idx_t* labels,
-            Index::idx_t k = 1) const override;
+            idx_t* labels,
+            // faiss::Index has idx_t for k
+            idx_t k = 1) const override;
 
     /// `x`, `distances` and `labels` can be resident on the CPU or any
     /// GPU; copies are performed as needed
     void search(
-            Index::idx_t n,
+            idx_t n,
             const float* x,
-            Index::idx_t k,
+            // faiss::Index has idx_t for k
+            idx_t k,
             float* distances,
-            Index::idx_t* labels,
+            idx_t* labels,
             const SearchParameters* params = nullptr) const override;
 
     /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
@@ -82,6 +83,7 @@ class GpuIndex : public faiss::Index {
     void search_and_reconstruct(
             idx_t n,
             const float* x,
+            // faiss::Index has idx_t for k
             idx_t k,
             float* distances,
             idx_t* labels,
@@ -90,16 +92,16 @@ class GpuIndex : public faiss::Index {
 
     /// Overridden to force GPU indices to provide their own GPU-friendly
     /// implementation
-    void compute_residual(const float* x, float* residual, Index::idx_t key)
+    void compute_residual(const float* x, float* residual, idx_t key)
             const override;
 
     /// Overridden to force GPU indices to provide their own GPU-friendly
     /// implementation
     void compute_residual_n(
-            Index::idx_t n,
+            idx_t n,
             const float* xs,
             float* residuals,
-            const Index::idx_t* keys) const override;
+            const idx_t* keys) const override;
 
 protected:
     /// Copy what we need from the CPU equivalent
@@ -114,43 +116,43 @@ class GpuIndex : public faiss::Index {
 
     /// Overridden to actually perform the add
     /// All data is guaranteed to be resident on our device
-    virtual void addImpl_(int n, const float* x, const Index::idx_t* ids) = 0;
+    virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
 
     /// Overridden to actually perform the search
     /// All data is guaranteed to be resident on our device
     virtual void searchImpl_(
-            int n,
+            idx_t n,
             const float* x,
             int k,
             float* distances,
-            Index::idx_t* labels,
+            idx_t* labels,
             const SearchParameters* params) const = 0;
 
 private:
     /// Handles paged adds if the add set is too large, passes to
     /// addImpl_ to actually perform the add for the current page
-    void addPaged_(int n, const float* x, const Index::idx_t* ids);
+    void addPaged_(idx_t n, const float* x, const idx_t* ids);
 
     /// Calls addImpl_ for a single page of GPU-resident data
-    void addPage_(int n, const float* x, const Index::idx_t* ids);
+    void addPage_(idx_t n, const float* x, const idx_t* ids);
 
     /// Calls searchImpl_ for a single page of GPU-resident data
     void searchNonPaged_(
-            int n,
+            idx_t n,
             const float* x,
             int k,
             float* outDistancesData,
-            Index::idx_t* outIndicesData,
+            idx_t* outIndicesData,
             const SearchParameters* params) const;
 
     /// Calls searchImpl_ for a single page of GPU-resident data,
     /// handling paging of the data and copies from the CPU
     void searchFromCpuPaged_(
-            int n,
+            idx_t n,
             const float* x,
             int k,
             float* outDistancesData,
-            Index::idx_t* outIndicesData,
+            idx_t* outIndicesData,
             const SearchParameters* params) const;
 
 protected: