faiss 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +1 -2
- data/vendor/faiss/faiss/Clustering.cpp +39 -22
- data/vendor/faiss/faiss/Clustering.h +40 -21
- data/vendor/faiss/faiss/IVFlib.cpp +26 -12
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +40 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
- data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
- data/vendor/faiss/faiss/IndexHNSW.h +62 -49
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
- data/vendor/faiss/faiss/IndexIVF.h +46 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
- data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +11 -11
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
- data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
- data/vendor/faiss/faiss/impl/HNSW.h +52 -30
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
- data/vendor/faiss/faiss/impl/io.cpp +23 -15
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
- data/vendor/faiss/faiss/index_factory.cpp +41 -20
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +147 -123
- data/vendor/faiss/faiss/utils/distances.h +86 -9
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +120 -7
- data/vendor/faiss/faiss/utils/utils.h +60 -20
- metadata +23 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
|
@@ -35,10 +35,12 @@ IndexIVFPQR::IndexIVFPQR(
|
|
|
35
35
|
refine_pq(d, M_refine, nbits_per_idx_refine),
|
|
36
36
|
k_factor(4) {
|
|
37
37
|
by_residual = true;
|
|
38
|
+
refine_pq.cp.max_points_per_centroid = 1000;
|
|
38
39
|
}
|
|
39
40
|
|
|
40
41
|
IndexIVFPQR::IndexIVFPQR() : k_factor(1) {
|
|
41
42
|
by_residual = true;
|
|
43
|
+
refine_pq.cp.max_points_per_centroid = 1000;
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
void IndexIVFPQR::reset() {
|
|
@@ -46,24 +48,39 @@ void IndexIVFPQR::reset() {
|
|
|
46
48
|
refine_codes.clear();
|
|
47
49
|
}
|
|
48
50
|
|
|
49
|
-
void IndexIVFPQR::
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
train_residual_o(n, x, residual_2);
|
|
54
|
-
|
|
55
|
-
if (verbose)
|
|
51
|
+
void IndexIVFPQR::train_encoder(idx_t n, const float* x, const idx_t* assign) {
|
|
52
|
+
IndexIVFPQ::train_encoder(n, x, assign);
|
|
53
|
+
if (verbose) {
|
|
56
54
|
printf("training %zdx%zd 2nd level PQ quantizer on %" PRId64
|
|
57
55
|
" %dD-vectors\n",
|
|
58
56
|
refine_pq.M,
|
|
59
57
|
refine_pq.ksub,
|
|
60
58
|
n,
|
|
61
59
|
d);
|
|
62
|
-
|
|
63
|
-
refine_pq.cp.max_points_per_centroid = 1000;
|
|
60
|
+
}
|
|
64
61
|
refine_pq.cp.verbose = verbose;
|
|
65
62
|
|
|
66
|
-
|
|
63
|
+
// 2nd level residual
|
|
64
|
+
std::vector<float> residual_2(n * d);
|
|
65
|
+
std::vector<uint8_t> train_codes(pq.code_size * n);
|
|
66
|
+
pq.compute_codes(x, train_codes.data(), n);
|
|
67
|
+
|
|
68
|
+
for (idx_t i = 0; i < n; i++) {
|
|
69
|
+
const float* xx = x + i * d;
|
|
70
|
+
float* res = residual_2.data() + i * d;
|
|
71
|
+
pq.decode(train_codes.data() + i * pq.code_size, res);
|
|
72
|
+
for (int j = 0; j < d; j++) {
|
|
73
|
+
res[j] = xx[j] - res[j];
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
refine_pq.train(n, residual_2.data());
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
idx_t IndexIVFPQR::train_encoder_num_vectors() const {
|
|
81
|
+
return std::max(
|
|
82
|
+
pq.cp.max_points_per_centroid * pq.ksub,
|
|
83
|
+
refine_pq.cp.max_points_per_centroid * refine_pq.ksub);
|
|
67
84
|
}
|
|
68
85
|
|
|
69
86
|
void IndexIVFPQR::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
|
|
@@ -74,18 +91,18 @@ void IndexIVFPQR::add_core(
|
|
|
74
91
|
idx_t n,
|
|
75
92
|
const float* x,
|
|
76
93
|
const idx_t* xids,
|
|
77
|
-
const idx_t* precomputed_idx
|
|
78
|
-
|
|
79
|
-
|
|
94
|
+
const idx_t* precomputed_idx,
|
|
95
|
+
void* /*inverted_list_context*/) {
|
|
96
|
+
std::unique_ptr<float[]> residual_2(new float[n * d]);
|
|
80
97
|
|
|
81
98
|
idx_t n0 = ntotal;
|
|
82
99
|
|
|
83
|
-
add_core_o(n, x, xids, residual_2, precomputed_idx);
|
|
100
|
+
add_core_o(n, x, xids, residual_2.get(), precomputed_idx);
|
|
84
101
|
|
|
85
102
|
refine_codes.resize(ntotal * refine_pq.code_size);
|
|
86
103
|
|
|
87
104
|
refine_pq.compute_codes(
|
|
88
|
-
residual_2, &refine_codes[n0 * refine_pq.code_size], n);
|
|
105
|
+
residual_2.get(), &refine_codes[n0 * refine_pq.code_size], n);
|
|
89
106
|
}
|
|
90
107
|
#define TIC t0 = get_cycles()
|
|
91
108
|
#define TOC get_cycles() - t0
|
|
@@ -104,11 +121,10 @@ void IndexIVFPQR::search_preassigned(
|
|
|
104
121
|
uint64_t t0;
|
|
105
122
|
TIC;
|
|
106
123
|
size_t k_coarse = long(k * k_factor);
|
|
107
|
-
idx_t
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
float
|
|
111
|
-
ScopeDeleter<float> del(coarse_distances);
|
|
124
|
+
std::unique_ptr<idx_t[]> coarse_labels(new idx_t[k_coarse * n]);
|
|
125
|
+
{
|
|
126
|
+
// query with quantizer levels 1 and 2.
|
|
127
|
+
std::unique_ptr<float[]> coarse_distances(new float[k_coarse * n]);
|
|
112
128
|
|
|
113
129
|
IndexIVFPQ::search_preassigned(
|
|
114
130
|
n,
|
|
@@ -116,8 +132,8 @@ void IndexIVFPQR::search_preassigned(
|
|
|
116
132
|
k_coarse,
|
|
117
133
|
idx,
|
|
118
134
|
L1_dis,
|
|
119
|
-
coarse_distances,
|
|
120
|
-
coarse_labels,
|
|
135
|
+
coarse_distances.get(),
|
|
136
|
+
coarse_labels.get(),
|
|
121
137
|
true,
|
|
122
138
|
params);
|
|
123
139
|
}
|
|
@@ -131,13 +147,12 @@ void IndexIVFPQR::search_preassigned(
|
|
|
131
147
|
#pragma omp parallel reduction(+ : n_refine)
|
|
132
148
|
{
|
|
133
149
|
// tmp buffers
|
|
134
|
-
float
|
|
135
|
-
|
|
136
|
-
float* residual_2 = residual_1 + d;
|
|
150
|
+
std::unique_ptr<float[]> residual_1(new float[2 * d]);
|
|
151
|
+
float* residual_2 = residual_1.get() + d;
|
|
137
152
|
#pragma omp for
|
|
138
153
|
for (idx_t i = 0; i < n; i++) {
|
|
139
154
|
const float* xq = x + i * d;
|
|
140
|
-
const idx_t* shortlist = coarse_labels + k_coarse * i;
|
|
155
|
+
const idx_t* shortlist = coarse_labels.get() + k_coarse * i;
|
|
141
156
|
float* heap_sim = distances + k * i;
|
|
142
157
|
idx_t* heap_ids = labels + k * i;
|
|
143
158
|
maxheap_heapify(k, heap_sim, heap_ids);
|
|
@@ -155,7 +170,7 @@ void IndexIVFPQR::search_preassigned(
|
|
|
155
170
|
assert(ofs >= 0 && ofs < invlists->list_size(list_no));
|
|
156
171
|
|
|
157
172
|
// 1st level residual
|
|
158
|
-
quantizer->compute_residual(xq, residual_1, list_no);
|
|
173
|
+
quantizer->compute_residual(xq, residual_1.get(), list_no);
|
|
159
174
|
|
|
160
175
|
// 2nd level residual
|
|
161
176
|
const uint8_t* l2code = invlists->get_single_code(list_no, ofs);
|
|
@@ -168,9 +183,10 @@ void IndexIVFPQR::search_preassigned(
|
|
|
168
183
|
idx_t id = invlists->get_single_id(list_no, ofs);
|
|
169
184
|
assert(0 <= id && id < ntotal);
|
|
170
185
|
refine_pq.decode(
|
|
171
|
-
&refine_codes[id * refine_pq.code_size],
|
|
186
|
+
&refine_codes[id * refine_pq.code_size],
|
|
187
|
+
residual_1.get());
|
|
172
188
|
|
|
173
|
-
float dis = fvec_L2sqr(residual_1, residual_2, d);
|
|
189
|
+
float dis = fvec_L2sqr(residual_1.get(), residual_2, d);
|
|
174
190
|
|
|
175
191
|
if (dis < heap_sim[0]) {
|
|
176
192
|
idx_t id_or_pair = store_pairs ? sl : id;
|
|
@@ -37,7 +37,9 @@ struct IndexIVFPQR : IndexIVFPQ {
|
|
|
37
37
|
size_t remove_ids(const IDSelector& sel) override;
|
|
38
38
|
|
|
39
39
|
/// trains the two product quantizers
|
|
40
|
-
void
|
|
40
|
+
void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
|
|
41
|
+
|
|
42
|
+
idx_t train_encoder_num_vectors() const override;
|
|
41
43
|
|
|
42
44
|
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
|
|
43
45
|
|
|
@@ -46,7 +48,8 @@ struct IndexIVFPQR : IndexIVFPQ {
|
|
|
46
48
|
idx_t n,
|
|
47
49
|
const float* x,
|
|
48
50
|
const idx_t* xids,
|
|
49
|
-
const idx_t* precomputed_idx
|
|
51
|
+
const idx_t* precomputed_idx,
|
|
52
|
+
void* inverted_list_context = nullptr) override;
|
|
50
53
|
|
|
51
54
|
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
|
|
52
55
|
const override;
|
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
11
11
|
|
|
12
|
-
#include <stdint.h>
|
|
13
12
|
#include <algorithm>
|
|
13
|
+
#include <cstdint>
|
|
14
14
|
#include <memory>
|
|
15
15
|
|
|
16
16
|
#include <faiss/IndexLSH.h>
|
|
@@ -31,22 +31,17 @@ IndexIVFSpectralHash::IndexIVFSpectralHash(
|
|
|
31
31
|
float period)
|
|
32
32
|
: IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
|
|
33
33
|
nbit(nbit),
|
|
34
|
-
period(period)
|
|
35
|
-
threshold_type(Thresh_global) {
|
|
34
|
+
period(period) {
|
|
36
35
|
RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
|
|
37
36
|
rr->init(1234);
|
|
38
37
|
vt = rr;
|
|
39
|
-
own_fields = true;
|
|
40
38
|
is_trained = false;
|
|
39
|
+
by_residual = false;
|
|
41
40
|
}
|
|
42
41
|
|
|
43
|
-
IndexIVFSpectralHash::IndexIVFSpectralHash()
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
own_fields(false),
|
|
47
|
-
nbit(0),
|
|
48
|
-
period(0),
|
|
49
|
-
threshold_type(Thresh_global) {}
|
|
42
|
+
IndexIVFSpectralHash::IndexIVFSpectralHash() : IndexIVF() {
|
|
43
|
+
by_residual = false;
|
|
44
|
+
}
|
|
50
45
|
|
|
51
46
|
IndexIVFSpectralHash::~IndexIVFSpectralHash() {
|
|
52
47
|
if (own_fields) {
|
|
@@ -67,10 +62,14 @@ float median(size_t n, float* x) {
|
|
|
67
62
|
|
|
68
63
|
} // namespace
|
|
69
64
|
|
|
70
|
-
void IndexIVFSpectralHash::
|
|
65
|
+
void IndexIVFSpectralHash::train_encoder(
|
|
66
|
+
idx_t n,
|
|
67
|
+
const float* x,
|
|
68
|
+
const idx_t* assign) {
|
|
71
69
|
if (!vt->is_trained) {
|
|
72
70
|
vt->train(n, x);
|
|
73
71
|
}
|
|
72
|
+
FAISS_THROW_IF_NOT(!by_residual);
|
|
74
73
|
|
|
75
74
|
if (threshold_type == Thresh_global) {
|
|
76
75
|
// nothing to do
|
|
@@ -158,7 +157,7 @@ void binarize_with_freq(
|
|
|
158
157
|
}
|
|
159
158
|
}
|
|
160
159
|
|
|
161
|
-
}
|
|
160
|
+
} // namespace
|
|
162
161
|
|
|
163
162
|
void IndexIVFSpectralHash::encode_vectors(
|
|
164
163
|
idx_t n,
|
|
@@ -167,6 +166,7 @@ void IndexIVFSpectralHash::encode_vectors(
|
|
|
167
166
|
uint8_t* codes,
|
|
168
167
|
bool include_listnos) const {
|
|
169
168
|
FAISS_THROW_IF_NOT(is_trained);
|
|
169
|
+
FAISS_THROW_IF_NOT(!by_residual);
|
|
170
170
|
float freq = 2.0 / period;
|
|
171
171
|
size_t coarse_size = include_listnos ? coarse_code_size() : 0;
|
|
172
172
|
|
|
@@ -224,6 +224,7 @@ struct IVFScanner : InvertedListScanner {
|
|
|
224
224
|
hc(qcode.data(), index->code_size) {
|
|
225
225
|
this->store_pairs = store_pairs;
|
|
226
226
|
this->code_size = index->code_size;
|
|
227
|
+
this->keep_max = is_similarity_metric(index->metric_type);
|
|
227
228
|
}
|
|
228
229
|
|
|
229
230
|
void set_query(const float* query) override {
|
|
@@ -288,26 +289,23 @@ struct IVFScanner : InvertedListScanner {
|
|
|
288
289
|
}
|
|
289
290
|
};
|
|
290
291
|
|
|
292
|
+
struct BuildScanner {
|
|
293
|
+
using T = InvertedListScanner*;
|
|
294
|
+
|
|
295
|
+
template <class HammingComputer>
|
|
296
|
+
static T f(const IndexIVFSpectralHash* index, bool store_pairs) {
|
|
297
|
+
return new IVFScanner<HammingComputer>(index, store_pairs);
|
|
298
|
+
}
|
|
299
|
+
};
|
|
300
|
+
|
|
291
301
|
} // anonymous namespace
|
|
292
302
|
|
|
293
303
|
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
|
|
294
304
|
bool store_pairs,
|
|
295
305
|
const IDSelector* sel) const {
|
|
296
306
|
FAISS_THROW_IF_NOT(!sel);
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
case cs: \
|
|
300
|
-
return new IVFScanner<HammingComputer##cs>(this, store_pairs)
|
|
301
|
-
HANDLE_CODE_SIZE(4);
|
|
302
|
-
HANDLE_CODE_SIZE(8);
|
|
303
|
-
HANDLE_CODE_SIZE(16);
|
|
304
|
-
HANDLE_CODE_SIZE(20);
|
|
305
|
-
HANDLE_CODE_SIZE(32);
|
|
306
|
-
HANDLE_CODE_SIZE(64);
|
|
307
|
-
#undef HANDLE_CODE_SIZE
|
|
308
|
-
default:
|
|
309
|
-
return new IVFScanner<HammingComputerDefault>(this, store_pairs);
|
|
310
|
-
}
|
|
307
|
+
BuildScanner bs;
|
|
308
|
+
return dispatch_HammingComputer(code_size, bs, this, store_pairs);
|
|
311
309
|
}
|
|
312
310
|
|
|
313
311
|
void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
|
|
@@ -30,14 +30,14 @@ struct IndexPreTransform;
|
|
|
30
30
|
*/
|
|
31
31
|
struct IndexIVFSpectralHash : IndexIVF {
|
|
32
32
|
/// transformation from d to nbit dim
|
|
33
|
-
VectorTransform* vt;
|
|
33
|
+
VectorTransform* vt = nullptr;
|
|
34
34
|
/// own the vt
|
|
35
|
-
bool own_fields;
|
|
35
|
+
bool own_fields = true;
|
|
36
36
|
|
|
37
37
|
/// nb of bits of the binary signature
|
|
38
|
-
int nbit;
|
|
38
|
+
int nbit = 0;
|
|
39
39
|
/// interval size for 0s and 1s
|
|
40
|
-
float period;
|
|
40
|
+
float period = 0;
|
|
41
41
|
|
|
42
42
|
enum ThresholdType {
|
|
43
43
|
Thresh_global, ///< global threshold at 0
|
|
@@ -45,7 +45,7 @@ struct IndexIVFSpectralHash : IndexIVF {
|
|
|
45
45
|
Thresh_centroid_half, ///< central interval around centroid
|
|
46
46
|
Thresh_median ///< median of training set
|
|
47
47
|
};
|
|
48
|
-
ThresholdType threshold_type;
|
|
48
|
+
ThresholdType threshold_type = Thresh_global;
|
|
49
49
|
|
|
50
50
|
/// Trained threshold.
|
|
51
51
|
/// size nlist * nbit or 0 if Thresh_global
|
|
@@ -60,7 +60,7 @@ struct IndexIVFSpectralHash : IndexIVF {
|
|
|
60
60
|
|
|
61
61
|
IndexIVFSpectralHash();
|
|
62
62
|
|
|
63
|
-
void
|
|
63
|
+
void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
|
|
64
64
|
|
|
65
65
|
void encode_vectors(
|
|
66
66
|
idx_t n,
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
#include <cstring>
|
|
12
12
|
|
|
13
13
|
#include <algorithm>
|
|
14
|
+
#include <memory>
|
|
14
15
|
|
|
15
16
|
#include <faiss/impl/FaissAssert.h>
|
|
16
17
|
#include <faiss/utils/hamming.h>
|
|
@@ -56,7 +57,7 @@ const float* IndexLSH::apply_preprocess(idx_t n, const float* x) const {
|
|
|
56
57
|
}
|
|
57
58
|
|
|
58
59
|
if (train_thresholds) {
|
|
59
|
-
if (xt ==
|
|
60
|
+
if (xt == nullptr) {
|
|
60
61
|
xt = new float[nbits * n];
|
|
61
62
|
memcpy(xt, x, sizeof(*x) * n * nbits);
|
|
62
63
|
}
|
|
@@ -75,18 +76,17 @@ void IndexLSH::train(idx_t n, const float* x) {
|
|
|
75
76
|
thresholds.resize(nbits);
|
|
76
77
|
train_thresholds = false;
|
|
77
78
|
const float* xt = apply_preprocess(n, x);
|
|
78
|
-
|
|
79
|
+
std::unique_ptr<const float[]> del(xt == x ? nullptr : xt);
|
|
79
80
|
train_thresholds = true;
|
|
80
81
|
|
|
81
|
-
float
|
|
82
|
-
ScopeDeleter<float> del2(transposed_x);
|
|
82
|
+
std::unique_ptr<float[]> transposed_x(new float[n * nbits]);
|
|
83
83
|
|
|
84
84
|
for (idx_t i = 0; i < n; i++)
|
|
85
85
|
for (idx_t j = 0; j < nbits; j++)
|
|
86
86
|
transposed_x[j * n + i] = xt[i * nbits + j];
|
|
87
87
|
|
|
88
88
|
for (idx_t i = 0; i < nbits; i++) {
|
|
89
|
-
float* xi = transposed_x + i * n;
|
|
89
|
+
float* xi = transposed_x.get() + i * n;
|
|
90
90
|
// std::nth_element
|
|
91
91
|
std::sort(xi, xi + n);
|
|
92
92
|
if (n % 2 == 1)
|
|
@@ -110,19 +110,17 @@ void IndexLSH::search(
|
|
|
110
110
|
FAISS_THROW_IF_NOT(k > 0);
|
|
111
111
|
FAISS_THROW_IF_NOT(is_trained);
|
|
112
112
|
const float* xt = apply_preprocess(n, x);
|
|
113
|
-
|
|
113
|
+
std::unique_ptr<const float[]> del(xt == x ? nullptr : xt);
|
|
114
114
|
|
|
115
|
-
uint8_t
|
|
116
|
-
ScopeDeleter<uint8_t> del2(qcodes);
|
|
115
|
+
std::unique_ptr<uint8_t[]> qcodes(new uint8_t[n * code_size]);
|
|
117
116
|
|
|
118
|
-
fvecs2bitvecs(xt, qcodes, nbits, n);
|
|
117
|
+
fvecs2bitvecs(xt, qcodes.get(), nbits, n);
|
|
119
118
|
|
|
120
|
-
int
|
|
121
|
-
ScopeDeleter<int> del3(idistances);
|
|
119
|
+
std::unique_ptr<int[]> idistances(new int[n * k]);
|
|
122
120
|
|
|
123
|
-
int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances};
|
|
121
|
+
int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances.get()};
|
|
124
122
|
|
|
125
|
-
hammings_knn_hc(&res, qcodes, codes.data(), ntotal, code_size, true);
|
|
123
|
+
hammings_knn_hc(&res, qcodes.get(), codes.data(), ntotal, code_size, true);
|
|
126
124
|
|
|
127
125
|
// convert distances to floats
|
|
128
126
|
for (int i = 0; i < k * n; i++)
|
|
@@ -146,16 +144,16 @@ void IndexLSH::transfer_thresholds(LinearTransform* vt) {
|
|
|
146
144
|
void IndexLSH::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
|
|
147
145
|
FAISS_THROW_IF_NOT(is_trained);
|
|
148
146
|
const float* xt = apply_preprocess(n, x);
|
|
149
|
-
|
|
147
|
+
std::unique_ptr<const float[]> del(xt == x ? nullptr : xt);
|
|
150
148
|
fvecs2bitvecs(xt, bytes, nbits, n);
|
|
151
149
|
}
|
|
152
150
|
|
|
153
151
|
void IndexLSH::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
|
|
154
152
|
float* xt = x;
|
|
155
|
-
|
|
153
|
+
std::unique_ptr<float[]> del;
|
|
156
154
|
if (rotate_data || nbits != d) {
|
|
157
155
|
xt = new float[n * nbits];
|
|
158
|
-
del.
|
|
156
|
+
del.reset(xt);
|
|
159
157
|
}
|
|
160
158
|
bitvecs2fvecs(bytes, xt, nbits, n);
|
|
161
159
|
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
namespace faiss {
|
|
16
16
|
|
|
17
17
|
IndexLattice::IndexLattice(idx_t d, int nsq, int scale_nbit, int r2)
|
|
18
|
-
:
|
|
18
|
+
: IndexFlatCodes(0, d, METRIC_L2),
|
|
19
19
|
nsq(nsq),
|
|
20
20
|
dsq(d / nsq),
|
|
21
21
|
zn_sphere_codec(dsq, r2),
|
|
@@ -114,22 +114,4 @@ void IndexLattice::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
-
void IndexLattice::add(idx_t, const float*) {
|
|
118
|
-
FAISS_THROW_MSG("not implemented");
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
void IndexLattice::search(
|
|
122
|
-
idx_t,
|
|
123
|
-
const float*,
|
|
124
|
-
idx_t,
|
|
125
|
-
float*,
|
|
126
|
-
idx_t*,
|
|
127
|
-
const SearchParameters*) const {
|
|
128
|
-
FAISS_THROW_MSG("not implemented");
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
void IndexLattice::reset() {
|
|
132
|
-
FAISS_THROW_MSG("not implemented");
|
|
133
|
-
}
|
|
134
|
-
|
|
135
117
|
} // namespace faiss
|
|
@@ -5,21 +5,18 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
#ifndef FAISS_INDEX_LATTICE_H
|
|
11
|
-
#define FAISS_INDEX_LATTICE_H
|
|
8
|
+
#pragma once
|
|
12
9
|
|
|
13
10
|
#include <vector>
|
|
14
11
|
|
|
15
|
-
#include <faiss/
|
|
12
|
+
#include <faiss/IndexFlatCodes.h>
|
|
16
13
|
#include <faiss/impl/lattice_Zn.h>
|
|
17
14
|
|
|
18
15
|
namespace faiss {
|
|
19
16
|
|
|
20
17
|
/** Index that encodes a vector with a series of Zn lattice quantizers
|
|
21
18
|
*/
|
|
22
|
-
struct IndexLattice :
|
|
19
|
+
struct IndexLattice : IndexFlatCodes {
|
|
23
20
|
/// number of sub-vectors
|
|
24
21
|
int nsq;
|
|
25
22
|
/// dimension of sub-vectors
|
|
@@ -30,8 +27,6 @@ struct IndexLattice : Index {
|
|
|
30
27
|
|
|
31
28
|
/// nb bits used to encode the scale, per subvector
|
|
32
29
|
int scale_nbit, lattice_nbit;
|
|
33
|
-
/// total, in bytes
|
|
34
|
-
size_t code_size;
|
|
35
30
|
|
|
36
31
|
/// mins and maxes of the vector norms, per subquantizer
|
|
37
32
|
std::vector<float> trained;
|
|
@@ -46,20 +41,6 @@ struct IndexLattice : Index {
|
|
|
46
41
|
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
|
|
47
42
|
|
|
48
43
|
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
|
|
49
|
-
|
|
50
|
-
/// not implemented
|
|
51
|
-
void add(idx_t n, const float* x) override;
|
|
52
|
-
void search(
|
|
53
|
-
idx_t n,
|
|
54
|
-
const float* x,
|
|
55
|
-
idx_t k,
|
|
56
|
-
float* distances,
|
|
57
|
-
idx_t* labels,
|
|
58
|
-
const SearchParameters* params = nullptr) const override;
|
|
59
|
-
|
|
60
|
-
void reset() override;
|
|
61
44
|
};
|
|
62
45
|
|
|
63
46
|
} // namespace faiss
|
|
64
|
-
|
|
65
|
-
#endif
|
|
@@ -58,35 +58,6 @@ using storage_idx_t = NNDescent::storage_idx_t;
|
|
|
58
58
|
|
|
59
59
|
namespace {
|
|
60
60
|
|
|
61
|
-
/* Wrap the distance computer into one that negates the
|
|
62
|
-
distances. This makes supporting INNER_PRODUCE search easier */
|
|
63
|
-
|
|
64
|
-
struct NegativeDistanceComputer : DistanceComputer {
|
|
65
|
-
/// owned by this
|
|
66
|
-
DistanceComputer* basedis;
|
|
67
|
-
|
|
68
|
-
explicit NegativeDistanceComputer(DistanceComputer* basedis)
|
|
69
|
-
: basedis(basedis) {}
|
|
70
|
-
|
|
71
|
-
void set_query(const float* x) override {
|
|
72
|
-
basedis->set_query(x);
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
/// compute distance of vector i to current query
|
|
76
|
-
float operator()(idx_t i) override {
|
|
77
|
-
return -(*basedis)(i);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/// compute distance between two stored vectors
|
|
81
|
-
float symmetric_dis(idx_t i, idx_t j) override {
|
|
82
|
-
return -basedis->symmetric_dis(i, j);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
~NegativeDistanceComputer() override {
|
|
86
|
-
delete basedis;
|
|
87
|
-
}
|
|
88
|
-
};
|
|
89
|
-
|
|
90
61
|
DistanceComputer* storage_distance_computer(const Index* storage) {
|
|
91
62
|
if (is_similarity_metric(storage->metric_type)) {
|
|
92
63
|
return new NegativeDistanceComputer(storage->get_distance_computer());
|
|
@@ -158,8 +129,8 @@ void IndexNNDescent::search(
|
|
|
158
129
|
{
|
|
159
130
|
VisitedTable vt(ntotal);
|
|
160
131
|
|
|
161
|
-
DistanceComputer
|
|
162
|
-
|
|
132
|
+
std::unique_ptr<DistanceComputer> dis(
|
|
133
|
+
storage_distance_computer(storage));
|
|
163
134
|
|
|
164
135
|
#pragma omp for
|
|
165
136
|
for (idx_t i = i0; i < i1; i++) {
|
|
@@ -197,8 +168,7 @@ void IndexNNDescent::add(idx_t n, const float* x) {
|
|
|
197
168
|
storage->add(n, x);
|
|
198
169
|
ntotal = storage->ntotal;
|
|
199
170
|
|
|
200
|
-
DistanceComputer
|
|
201
|
-
ScopeDeleter1<DistanceComputer> del(dis);
|
|
171
|
+
std::unique_ptr<DistanceComputer> dis(storage_distance_computer(storage));
|
|
202
172
|
nndescent.build(*dis, ntotal, verbose);
|
|
203
173
|
}
|
|
204
174
|
|
|
@@ -29,32 +29,16 @@ using namespace nsg;
|
|
|
29
29
|
* IndexNSG implementation
|
|
30
30
|
**************************************************************/
|
|
31
31
|
|
|
32
|
-
IndexNSG::IndexNSG(int d, int R, MetricType metric)
|
|
33
|
-
: Index(d, metric),
|
|
34
|
-
nsg(R),
|
|
35
|
-
own_fields(false),
|
|
36
|
-
storage(nullptr),
|
|
37
|
-
is_built(false),
|
|
38
|
-
GK(64),
|
|
39
|
-
build_type(0) {
|
|
40
|
-
nndescent_S = 10;
|
|
41
|
-
nndescent_R = 100;
|
|
32
|
+
IndexNSG::IndexNSG(int d, int R, MetricType metric) : Index(d, metric), nsg(R) {
|
|
42
33
|
nndescent_L = GK + 50;
|
|
43
|
-
nndescent_iter = 10;
|
|
44
34
|
}
|
|
45
35
|
|
|
46
36
|
IndexNSG::IndexNSG(Index* storage, int R)
|
|
47
37
|
: Index(storage->d, storage->metric_type),
|
|
48
38
|
nsg(R),
|
|
49
|
-
own_fields(false),
|
|
50
39
|
storage(storage),
|
|
51
|
-
is_built(false),
|
|
52
|
-
GK(64),
|
|
53
40
|
build_type(1) {
|
|
54
|
-
nndescent_S = 10;
|
|
55
|
-
nndescent_R = 100;
|
|
56
41
|
nndescent_L = GK + 50;
|
|
57
|
-
nndescent_iter = 10;
|
|
58
42
|
}
|
|
59
43
|
|
|
60
44
|
IndexNSG::~IndexNSG() {
|
|
@@ -95,8 +79,8 @@ void IndexNSG::search(
|
|
|
95
79
|
{
|
|
96
80
|
VisitedTable vt(ntotal);
|
|
97
81
|
|
|
98
|
-
DistanceComputer
|
|
99
|
-
|
|
82
|
+
std::unique_ptr<DistanceComputer> dis(
|
|
83
|
+
storage_distance_computer(storage));
|
|
100
84
|
|
|
101
85
|
#pragma omp for
|
|
102
86
|
for (idx_t i = i0; i < i1; i++) {
|
|
@@ -120,7 +104,7 @@ void IndexNSG::search(
|
|
|
120
104
|
}
|
|
121
105
|
}
|
|
122
106
|
|
|
123
|
-
void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int
|
|
107
|
+
void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK_2) {
|
|
124
108
|
FAISS_THROW_IF_NOT_MSG(
|
|
125
109
|
storage,
|
|
126
110
|
"Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
|
|
@@ -131,9 +115,9 @@ void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
|
|
|
131
115
|
ntotal = storage->ntotal;
|
|
132
116
|
|
|
133
117
|
// check the knn graph
|
|
134
|
-
check_knn_graph(knn_graph, n,
|
|
118
|
+
check_knn_graph(knn_graph, n, GK_2);
|
|
135
119
|
|
|
136
|
-
const nsg::Graph<idx_t> knng(knn_graph, n,
|
|
120
|
+
const nsg::Graph<idx_t> knng(knn_graph, n, GK_2);
|
|
137
121
|
nsg.build(storage, n, knng, verbose);
|
|
138
122
|
is_built = true;
|
|
139
123
|
}
|
|
@@ -302,10 +286,10 @@ IndexNSGFlat::IndexNSGFlat(int d, int R, MetricType metric)
|
|
|
302
286
|
* IndexNSGPQ implementation
|
|
303
287
|
**************************************************************/
|
|
304
288
|
|
|
305
|
-
IndexNSGPQ::IndexNSGPQ()
|
|
289
|
+
IndexNSGPQ::IndexNSGPQ() = default;
|
|
306
290
|
|
|
307
|
-
IndexNSGPQ::IndexNSGPQ(int d, int pq_m, int M)
|
|
308
|
-
: IndexNSG(new IndexPQ(d, pq_m,
|
|
291
|
+
IndexNSGPQ::IndexNSGPQ(int d, int pq_m, int M, int pq_nbits)
|
|
292
|
+
: IndexNSG(new IndexPQ(d, pq_m, pq_nbits), M) {
|
|
309
293
|
own_fields = true;
|
|
310
294
|
is_trained = false;
|
|
311
295
|
}
|
|
@@ -325,10 +309,10 @@ IndexNSGSQ::IndexNSGSQ(
|
|
|
325
309
|
int M,
|
|
326
310
|
MetricType metric)
|
|
327
311
|
: IndexNSG(new IndexScalarQuantizer(d, qtype, metric), M) {
|
|
328
|
-
is_trained =
|
|
312
|
+
is_trained = this->storage->is_trained;
|
|
329
313
|
own_fields = true;
|
|
330
314
|
}
|
|
331
315
|
|
|
332
|
-
IndexNSGSQ::IndexNSGSQ()
|
|
316
|
+
IndexNSGSQ::IndexNSGSQ() = default;
|
|
333
317
|
|
|
334
318
|
} // namespace faiss
|