faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/gpu/utils/Timer.h>
|
|
10
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
|
|
13
|
+
namespace faiss { namespace gpu {
|
|
14
|
+
|
|
15
|
+
KernelTimer::KernelTimer(cudaStream_t stream)
|
|
16
|
+
: startEvent_(0),
|
|
17
|
+
stopEvent_(0),
|
|
18
|
+
stream_(stream),
|
|
19
|
+
valid_(true) {
|
|
20
|
+
CUDA_VERIFY(cudaEventCreate(&startEvent_));
|
|
21
|
+
CUDA_VERIFY(cudaEventCreate(&stopEvent_));
|
|
22
|
+
|
|
23
|
+
CUDA_VERIFY(cudaEventRecord(startEvent_, stream_));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/// Releases the two events created by the constructor.
KernelTimer::~KernelTimer() {
  // Start event first, then stop event.
  CUDA_VERIFY(cudaEventDestroy(startEvent_));
  CUDA_VERIFY(cudaEventDestroy(stopEvent_));
}
|
|
30
|
+
|
|
31
|
+
float
|
|
32
|
+
KernelTimer::elapsedMilliseconds() {
|
|
33
|
+
FAISS_ASSERT(valid_);
|
|
34
|
+
|
|
35
|
+
CUDA_VERIFY(cudaEventRecord(stopEvent_, stream_));
|
|
36
|
+
CUDA_VERIFY(cudaEventSynchronize(stopEvent_));
|
|
37
|
+
|
|
38
|
+
auto time = 0.0f;
|
|
39
|
+
CUDA_VERIFY(cudaEventElapsedTime(&time, startEvent_, stopEvent_));
|
|
40
|
+
valid_ = false;
|
|
41
|
+
|
|
42
|
+
return time;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/// Captures the current CLOCK_REALTIME reading as the start point.
CpuTimer::CpuTimer() {
  // The return value of clock_gettime is not checked.
  ::clock_gettime(CLOCK_REALTIME, &start_);
}
|
|
48
|
+
|
|
49
|
+
float
|
|
50
|
+
CpuTimer::elapsedMilliseconds() {
|
|
51
|
+
struct timespec end;
|
|
52
|
+
clock_gettime(CLOCK_REALTIME, &end);
|
|
53
|
+
|
|
54
|
+
auto diffS = end.tv_sec - start_.tv_sec;
|
|
55
|
+
auto diffNs = end.tv_nsec - start_.tv_nsec;
|
|
56
|
+
|
|
57
|
+
return 1000.0f * (float) diffS + ((float) diffNs) / 1000000.0f;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
} } // namespace
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <cuda_runtime.h>
|
|
12
|
+
#include <time.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss { namespace gpu {
|
|
15
|
+
|
|
16
|
+
/// Utility class for timing execution of a kernel
|
|
17
|
+
class KernelTimer {
|
|
18
|
+
public:
|
|
19
|
+
/// Constructor starts the timer and adds an event into the current
|
|
20
|
+
/// device stream
|
|
21
|
+
KernelTimer(cudaStream_t stream = 0);
|
|
22
|
+
|
|
23
|
+
/// Destructor releases event resources
|
|
24
|
+
~KernelTimer();
|
|
25
|
+
|
|
26
|
+
/// Adds a stop event then synchronizes on the stop event to get the
|
|
27
|
+
/// actual GPU-side kernel timings for any kernels launched in the
|
|
28
|
+
/// current stream. Returns the number of milliseconds elapsed.
|
|
29
|
+
/// Can only be called once.
|
|
30
|
+
float elapsedMilliseconds();
|
|
31
|
+
|
|
32
|
+
private:
|
|
33
|
+
cudaEvent_t startEvent_;
|
|
34
|
+
cudaEvent_t stopEvent_;
|
|
35
|
+
cudaStream_t stream_;
|
|
36
|
+
bool valid_;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
/// Host-side wall-clock timer based on clock_gettime.
class CpuTimer {
 public:
  /// Starts timing at construction.
  CpuTimer();

  /// Milliseconds elapsed since construction.
  float elapsedMilliseconds();

 private:
  timespec start_;  ///< clock reading captured by the constructor
};
|
|
51
|
+
|
|
52
|
+
} } // namespace
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <cstring>
|
|
11
|
+
|
|
12
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
13
|
+
|
|
14
|
+
#include <faiss/impl/FaissAssert.h>
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
/***********************************************************************
|
|
21
|
+
* RangeSearchResult
|
|
22
|
+
***********************************************************************/
|
|
23
|
+
|
|
24
|
+
/// Builds an empty result for `nq` queries.
/// When `alloc_lims` is true, `lims` is allocated with nq + 1 zeroed
/// entries; otherwise it is left null.
/// `labels` and `distances` start null; do_allocation() allocates them.
RangeSearchResult::RangeSearchResult (idx_t nq, bool alloc_lims): nq (nq) {
    // `new T[n]()` value-initializes, i.e. zero-fills the array.
    lims = alloc_lims ? new size_t [nq + 1] () : nullptr;

    labels = nullptr;
    distances = nullptr;

    // default number of entries per temporary result buffer
    buffer_size = 1024 * 256;
}
|
|
35
|
+
|
|
36
|
+
/// called when lims contains the nb of elements result entries
|
|
37
|
+
/// for each query
|
|
38
|
+
void RangeSearchResult::do_allocation () {
|
|
39
|
+
size_t ofs = 0;
|
|
40
|
+
for (int i = 0; i < nq; i++) {
|
|
41
|
+
size_t n = lims[i];
|
|
42
|
+
lims [i] = ofs;
|
|
43
|
+
ofs += n;
|
|
44
|
+
}
|
|
45
|
+
lims [nq] = ofs;
|
|
46
|
+
labels = new idx_t [ofs];
|
|
47
|
+
distances = new float [ofs];
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/// Frees the three arrays held by the result. Each of them may be null,
/// which delete[] accepts.
RangeSearchResult::~RangeSearchResult () {
    delete [] labels;
    delete [] distances;
    delete [] lims;
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
/***********************************************************************
|
|
61
|
+
* BufferList
|
|
62
|
+
***********************************************************************/
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
/// Creates an empty list whose buffers will hold `buffer_size` entries each.
/// The write pointer starts at buffer_size so that the first add() sees a
/// "full" buffer and appends the first real one.
BufferList::BufferList (size_t buffer_size):
    buffer_size (buffer_size),
    wp (buffer_size)
{
}
|
|
70
|
+
|
|
71
|
+
/// Releases the id and distance arrays of every buffer in the list.
BufferList::~BufferList ()
{
    for (Buffer & buf : buffers) {
        delete [] buf.ids;
        delete [] buf.dis;
    }
}
|
|
78
|
+
|
|
79
|
+
/// Appends one (id, distance) pair, starting a new buffer first when the
/// current one is full (or when no buffer exists yet).
void BufferList::add (idx_t id, float dis) {
    const bool current_full = (wp == buffer_size);
    if (current_full) {
        append_buffer();   // resets wp to 0
    }

    Buffer & last = buffers.back();
    last.ids [wp] = id;
    last.dis [wp] = dis;
    ++wp;
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
void BufferList::append_buffer ()
|
|
91
|
+
{
|
|
92
|
+
Buffer buf = {new idx_t [buffer_size], new float [buffer_size]};
|
|
93
|
+
buffers.push_back (buf);
|
|
94
|
+
wp = 0;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
|
|
98
|
+
/// tables dest_ids, dest_dis
|
|
99
|
+
void BufferList::copy_range (size_t ofs, size_t n,
|
|
100
|
+
idx_t * dest_ids, float *dest_dis)
|
|
101
|
+
{
|
|
102
|
+
size_t bno = ofs / buffer_size;
|
|
103
|
+
ofs -= bno * buffer_size;
|
|
104
|
+
while (n > 0) {
|
|
105
|
+
size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs;
|
|
106
|
+
Buffer buf = buffers [bno];
|
|
107
|
+
memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids));
|
|
108
|
+
memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis));
|
|
109
|
+
dest_ids += ncopy;
|
|
110
|
+
dest_dis += ncopy;
|
|
111
|
+
ofs = 0;
|
|
112
|
+
bno ++;
|
|
113
|
+
n -= ncopy;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
/***********************************************************************
|
|
119
|
+
* RangeSearchPartialResult
|
|
120
|
+
***********************************************************************/
|
|
121
|
+
|
|
122
|
+
/// Reports one new result for this query: bumps the per-query counter and
/// stores the pair in the owning partial result's buffers.
/// Note the argument order is swapped relative to BufferList::add (id, dis).
void RangeQueryResult::add (float dis, idx_t id) {
    ++nres;
    pres->add (id, dis);
}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
/// Creates a partial result that will eventually be written into `res_in`.
/// The buffer size is taken from res_in->buffer_size; the pointer is stored,
/// not copied, and must not be null.
RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
    BufferList (res_in->buffer_size),
    res (res_in)
{
}
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
/// begin a new result
|
|
136
|
+
RangeQueryResult &
|
|
137
|
+
RangeSearchPartialResult::new_result (idx_t qno)
|
|
138
|
+
{
|
|
139
|
+
RangeQueryResult qres = {qno, 0, this};
|
|
140
|
+
queries.push_back (qres);
|
|
141
|
+
return queries.back();
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
void RangeSearchPartialResult::finalize ()
|
|
146
|
+
{
|
|
147
|
+
set_lims ();
|
|
148
|
+
#pragma omp barrier
|
|
149
|
+
|
|
150
|
+
#pragma omp single
|
|
151
|
+
res->do_allocation ();
|
|
152
|
+
|
|
153
|
+
#pragma omp barrier
|
|
154
|
+
copy_result ();
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
/// called by range_search before do_allocation
|
|
159
|
+
void RangeSearchPartialResult::set_lims ()
|
|
160
|
+
{
|
|
161
|
+
for (int i = 0; i < queries.size(); i++) {
|
|
162
|
+
RangeQueryResult & qres = queries[i];
|
|
163
|
+
res->lims[qres.qno] = qres.nres;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/// called by range_search after do_allocation
|
|
168
|
+
void RangeSearchPartialResult::copy_result (bool incremental)
|
|
169
|
+
{
|
|
170
|
+
size_t ofs = 0;
|
|
171
|
+
for (int i = 0; i < queries.size(); i++) {
|
|
172
|
+
RangeQueryResult & qres = queries[i];
|
|
173
|
+
|
|
174
|
+
copy_range (ofs, qres.nres,
|
|
175
|
+
res->labels + res->lims[qres.qno],
|
|
176
|
+
res->distances + res->lims[qres.qno]);
|
|
177
|
+
if (incremental) {
|
|
178
|
+
res->lims[qres.qno] += qres.nres;
|
|
179
|
+
}
|
|
180
|
+
ofs += qres.nres;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/// Merges a set of partial results into the RangeSearchResult they point to.
///
/// @param partial_results  partial results to merge; null entries are
///                         skipped. The destination is the `res` of the
///                         first non-null entry.
/// @param do_delete        when true, each merged partial result is deleted
///                         and its slot set to nullptr.
///
/// Does nothing when the vector is empty or holds only null entries.
/// NOTE(review): the counting step adds to lims, so it assumes lims is
/// zeroed on entry (as the RangeSearchResult constructor leaves it).
void RangeSearchPartialResult::merge (std::vector <RangeSearchPartialResult *> &
                                      partial_results, bool do_delete)
{
    // Locate the destination via the first non-null entry. The loops below
    // tolerate null entries, so index 0 must not be dereferenced blindly.
    RangeSearchResult *result = nullptr;
    for (const RangeSearchPartialResult * pres : partial_results) {
        if (pres) {
            result = pres->res;
            break;
        }
    }
    if (!result) return;
    const size_t nx = result->nq;

    // count the results of each query over all partial results
    for (const RangeSearchPartialResult * pres : partial_results) {
        if (!pres) continue;
        for (const RangeQueryResult &qres : pres->queries) {
            result->lims[qres.qno] += qres.nres;
        }
    }
    result->do_allocation ();

    // Incremental copy: each partial result appends after the previous one
    // and advances lims[qno] accordingly.
    for (size_t j = 0; j < partial_results.size(); j++) {
        if (!partial_results[j]) continue;
        partial_results[j]->copy_result (true);
        if (do_delete) {
            delete partial_results[j];
            partial_results[j] = nullptr;
        }
    }

    // reset the limits: after the incremental copies lims[i] holds the end
    // of query i, i.e. the start of query i + 1, so shift everything by one.
    for (size_t i = nx; i > 0; i--) {
        result->lims [i] = result->lims [i - 1];
    }
    result->lims [0] = 0;
}
|
|
216
|
+
|
|
217
|
+
/***********************************************************************
|
|
218
|
+
* IDSelectorRange
|
|
219
|
+
***********************************************************************/
|
|
220
|
+
|
|
221
|
+
/// Selects the ids in the half-open interval [imin, imax).
IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
    imin (imin),
    imax (imax)
{}
|
|
225
|
+
|
|
226
|
+
/// True when imin <= id < imax.
bool IDSelectorRange::is_member (idx_t id) const
{
    if (id < imin) {
        return false;
    }
    return id < imax;
}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
/***********************************************************************
|
|
233
|
+
* IDSelectorBatch
|
|
234
|
+
***********************************************************************/
|
|
235
|
+
|
|
236
|
+
/// Builds the selector from `n` ids read from `indices`.
///
/// All ids are inserted into `set`. In addition a bloom-style bit table
/// indexed by the low `nbits` bits of the id is filled, so that is_member()
/// can reject most non-members without a hash lookup.
/// nbits is ceil(log2(n)) + 5, i.e. the table has at least 32 bits per id.
///
/// Shifts are done on size_t / idx_t rather than on `long`, whose width is
/// only 32 bits on some platforms (shifting it by nbits >= 32 is undefined).
/// NOTE(review): assumes idx_t is wide enough to hold nbits bits (it is
/// declared elsewhere as Index::idx_t; confirm it is 64-bit).
IDSelectorBatch::IDSelectorBatch (size_t n, const idx_t *indices)
{
    nbits = 0;
    while (n > (static_cast<size_t>(1) << nbits)) nbits++;
    nbits += 5;
    // for n = 1M, nbits = 25 is optimal, see P56659518

    mask = (static_cast<idx_t>(1) << nbits) - 1;
    // one bit per possible masked value, 8 bits per table byte
    bloom.resize (static_cast<size_t>(1) << (nbits - 3), 0);
    for (size_t i = 0; i < n; i++) {
        idx_t id = indices[i];
        set.insert(id);
        id &= mask;                        // non-negative after masking
        bloom[id >> 3] |= 1 << (id & 7);   // byte index, then bit in byte
    }
}
|
|
252
|
+
|
|
253
|
+
/// Returns true when `i` is one of the ids given to the constructor.
/// The bit table is consulted first; only ids whose bit is set go on to
/// the exact lookup in `set`.
bool IDSelectorBatch::is_member (idx_t i) const
{
    // Keep the masked value in idx_t: a `long` would truncate it on
    // platforms where long is 32 bits and index the wrong table byte.
    const idx_t im = i & mask;
    if (!(bloom[im >> 3] & (1 << (im & 7)))) {
        return false;
    }
    return set.count(i) != 0;
}
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
/***********************************************************
|
|
264
|
+
* Interrupt callback
|
|
265
|
+
***********************************************************/
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
std::unique_ptr<InterruptCallback> InterruptCallback::instance;
|
|
269
|
+
|
|
270
|
+
std::mutex InterruptCallback::lock;
|
|
271
|
+
|
|
272
|
+
/// Destroys the installed callback, if any, leaving `instance` empty.
void InterruptCallback::clear_instance () {
    instance.reset ();
}
|
|
275
|
+
|
|
276
|
+
void InterruptCallback::check () {
|
|
277
|
+
if (!instance.get()) {
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
if (instance->want_interrupt ()) {
|
|
281
|
+
FAISS_THROW_MSG ("computation interrupted");
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
bool InterruptCallback::is_interrupted () {
|
|
286
|
+
if (!instance.get()) {
|
|
287
|
+
return false;
|
|
288
|
+
}
|
|
289
|
+
std::lock_guard<std::mutex> guard(lock);
|
|
290
|
+
return instance->want_interrupt();
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
size_t InterruptCallback::get_period_hint (size_t flops) {
|
|
295
|
+
if (!instance.get()) {
|
|
296
|
+
return 1L << 30; // never check
|
|
297
|
+
}
|
|
298
|
+
// for 10M flops, it is reasonable to check once every 10 iterations
|
|
299
|
+
return std::max((size_t)10 * 10 * 1000 * 1000 / (flops + 1), (size_t)1);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
} // namespace faiss
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
// Auxiliary index structures, that are used in indexes but that can
|
|
11
|
+
// be forward-declared
|
|
12
|
+
|
|
13
|
+
#ifndef FAISS_AUX_INDEX_STRUCTURES_H
|
|
14
|
+
#define FAISS_AUX_INDEX_STRUCTURES_H
|
|
15
|
+
|
|
16
|
+
#include <stdint.h>
|
|
17
|
+
|
|
18
|
+
#include <vector>
|
|
19
|
+
#include <unordered_set>
|
|
20
|
+
#include <memory>
|
|
21
|
+
#include <mutex>
|
|
22
|
+
|
|
23
|
+
#include <faiss/Index.h>
|
|
24
|
+
|
|
25
|
+
namespace faiss {
|
|
26
|
+
|
|
27
|
+
/** The objective is to have a simple result structure while
|
|
28
|
+
* minimizing the number of mem copies in the result. The method
|
|
29
|
+
* do_allocation can be overloaded to allocate the result tables in
|
|
30
|
+
* the matrix type of a scripting language like Lua or Python. */
|
|
31
|
+
struct RangeSearchResult {
|
|
32
|
+
size_t nq; ///< nb of queries
|
|
33
|
+
size_t *lims; ///< size (nq + 1)
|
|
34
|
+
|
|
35
|
+
typedef Index::idx_t idx_t;
|
|
36
|
+
|
|
37
|
+
idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]]
|
|
38
|
+
float *distances; ///< corresponding distances (not sorted)
|
|
39
|
+
|
|
40
|
+
size_t buffer_size; ///< size of the result buffers used
|
|
41
|
+
|
|
42
|
+
/// lims must be allocated on input to range_search.
|
|
43
|
+
explicit RangeSearchResult (idx_t nq, bool alloc_lims=true);
|
|
44
|
+
|
|
45
|
+
/// called when lims contains the nb of elements result entries
|
|
46
|
+
/// for each query
|
|
47
|
+
|
|
48
|
+
virtual void do_allocation ();
|
|
49
|
+
|
|
50
|
+
virtual ~RangeSearchResult ();
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
|
|
56
|
+
Encapsulates a set of ids to remove. */
|
|
57
|
+
struct IDSelector {
|
|
58
|
+
typedef Index::idx_t idx_t;
|
|
59
|
+
virtual bool is_member (idx_t id) const = 0;
|
|
60
|
+
virtual ~IDSelector() {}
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
/** remove ids between [imni, imax) */
|
|
66
|
+
struct IDSelectorRange: IDSelector {
|
|
67
|
+
idx_t imin, imax;
|
|
68
|
+
|
|
69
|
+
IDSelectorRange (idx_t imin, idx_t imax);
|
|
70
|
+
bool is_member(idx_t id) const override;
|
|
71
|
+
~IDSelectorRange() override {}
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
/** Remove ids from a set. Repetitions of ids in the indices set
|
|
76
|
+
* passed to the constructor does not hurt performance. The hash
|
|
77
|
+
* function used for the bloom filter and GCC's implementation of
|
|
78
|
+
* unordered_set are just the least significant bits of the id. This
|
|
79
|
+
* works fine for random ids or ids in sequences but will produce many
|
|
80
|
+
* hash collisions if lsb's are always the same */
|
|
81
|
+
struct IDSelectorBatch: IDSelector {
|
|
82
|
+
|
|
83
|
+
std::unordered_set<idx_t> set;
|
|
84
|
+
|
|
85
|
+
typedef unsigned char uint8_t;
|
|
86
|
+
std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
|
|
87
|
+
int nbits;
|
|
88
|
+
idx_t mask;
|
|
89
|
+
|
|
90
|
+
IDSelectorBatch (size_t n, const idx_t *indices);
|
|
91
|
+
bool is_member(idx_t id) const override;
|
|
92
|
+
~IDSelectorBatch() override {}
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
/****************************************************************
|
|
96
|
+
* Result structures for range search.
|
|
97
|
+
*
|
|
98
|
+
* The main constraint here is that we want to support parallel
|
|
99
|
+
* queries from different threads in various ways: 1 thread per query,
|
|
100
|
+
* several threads per query. We store the actual results in blocks of
|
|
101
|
+
* fixed size rather than exponentially increasing memory. At the end,
|
|
102
|
+
* we copy the block content to a linear result array.
|
|
103
|
+
*****************************************************************/
|
|
104
|
+
|
|
105
|
+
/** List of temporary buffers used to store results before they are
|
|
106
|
+
* copied to the RangeSearchResult object. */
|
|
107
|
+
struct BufferList {
|
|
108
|
+
typedef Index::idx_t idx_t;
|
|
109
|
+
|
|
110
|
+
// buffer sizes in # entries
|
|
111
|
+
size_t buffer_size;
|
|
112
|
+
|
|
113
|
+
struct Buffer {
|
|
114
|
+
idx_t *ids;
|
|
115
|
+
float *dis;
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
std::vector<Buffer> buffers;
|
|
119
|
+
size_t wp; ///< write pointer in the last buffer.
|
|
120
|
+
|
|
121
|
+
explicit BufferList (size_t buffer_size);
|
|
122
|
+
|
|
123
|
+
~BufferList ();
|
|
124
|
+
|
|
125
|
+
/// create a new buffer
|
|
126
|
+
void append_buffer ();
|
|
127
|
+
|
|
128
|
+
/// add one result, possibly appending a new buffer if needed
|
|
129
|
+
void add (idx_t id, float dis);
|
|
130
|
+
|
|
131
|
+
/// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
|
|
132
|
+
/// tables dest_ids, dest_dis
|
|
133
|
+
void copy_range (size_t ofs, size_t n,
|
|
134
|
+
idx_t * dest_ids, float *dest_dis);
|
|
135
|
+
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
struct RangeSearchPartialResult;
|
|
139
|
+
|
|
140
|
+
/// result structure for a single query
|
|
141
|
+
struct RangeQueryResult {
|
|
142
|
+
using idx_t = Index::idx_t;
|
|
143
|
+
idx_t qno; //< id of the query
|
|
144
|
+
size_t nres; //< nb of results for this query
|
|
145
|
+
RangeSearchPartialResult * pres;
|
|
146
|
+
|
|
147
|
+
/// called by search function to report a new result
|
|
148
|
+
void add (float dis, idx_t id);
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
/// the entries in the buffers are split per query
|
|
152
|
+
struct RangeSearchPartialResult: BufferList {
|
|
153
|
+
RangeSearchResult * res;
|
|
154
|
+
|
|
155
|
+
/// eventually the result will be stored in res_in
|
|
156
|
+
explicit RangeSearchPartialResult (RangeSearchResult * res_in);
|
|
157
|
+
|
|
158
|
+
/// query ids + nb of results per query.
|
|
159
|
+
std::vector<RangeQueryResult> queries;
|
|
160
|
+
|
|
161
|
+
/// begin a new result
|
|
162
|
+
RangeQueryResult & new_result (idx_t qno);
|
|
163
|
+
|
|
164
|
+
/*****************************************
|
|
165
|
+
* functions used at the end of the search to merge the result
|
|
166
|
+
* lists */
|
|
167
|
+
void finalize ();
|
|
168
|
+
|
|
169
|
+
/// called by range_search before do_allocation
|
|
170
|
+
void set_lims ();
|
|
171
|
+
|
|
172
|
+
/// called by range_search after do_allocation
|
|
173
|
+
void copy_result (bool incremental = false);
|
|
174
|
+
|
|
175
|
+
/// merge a set of PartialResult's into one RangeSearchResult
|
|
176
|
+
/// on ouptut the partialresults are empty!
|
|
177
|
+
static void merge (std::vector <RangeSearchPartialResult *> &
|
|
178
|
+
partial_results, bool do_delete=true);
|
|
179
|
+
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
/***********************************************************
|
|
184
|
+
* The distance computer maintains a current query and computes
|
|
185
|
+
* distances to elements in an index that supports random access.
|
|
186
|
+
*
|
|
187
|
+
* The DistanceComputer is not intended to be thread-safe (eg. because
|
|
188
|
+
* it maintains counters) so the distance functions are not const,
|
|
189
|
+
* instanciate one from each thread if needed.
|
|
190
|
+
***********************************************************/
|
|
191
|
+
struct DistanceComputer {
|
|
192
|
+
using idx_t = Index::idx_t;
|
|
193
|
+
|
|
194
|
+
/// called before computing distances
|
|
195
|
+
virtual void set_query(const float *x) = 0;
|
|
196
|
+
|
|
197
|
+
/// compute distance of vector i to current query
|
|
198
|
+
virtual float operator () (idx_t i) = 0;
|
|
199
|
+
|
|
200
|
+
/// compute distance between two stored vectors
|
|
201
|
+
virtual float symmetric_dis (idx_t i, idx_t j) = 0;
|
|
202
|
+
|
|
203
|
+
virtual ~DistanceComputer() {}
|
|
204
|
+
};
|
|
205
|
+
|
|
206
|
+
/***********************************************************
|
|
207
|
+
* Interrupt callback
|
|
208
|
+
***********************************************************/
|
|
209
|
+
|
|
210
|
+
/// Interface for a process-wide interrupt hook. A subclass implements
/// want_interrupt(); the object held in `instance` is consulted by the
/// static helpers below.
struct InterruptCallback {
    /// return true to request that the computation be interrupted
    virtual bool want_interrupt () = 0;
    virtual ~InterruptCallback() = default;

    /// lock that protects concurrent calls to is_interrupted
    static std::mutex lock;

    /// currently installed callback (empty when none is set)
    static std::unique_ptr<InterruptCallback> instance;

    /// remove the installed callback
    static void clear_instance ();

    /** check if:
     * - an interrupt callback is set
     * - the callback returns true
     * if this is the case, then throw an exception. Should not be called
     * from multiple threads.
     */
    static void check ();

    /// same as check() but returns true if interrupted instead of
    /// throwing. Can be called from multiple threads.
    static bool is_interrupted ();

    /** assuming each iteration takes a certain number of flops, what
     * is a reasonable interval to check for interrupts?
     */
    static size_t get_period_hint (size_t flops);

};
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
}; // namespace faiss
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
#endif
|