faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -29,7 +29,7 @@ struct IOReader {
|
|
29
29
|
// name that can be used in error messages
|
30
30
|
std::string name;
|
31
31
|
|
32
|
-
// fread
|
32
|
+
// fread. Returns number of items read or 0 in case of EOF.
|
33
33
|
virtual size_t operator()(
|
34
34
|
void *ptr, size_t size, size_t nitems) = 0;
|
35
35
|
|
@@ -43,7 +43,7 @@ struct IOWriter {
|
|
43
43
|
// name that can be used in error messages
|
44
44
|
std::string name;
|
45
45
|
|
46
|
-
// fwrite
|
46
|
+
// fwrite. Returns number of items written.
|
47
47
|
virtual size_t operator()(
|
48
48
|
const void *ptr, size_t size, size_t nitems) = 0;
|
49
49
|
|
@@ -97,6 +97,10 @@ struct FileIOWriter: IOWriter {
|
|
97
97
|
|
98
98
|
/*******************************************************
|
99
99
|
* Buffered reader + writer
|
100
|
+
*
|
101
|
+
* They attempt to read and write only buffers of size bsz to the
|
102
|
+
* underlying reader or writer. This is done by splitting or merging
|
103
|
+
* the read/write functions.
|
100
104
|
*******************************************************/
|
101
105
|
|
102
106
|
|
@@ -105,24 +109,32 @@ struct FileIOWriter: IOWriter {
|
|
105
109
|
struct BufferedIOReader: IOReader {
|
106
110
|
|
107
111
|
IOReader *reader;
|
108
|
-
size_t bsz
|
112
|
+
size_t bsz;
|
113
|
+
size_t ofs; ///< offset in input stream
|
114
|
+
size_t ofs2; ///< number of bytes returned to caller
|
109
115
|
size_t b0, b1; ///< range of available bytes in the buffer
|
110
116
|
std::vector<char> buffer;
|
111
117
|
|
112
|
-
|
113
|
-
|
118
|
+
/**
|
119
|
+
* @param bsz buffer size (bytes). Reads will be done by batches of
|
120
|
+
* this size
|
121
|
+
*/
|
122
|
+
explicit BufferedIOReader(IOReader *reader, size_t bsz = 1024 * 1024);
|
114
123
|
|
115
124
|
size_t operator()(void *ptr, size_t size, size_t nitems) override;
|
116
125
|
};
|
117
126
|
|
127
|
+
|
118
128
|
struct BufferedIOWriter: IOWriter {
|
119
129
|
|
120
130
|
IOWriter *writer;
|
121
|
-
size_t bsz
|
122
|
-
size_t
|
131
|
+
size_t bsz;
|
132
|
+
size_t ofs;
|
133
|
+
size_t ofs2; ///< number of bytes received from caller
|
134
|
+
size_t b0; ///< amount of data in buffer
|
123
135
|
std::vector<char> buffer;
|
124
136
|
|
125
|
-
BufferedIOWriter(IOWriter *writer, size_t bsz);
|
137
|
+
explicit BufferedIOWriter(IOWriter *writer, size_t bsz = 1024 * 1024);
|
126
138
|
|
127
139
|
size_t operator()(const void *ptr, size_t size, size_t nitems) override;
|
128
140
|
|
@@ -132,5 +144,7 @@ struct BufferedIOWriter: IOWriter {
|
|
132
144
|
|
133
145
|
/// cast a 4-character string to a uint32_t that can be written and read easily
|
134
146
|
uint32_t fourcc (const char sx[4]);
|
147
|
+
uint32_t fourcc (const std::string & sx);
|
148
|
+
|
135
149
|
|
136
150
|
} // namespace faiss
|
@@ -0,0 +1,57 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
/*************************************************************
|
11
|
+
* I/O macros
|
12
|
+
*
|
13
|
+
* we use macros so that we have a line number to report in abort
|
14
|
+
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
15
|
+
* always called f and thus is not passed in as a macro parameter.
|
16
|
+
**************************************************************/
|
17
|
+
|
18
|
+
|
19
|
+
#define READANDCHECK(ptr, n) { \
|
20
|
+
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
21
|
+
FAISS_THROW_IF_NOT_FMT(ret == (n), \
|
22
|
+
"read error in %s: %zd != %zd (%s)", \
|
23
|
+
f->name.c_str(), ret, size_t(n), strerror(errno)); \
|
24
|
+
}
|
25
|
+
|
26
|
+
#define READ1(x) READANDCHECK(&(x), 1)
|
27
|
+
|
28
|
+
// will fail if we read 2^40 or more elements at once...
|
29
|
+
#define READVECTOR(vec) \
|
30
|
+
{ \
|
31
|
+
size_t size; \
|
32
|
+
READANDCHECK(&size, 1); \
|
33
|
+
FAISS_THROW_IF_NOT(size >= 0 && size < (uint64_t{1} << 40)); \
|
34
|
+
(vec).resize(size); \
|
35
|
+
READANDCHECK((vec).data(), size); \
|
36
|
+
}
|
37
|
+
|
38
|
+
#define READSTRING(s) { \
|
39
|
+
size_t size = (s).size (); \
|
40
|
+
WRITEANDCHECK (&size, 1); \
|
41
|
+
WRITEANDCHECK ((s).c_str(), size); \
|
42
|
+
}
|
43
|
+
|
44
|
+
#define WRITEANDCHECK(ptr, n) { \
|
45
|
+
size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \
|
46
|
+
FAISS_THROW_IF_NOT_FMT(ret == (n), \
|
47
|
+
"write error in %s: %zd != %zd (%s)", \
|
48
|
+
f->name.c_str(), ret, size_t(n), strerror(errno)); \
|
49
|
+
}
|
50
|
+
|
51
|
+
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
|
52
|
+
|
53
|
+
#define WRITEVECTOR(vec) { \
|
54
|
+
size_t size = (vec).size (); \
|
55
|
+
WRITEANDCHECK (&size, 1); \
|
56
|
+
WRITEANDCHECK ((vec).data (), size); \
|
57
|
+
}
|
@@ -21,6 +21,22 @@
|
|
21
21
|
|
22
22
|
#include <faiss/utils/distances.h>
|
23
23
|
|
24
|
+
#ifdef _MSC_VER
|
25
|
+
|
26
|
+
#include <intrin.h>
|
27
|
+
|
28
|
+
static inline int __builtin_ctzll(uint64_t x) {
|
29
|
+
unsigned long ret;
|
30
|
+
_BitScanForward64(&ret, x);
|
31
|
+
return (int)ret;
|
32
|
+
}
|
33
|
+
|
34
|
+
static inline int __builtin_clzll(uint64_t x) {
|
35
|
+
return (int)__lzcnt64(x);
|
36
|
+
}
|
37
|
+
|
38
|
+
#endif // _MSC_VER
|
39
|
+
|
24
40
|
namespace faiss {
|
25
41
|
|
26
42
|
/********************************************
|
@@ -102,7 +118,7 @@ int decode_comb_1 (uint64_t *n, int k1, int r) {
|
|
102
118
|
}
|
103
119
|
|
104
120
|
// optimized version for < 64 bits
|
105
|
-
|
121
|
+
uint64_t repeats_encode_64 (
|
106
122
|
const std::vector<Repeat> & repeats,
|
107
123
|
int dim, const float *c)
|
108
124
|
{
|
@@ -115,12 +131,12 @@ long repeats_encode_64 (
|
|
115
131
|
uint64_t tosee = ~coded;
|
116
132
|
for(;;) {
|
117
133
|
// directly jump to next available slot.
|
118
|
-
int i =
|
119
|
-
tosee &= ~(
|
134
|
+
int i = __builtin_ctzll(tosee);
|
135
|
+
tosee &= ~(uint64_t{1} << i) ;
|
120
136
|
if (c[i] == r->val) {
|
121
137
|
code_comb += comb(rank, occ + 1);
|
122
138
|
occ++;
|
123
|
-
coded |=
|
139
|
+
coded |= uint64_t{1} << i;
|
124
140
|
if (occ == r->n) break;
|
125
141
|
}
|
126
142
|
rank++;
|
@@ -148,13 +164,13 @@ void repeats_decode_64(
|
|
148
164
|
int occ = 0;
|
149
165
|
int rank = nfree;
|
150
166
|
int next_rank = decode_comb_1 (&code_comb, r->n, rank);
|
151
|
-
uint64_t tosee = ((
|
167
|
+
uint64_t tosee = ((uint64_t{1} << dim) - 1) ^ decoded;
|
152
168
|
for(;;) {
|
153
|
-
int i = 63 -
|
154
|
-
tosee &= ~(
|
169
|
+
int i = 63 - __builtin_clzll(tosee);
|
170
|
+
tosee &= ~(uint64_t{1} << i);
|
155
171
|
rank--;
|
156
172
|
if (rank == next_rank) {
|
157
|
-
decoded |=
|
173
|
+
decoded |= uint64_t{1} << i;
|
158
174
|
c[i] = r->val;
|
159
175
|
occ++;
|
160
176
|
if (occ == r->n) break;
|
@@ -190,9 +206,9 @@ Repeats::Repeats (int dim, const float *c): dim(dim)
|
|
190
206
|
}
|
191
207
|
|
192
208
|
|
193
|
-
|
209
|
+
uint64_t Repeats::count () const
|
194
210
|
{
|
195
|
-
|
211
|
+
uint64_t accu = 1;
|
196
212
|
int remain = dim;
|
197
213
|
for (int i = 0; i < repeats.size(); i++) {
|
198
214
|
accu *= comb(remain, repeats[i].n);
|
@@ -204,7 +220,7 @@ long Repeats::count () const
|
|
204
220
|
|
205
221
|
|
206
222
|
// version with a bool vector that works for >= 64 dim
|
207
|
-
|
223
|
+
uint64_t Repeats::encode(const float *c) const
|
208
224
|
{
|
209
225
|
if (dim < 64) {
|
210
226
|
return repeats_encode_64 (repeats, dim, c);
|
@@ -306,20 +322,20 @@ void EnumeratedVectors::decode_multi(size_t n, const uint64_t * codes,
|
|
306
322
|
void EnumeratedVectors::find_nn (
|
307
323
|
size_t nc, const uint64_t * codes,
|
308
324
|
size_t nq, const float *xq,
|
309
|
-
|
325
|
+
int64_t *labels, float *distances)
|
310
326
|
{
|
311
|
-
for (
|
327
|
+
for (size_t i = 0; i < nq; i++) {
|
312
328
|
distances[i] = -1e20;
|
313
329
|
labels[i] = -1;
|
314
330
|
}
|
315
331
|
|
316
|
-
float c
|
317
|
-
for(
|
332
|
+
std::vector<float> c(dim);
|
333
|
+
for(size_t i = 0; i < nc; i++) {
|
318
334
|
uint64_t code = codes[nc];
|
319
|
-
decode(code, c);
|
320
|
-
for (
|
335
|
+
decode(code, c.data());
|
336
|
+
for (size_t j = 0; j < nq; j++) {
|
321
337
|
const float *x = xq + j * dim;
|
322
|
-
float dis = fvec_inner_product(x, c, dim);
|
338
|
+
float dis = fvec_inner_product(x, c.data(), dim);
|
323
339
|
if (dis > distances[j]) {
|
324
340
|
distances[j] = dis;
|
325
341
|
labels[j] = i;
|
@@ -341,9 +357,9 @@ ZnSphereSearch::ZnSphereSearch(int dim, int r2): dimS(dim), r2(r2) {
|
|
341
357
|
}
|
342
358
|
|
343
359
|
float ZnSphereSearch::search(const float *x, float *c) const {
|
344
|
-
float tmp
|
345
|
-
int tmp_int
|
346
|
-
return search(x, c, tmp, tmp_int);
|
360
|
+
std::vector<float> tmp(dimS * 2);
|
361
|
+
std::vector<int> tmp_int(dimS);
|
362
|
+
return search(x, c, tmp.data(), tmp_int.data());
|
347
363
|
}
|
348
364
|
|
349
365
|
float ZnSphereSearch::search(const float *x, float *c,
|
@@ -430,19 +446,19 @@ ZnSphereCodec::ZnSphereCodec(int dim, int r2):
|
|
430
446
|
}
|
431
447
|
|
432
448
|
uint64_t ZnSphereCodec::search_and_encode(const float *x) const {
|
433
|
-
float tmp
|
434
|
-
int tmp_int
|
449
|
+
std::vector<float> tmp(dim * 2);
|
450
|
+
std::vector<int> tmp_int(dim);
|
435
451
|
int ano; // atom number
|
436
|
-
float c
|
437
|
-
search(x, c, tmp, tmp_int, &ano);
|
452
|
+
std::vector<float> c(dim);
|
453
|
+
search(x, c.data(), tmp.data(), tmp_int.data(), &ano);
|
438
454
|
uint64_t signs = 0;
|
439
|
-
float cabs
|
455
|
+
std::vector<float> cabs(dim);
|
440
456
|
int nnz = 0;
|
441
457
|
for (int i = 0; i < dim; i++) {
|
442
458
|
cabs[i] = fabs(c[i]);
|
443
459
|
if (c[i] != 0) {
|
444
460
|
if (c[i] < 0) {
|
445
|
-
signs |=
|
461
|
+
signs |= uint64_t{1} << nnz;
|
446
462
|
}
|
447
463
|
nnz ++;
|
448
464
|
}
|
@@ -450,7 +466,7 @@ uint64_t ZnSphereCodec::search_and_encode(const float *x) const {
|
|
450
466
|
const CodeSegment &cs = code_segments[ano];
|
451
467
|
assert(nnz == cs.signbits);
|
452
468
|
uint64_t code = cs.c0 + signs;
|
453
|
-
code += cs.encode(cabs) << cs.signbits;
|
469
|
+
code += cs.encode(cabs.data()) << cs.signbits;
|
454
470
|
return code;
|
455
471
|
}
|
456
472
|
|
@@ -560,13 +576,13 @@ ZnSphereCodecRec::ZnSphereCodecRec(int dim, int r2):
|
|
560
576
|
std::vector<float> &cache = decode_cache[r2sub];
|
561
577
|
int dimsub = (1 << cache_level);
|
562
578
|
cache.resize (nvi * dimsub);
|
563
|
-
float c
|
579
|
+
std::vector<float> c(dim);
|
564
580
|
uint64_t code0 = get_nv_cum(cache_level + 1, r2,
|
565
581
|
r2 - r2sub);
|
566
582
|
for (int i = 0; i < nvi; i++) {
|
567
|
-
decode(i + code0, c);
|
568
|
-
memcpy(&cache[i * dimsub], c + dim - dimsub,
|
569
|
-
dimsub * sizeof(*c));
|
583
|
+
decode(i + code0, c.data());
|
584
|
+
memcpy(&cache[i * dimsub], c.data() + dim - dimsub,
|
585
|
+
dimsub * sizeof(*c.data()));
|
570
586
|
}
|
571
587
|
}
|
572
588
|
decode_cache_ld = cache_level;
|
@@ -581,8 +597,8 @@ uint64_t ZnSphereCodecRec::encode(const float *c) const
|
|
581
597
|
|
582
598
|
uint64_t ZnSphereCodecRec::encode_centroid(const float *c) const
|
583
599
|
{
|
584
|
-
uint64_t codes
|
585
|
-
int norm2s
|
600
|
+
std::vector<uint64_t> codes(dim);
|
601
|
+
std::vector<int> norm2s(dim);
|
586
602
|
for(int i = 0; i < dim; i++) {
|
587
603
|
if (c[i] == 0) {
|
588
604
|
codes[i] = 0;
|
@@ -617,8 +633,8 @@ uint64_t ZnSphereCodecRec::encode_centroid(const float *c) const
|
|
617
633
|
|
618
634
|
void ZnSphereCodecRec::decode(uint64_t code, float *c) const
|
619
635
|
{
|
620
|
-
uint64_t codes
|
621
|
-
int norm2s
|
636
|
+
std::vector<uint64_t> codes(dim);
|
637
|
+
std::vector<int> norm2s(dim);
|
622
638
|
codes[0] = code;
|
623
639
|
norm2s[0] = r2;
|
624
640
|
|
@@ -80,7 +80,7 @@ struct EnumeratedVectors {
|
|
80
80
|
// (decodes and computes distances)
|
81
81
|
void find_nn (size_t n, const uint64_t * codes,
|
82
82
|
size_t nq, const float *xq,
|
83
|
-
|
83
|
+
int64_t *idx, float *dis);
|
84
84
|
|
85
85
|
virtual ~EnumeratedVectors() {}
|
86
86
|
|
@@ -103,9 +103,9 @@ struct Repeats {
|
|
103
103
|
Repeats(int dim = 0, const float *c = nullptr);
|
104
104
|
|
105
105
|
// count number of possible codes for this atom
|
106
|
-
|
106
|
+
uint64_t count() const;
|
107
107
|
|
108
|
-
|
108
|
+
uint64_t encode(const float *c) const;
|
109
109
|
|
110
110
|
void decode(uint64_t code, float *c) const;
|
111
111
|
};
|
@@ -0,0 +1,24 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#ifdef _MSC_VER
|
11
|
+
|
12
|
+
#ifdef FAISS_MAIN_LIB
|
13
|
+
#define FAISS_API __declspec(dllexport)
|
14
|
+
#else // FAISS_MAIN_LIB
|
15
|
+
#define FAISS_API __declspec(dllimport)
|
16
|
+
#endif // FAISS_MAIN_LIB
|
17
|
+
|
18
|
+
#define __PRETTY_FUNCTION__ __FUNCSIG__
|
19
|
+
|
20
|
+
#else
|
21
|
+
|
22
|
+
#define FAISS_API
|
23
|
+
|
24
|
+
#endif // _MSC_VER
|
@@ -13,9 +13,9 @@
|
|
13
13
|
|
14
14
|
#include <faiss/AutoTune.h>
|
15
15
|
|
16
|
+
#include <cinttypes>
|
16
17
|
#include <cmath>
|
17
18
|
|
18
|
-
|
19
19
|
#include <faiss/impl/FaissAssert.h>
|
20
20
|
#include <faiss/utils/utils.h>
|
21
21
|
#include <faiss/utils/random.h>
|
@@ -38,6 +38,7 @@
|
|
38
38
|
#include <faiss/IndexBinaryFlat.h>
|
39
39
|
#include <faiss/IndexBinaryHNSW.h>
|
40
40
|
#include <faiss/IndexBinaryIVF.h>
|
41
|
+
#include <faiss/IndexBinaryHash.h>
|
41
42
|
|
42
43
|
namespace faiss {
|
43
44
|
|
@@ -81,14 +82,14 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
81
82
|
|
82
83
|
ScopeDeleter1<Index> del_coarse_quantizer, del_index;
|
83
84
|
|
84
|
-
|
85
|
+
std::string description(description_in);
|
85
86
|
char *ptr;
|
86
|
-
memcpy (description, description_in, strlen(description_in) + 1);
|
87
87
|
|
88
88
|
int64_t ncentroids = -1;
|
89
89
|
bool use_2layer = false;
|
90
|
+
int hnsw_M = -1;
|
90
91
|
|
91
|
-
for (char *tok = strtok_r (description, " ,", &ptr);
|
92
|
+
for (char *tok = strtok_r (&description[0], " ,", &ptr);
|
92
93
|
tok;
|
93
94
|
tok = strtok_r (nullptr, " ,", &ptr)) {
|
94
95
|
int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
|
@@ -138,12 +139,11 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
138
139
|
|
139
140
|
// coarse quantizers
|
140
141
|
} else if (!coarse_quantizer &&
|
141
|
-
sscanf (tok, "IVF%
|
142
|
-
FAISS_THROW_IF_NOT (metric == METRIC_L2);
|
142
|
+
sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
|
143
143
|
coarse_quantizer_1 = new IndexHNSWFlat (d, M);
|
144
144
|
|
145
145
|
} else if (!coarse_quantizer &&
|
146
|
-
sscanf (tok, "IVF%
|
146
|
+
sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) {
|
147
147
|
if (metric == METRIC_L2) {
|
148
148
|
coarse_quantizer_1 = new IndexFlatL2 (d);
|
149
149
|
} else {
|
@@ -164,7 +164,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
164
164
|
use_2layer = true;
|
165
165
|
|
166
166
|
} else if (!coarse_quantizer &&
|
167
|
-
sscanf (tok, "Residual%
|
167
|
+
sscanf (tok, "Residual%" PRId64, &ncentroids) == 1) {
|
168
168
|
coarse_quantizer_1 = new IndexFlatL2 (d);
|
169
169
|
use_2layer = true;
|
170
170
|
|
@@ -186,6 +186,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
186
186
|
del_coarse_quantizer.release ();
|
187
187
|
index_ivf->own_fields = true;
|
188
188
|
index_1 = index_ivf;
|
189
|
+
} else if (hnsw_M > 0) {
|
190
|
+
index_1 = new IndexHNSWFlat (d, hnsw_M, metric);
|
189
191
|
} else {
|
190
192
|
FAISS_THROW_IF_NOT_MSG (stok != "FlatDedup",
|
191
193
|
"dedup supported only for IVFFlat");
|
@@ -209,6 +211,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
209
211
|
del_coarse_quantizer.release ();
|
210
212
|
index_ivf->own_fields = true;
|
211
213
|
index_1 = index_ivf;
|
214
|
+
} else if (hnsw_M > 0) {
|
215
|
+
index_1 = new IndexHNSWSQ(d, qt, hnsw_M, metric);
|
212
216
|
} else {
|
213
217
|
index_1 = new IndexScalarQuantizer (d, qt, metric);
|
214
218
|
}
|
@@ -248,6 +252,11 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
248
252
|
index_2l->q1.own_fields = true;
|
249
253
|
index_1 = index_2l;
|
250
254
|
}
|
255
|
+
} else if (hnsw_M > 0) {
|
256
|
+
IndexHNSWPQ *ipq = new IndexHNSWPQ(d, M, hnsw_M);
|
257
|
+
dynamic_cast<IndexPQ*>(ipq->storage)->do_polysemous_training =
|
258
|
+
do_polysemous_training;
|
259
|
+
index_1 = ipq;
|
251
260
|
} else {
|
252
261
|
IndexPQ *index_pq = new IndexPQ (d, M, nbit, metric);
|
253
262
|
index_pq->do_polysemous_training = do_polysemous_training;
|
@@ -272,13 +281,14 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
272
281
|
} else if (!index &&
|
273
282
|
sscanf (tok, "HNSW%d_PQ%d", &M, &pq_m) == 2) {
|
274
283
|
index_1 = new IndexHNSWPQ (d, pq_m, M);
|
275
|
-
} else if (!index &&
|
276
|
-
sscanf (tok, "HNSW%d", &M) == 1) {
|
277
|
-
index_1 = new IndexHNSWFlat (d, M);
|
278
284
|
} else if (!index &&
|
279
285
|
sscanf (tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 &&
|
280
286
|
pq_m == 8) {
|
281
287
|
index_1 = new IndexHNSWSQ (d, ScalarQuantizer::QT_8bit, M);
|
288
|
+
} else if (!index &&
|
289
|
+
sscanf (tok, "HNSW%d", &M) == 1) {
|
290
|
+
hnsw_M = M;
|
291
|
+
// here it is unclear what we want: HNSW flat or HNSWx,Y ?
|
282
292
|
} else if (!index && (stok == "LSH" || stok == "LSHr" ||
|
283
293
|
stok == "LSHrt" || stok == "LSHt")) {
|
284
294
|
bool rotate_data = strstr(tok, "r") != nullptr;
|
@@ -318,6 +328,11 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
318
328
|
}
|
319
329
|
}
|
320
330
|
|
331
|
+
if (!index && hnsw_M > 0) {
|
332
|
+
index = new IndexHNSWFlat (d, hnsw_M, metric);
|
333
|
+
del_index.set (index);
|
334
|
+
}
|
335
|
+
|
321
336
|
FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index",
|
322
337
|
description_in);
|
323
338
|
|
@@ -355,7 +370,7 @@ IndexBinary *index_binary_factory(int d, const char *description)
|
|
355
370
|
IndexBinary *index = nullptr;
|
356
371
|
|
357
372
|
int ncentroids = -1;
|
358
|
-
int M;
|
373
|
+
int M, nhash, b;
|
359
374
|
|
360
375
|
if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) {
|
361
376
|
IndexBinaryIVF *index_ivf = new IndexBinaryIVF(
|
@@ -375,6 +390,12 @@ IndexBinary *index_binary_factory(int d, const char *description)
|
|
375
390
|
IndexBinaryHNSW *index_hnsw = new IndexBinaryHNSW(d, M);
|
376
391
|
index = index_hnsw;
|
377
392
|
|
393
|
+
} else if (sscanf(description, "BHash%dx%d", &nhash, &b) == 2) {
|
394
|
+
index = new IndexBinaryMultiHash (d, nhash, b);
|
395
|
+
|
396
|
+
} else if (sscanf(description, "BHash%d", &b) == 1) {
|
397
|
+
index = new IndexBinaryHash (d, b);
|
398
|
+
|
378
399
|
} else if (std::string(description) == "BFlat") {
|
379
400
|
index = new IndexBinaryFlat(d);
|
380
401
|
|