faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
/*
|
|
11
|
+
* Hamming distances. The binary vector dimensionality should be a
|
|
12
|
+
* multiple of 8, as the elementary operations operate on bytes. If
|
|
13
|
+
* you need other sizes, just pad with 0s (this is done by function
|
|
14
|
+
* fvecs2bitvecs).
|
|
15
|
+
*
|
|
16
|
+
* User-defined type hamdis_t is used for distances because at this time
|
|
17
|
+
* it is still unclear how we will need to balance
|
|
18
|
+
* - flexibility in vector size (may need 16- or even 8-bit vectors)
|
|
19
|
+
* - memory usage
|
|
20
|
+
* - cache-misses when dealing with large volumes of data (fewer bits is better)
|
|
21
|
+
*
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
#ifndef FAISS_hamming_h
|
|
25
|
+
#define FAISS_hamming_h
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
#include <stdint.h>
|
|
29
|
+
|
|
30
|
+
#include <faiss/utils/Heap.h>
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
/* The Hamming distance type */
|
|
34
|
+
typedef int32_t hamdis_t;
|
|
35
|
+
|
|
36
|
+
namespace faiss {
|
|
37
|
+
|
|
38
|
+
/**************************************************
|
|
39
|
+
* General bit vector functions
|
|
40
|
+
**************************************************/
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
void bitvec_print (const uint8_t * b, size_t d);
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
/* Functions for casting vectors of regular types to compact bits.
|
|
47
|
+
They assume proper allocation done beforehand, meaning that b
|
|
48
|
+
should be able to receive as many bits as x may produce. */
|
|
49
|
+
|
|
50
|
+
/* Makes an array of bits from the signs of a float array. The length
|
|
51
|
+
of the output array b is rounded up to byte size (allocate
|
|
52
|
+
accordingly) */
|
|
53
|
+
void fvecs2bitvecs (
|
|
54
|
+
const float * x,
|
|
55
|
+
uint8_t * b,
|
|
56
|
+
size_t d,
|
|
57
|
+
size_t n);
|
|
58
|
+
|
|
59
|
+
void bitvecs2fvecs (
|
|
60
|
+
const uint8_t * b,
|
|
61
|
+
float * x,
|
|
62
|
+
size_t d,
|
|
63
|
+
size_t n);
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
void fvec2bitvec (const float * x, uint8_t * b, size_t d);
|
|
67
|
+
|
|
68
|
+
/***********************************************
|
|
69
|
+
* Generic reader/writer for bit strings
|
|
70
|
+
***********************************************/
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
struct BitstringWriter { // bit-level writer into a byte buffer; the methods are only declared in this header
|
|
74
|
+
uint8_t *code; // buffer being written (presumably supplied and owned by the caller -- confirm)
|
|
75
|
+
size_t code_size; // size of code, in bytes
|
|
76
|
+
size_t i; // current bit offset
|
|
77
|
+
|
|
78
|
+
// code_size in bytes (passed as int here, stored in the size_t member above)
|
|
79
|
+
BitstringWriter(uint8_t *code, int code_size);
|
|
80
|
+
|
|
81
|
+
// write the nbit low bits of x (presumably at bit offset i, then advancing i -- confirm in the implementation)
|
|
82
|
+
void write(uint64_t x, int nbit);
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
struct BitstringReader { // bit-level reader over a read-only byte buffer; the methods are only declared in this header
|
|
86
|
+
const uint8_t *code; // buffer being read (const: the reader never writes through it)
|
|
87
|
+
size_t code_size; // size of code, in bytes
|
|
88
|
+
size_t i; // presumably the current bit offset, as in BitstringWriter -- confirm
|
|
89
|
+
|
|
90
|
+
// code_size in bytes (passed as int here, stored in the size_t member above)
|
|
91
|
+
BitstringReader(const uint8_t *code, int code_size);
|
|
92
|
+
|
|
93
|
+
// read nbit bits from the code and return them in a uint64_t
|
|
94
|
+
uint64_t read(int nbit);
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
/**************************************************
|
|
98
|
+
* Hamming distance computation functions
|
|
99
|
+
**************************************************/
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
extern size_t hamming_batch_size;
|
|
104
|
+
|
|
105
|
+
/// Number of bits set to 1 in the 64-bit word x.
///
/// Uses the `long long` flavour of the builtin: `unsigned long long` is at
/// least 64 bits wide on every target, whereas `unsigned long` is only
/// 32 bits on LLP64 / 32-bit platforms, where __builtin_popcountl would
/// silently ignore the upper half of x.
inline int popcount64(uint64_t x) {
    return __builtin_popcountll(x);
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
/** Compute a set of Hamming distances between na and nb binary vectors
|
|
111
|
+
*
|
|
112
|
+
* @param a size na * nbytespercode
|
|
113
|
+
* @param b size nb * nbytespercode
|
|
114
|
+
* @param nbytespercode should be multiple of 8
|
|
115
|
+
* @param dis output distances, size na * nb
|
|
116
|
+
*/
|
|
117
|
+
void hammings (
|
|
118
|
+
const uint8_t * a,
|
|
119
|
+
const uint8_t * b,
|
|
120
|
+
size_t na, size_t nb,
|
|
121
|
+
size_t nbytespercode,
|
|
122
|
+
hamdis_t * dis);
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
/** Return the k smallest Hamming distances for a set of binary query vectors,
|
|
128
|
+
* using a max heap.
|
|
129
|
+
* @param a queries, size ha->nh * ncodes
|
|
130
|
+
* @param b database, size nb * ncodes
|
|
131
|
+
* @param nb number of database vectors
|
|
132
|
+
* @param ncodes size of the binary codes (bytes)
|
|
133
|
+
* @param ordered if != 0: order the results by decreasing distance
|
|
134
|
+
* (may be bottleneck for k/n > 0.01) */
|
|
135
|
+
void hammings_knn_hc (
|
|
136
|
+
int_maxheap_array_t * ha,
|
|
137
|
+
const uint8_t * a,
|
|
138
|
+
const uint8_t * b,
|
|
139
|
+
size_t nb,
|
|
140
|
+
size_t ncodes,
|
|
141
|
+
int ordered);
|
|
142
|
+
|
|
143
|
+
/* Legacy alias to hammings_knn_hc. */
|
|
144
|
+
void hammings_knn (
|
|
145
|
+
int_maxheap_array_t * ha,
|
|
146
|
+
const uint8_t * a,
|
|
147
|
+
const uint8_t * b,
|
|
148
|
+
size_t nb,
|
|
149
|
+
size_t ncodes,
|
|
150
|
+
int ordered);
|
|
151
|
+
|
|
152
|
+
/** Return the k smallest Hamming distances for a set of binary query vectors,
|
|
153
|
+
* using counting max.
|
|
154
|
+
* @param a queries, size na * ncodes
|
|
155
|
+
* @param b database, size nb * ncodes
|
|
156
|
+
* @param na number of query vectors
|
|
157
|
+
* @param nb number of database vectors
|
|
158
|
+
* @param k number of vectors/distances to return
|
|
159
|
+
* @param ncodes size of the binary codes (bytes)
|
|
160
|
+
* @param distances output distances from each query vector to its k nearest
|
|
161
|
+
* neighbors
|
|
162
|
+
* @param labels output ids of the k nearest neighbors to each query vector
|
|
163
|
+
*/
|
|
164
|
+
void hammings_knn_mc (
|
|
165
|
+
const uint8_t * a,
|
|
166
|
+
const uint8_t * b,
|
|
167
|
+
size_t na,
|
|
168
|
+
size_t nb,
|
|
169
|
+
size_t k,
|
|
170
|
+
size_t ncodes,
|
|
171
|
+
int32_t *distances,
|
|
172
|
+
int64_t *labels);
|
|
173
|
+
|
|
174
|
+
/* Counting the number of matches or of cross-matches (without returning them)
|
|
175
|
+
For use with functions that assume pre-allocated memory */
|
|
176
|
+
void hamming_count_thres (
|
|
177
|
+
const uint8_t * bs1,
|
|
178
|
+
const uint8_t * bs2,
|
|
179
|
+
size_t n1,
|
|
180
|
+
size_t n2,
|
|
181
|
+
hamdis_t ht,
|
|
182
|
+
size_t ncodes,
|
|
183
|
+
size_t * nptr);
|
|
184
|
+
|
|
185
|
+
/* Return all Hamming distances/indices passing a threshold. Pre-allocation of output
|
|
186
|
+
is required. Use hamming_count_thres to determine the proper size. */
|
|
187
|
+
size_t match_hamming_thres (
|
|
188
|
+
const uint8_t * bs1,
|
|
189
|
+
const uint8_t * bs2,
|
|
190
|
+
size_t n1,
|
|
191
|
+
size_t n2,
|
|
192
|
+
hamdis_t ht,
|
|
193
|
+
size_t ncodes,
|
|
194
|
+
int64_t * idx,
|
|
195
|
+
hamdis_t * dis);
|
|
196
|
+
|
|
197
|
+
/* Cross-matching in a set of vectors */
|
|
198
|
+
void crosshamming_count_thres (
|
|
199
|
+
const uint8_t * dbs,
|
|
200
|
+
size_t n,
|
|
201
|
+
hamdis_t ht,
|
|
202
|
+
size_t ncodes,
|
|
203
|
+
size_t * nptr);
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
/* compute the Hamming distances between two codewords of nwords*64 bits */
|
|
207
|
+
hamdis_t hamming (
|
|
208
|
+
const uint64_t * bs1,
|
|
209
|
+
const uint64_t * bs2,
|
|
210
|
+
size_t nwords);
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
} // namespace faiss
|
|
215
|
+
|
|
216
|
+
// inlined definitions of HammingComputerXX and GenHammingComputerXX
|
|
217
|
+
|
|
218
|
+
#include <faiss/utils/hamming-inl.h>
|
|
219
|
+
|
|
220
|
+
#endif /* FAISS_hamming_h */
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/utils/random.h>
|
|
11
|
+
|
|
12
|
+
namespace faiss {
|
|
13
|
+
|
|
14
|
+
/**************************************************
|
|
15
|
+
* Random data generation functions
|
|
16
|
+
**************************************************/
|
|
17
|
+
|
|
18
|
+
/// Build a generator whose Mersenne Twister is seeded with `seed`
/// narrowed to unsigned int.
RandomGenerator::RandomGenerator (int64_t seed)
    : mt (static_cast<unsigned int> (seed))
{
}
|
|
20
|
+
|
|
21
|
+
int RandomGenerator::rand_int ()
|
|
22
|
+
{
|
|
23
|
+
return mt() & 0x7fffffff;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
int64_t RandomGenerator::rand_int64 ()
|
|
27
|
+
{
|
|
28
|
+
return int64_t(rand_int()) | int64_t(rand_int()) << 31;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
int RandomGenerator::rand_int (int max)
|
|
32
|
+
{
|
|
33
|
+
return mt() % max;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
float RandomGenerator::rand_float ()
|
|
37
|
+
{
|
|
38
|
+
return mt() / float(mt.max());
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
double RandomGenerator::rand_double ()
|
|
42
|
+
{
|
|
43
|
+
return mt() / double(mt.max());
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
/***********************************************************************
|
|
48
|
+
* Random functions in this C file only exist because Torch
|
|
49
|
+
* counterparts are slow and not multi-threaded. Typical use is for
|
|
50
|
+
* more than 1-100 billion values. */
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
/* Generate a set of random floating point values such that x[i] in [0,1]
|
|
54
|
+
multi-threading. For this reason, we rely on re-entreant functions. */
|
|
55
|
+
void float_rand (float * x, size_t n, int64_t seed)
|
|
56
|
+
{
|
|
57
|
+
// only try to parallelize on large enough arrays
|
|
58
|
+
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
59
|
+
|
|
60
|
+
RandomGenerator rng0 (seed);
|
|
61
|
+
int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
|
|
62
|
+
|
|
63
|
+
#pragma omp parallel for
|
|
64
|
+
for (size_t j = 0; j < nblock; j++) {
|
|
65
|
+
|
|
66
|
+
RandomGenerator rng (a0 + j * b0);
|
|
67
|
+
|
|
68
|
+
const size_t istart = j * n / nblock;
|
|
69
|
+
const size_t iend = (j + 1) * n / nblock;
|
|
70
|
+
|
|
71
|
+
for (size_t i = istart; i < iend; i++)
|
|
72
|
+
x[i] = rng.rand_float ();
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
void float_randn (float * x, size_t n, int64_t seed)
|
|
78
|
+
{
|
|
79
|
+
// only try to parallelize on large enough arrays
|
|
80
|
+
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
81
|
+
|
|
82
|
+
RandomGenerator rng0 (seed);
|
|
83
|
+
int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
|
|
84
|
+
|
|
85
|
+
#pragma omp parallel for
|
|
86
|
+
for (size_t j = 0; j < nblock; j++) {
|
|
87
|
+
RandomGenerator rng (a0 + j * b0);
|
|
88
|
+
|
|
89
|
+
double a = 0, b = 0, s = 0;
|
|
90
|
+
int state = 0; /* generate two number per "do-while" loop */
|
|
91
|
+
|
|
92
|
+
const size_t istart = j * n / nblock;
|
|
93
|
+
const size_t iend = (j + 1) * n / nblock;
|
|
94
|
+
|
|
95
|
+
for (size_t i = istart; i < iend; i++) {
|
|
96
|
+
/* Marsaglia's method (see Knuth) */
|
|
97
|
+
if (state == 0) {
|
|
98
|
+
do {
|
|
99
|
+
a = 2.0 * rng.rand_double () - 1;
|
|
100
|
+
b = 2.0 * rng.rand_double () - 1;
|
|
101
|
+
s = a * a + b * b;
|
|
102
|
+
} while (s >= 1.0);
|
|
103
|
+
x[i] = a * sqrt(-2.0 * log(s) / s);
|
|
104
|
+
}
|
|
105
|
+
else
|
|
106
|
+
x[i] = b * sqrt(-2.0 * log(s) / s);
|
|
107
|
+
state = 1 - state;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
/* Integer versions */
|
|
114
|
+
void int64_rand (int64_t * x, size_t n, int64_t seed)
|
|
115
|
+
{
|
|
116
|
+
// only try to parallelize on large enough arrays
|
|
117
|
+
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
118
|
+
|
|
119
|
+
RandomGenerator rng0 (seed);
|
|
120
|
+
int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
|
|
121
|
+
|
|
122
|
+
#pragma omp parallel for
|
|
123
|
+
for (size_t j = 0; j < nblock; j++) {
|
|
124
|
+
|
|
125
|
+
RandomGenerator rng (a0 + j * b0);
|
|
126
|
+
|
|
127
|
+
const size_t istart = j * n / nblock;
|
|
128
|
+
const size_t iend = (j + 1) * n / nblock;
|
|
129
|
+
for (size_t i = istart; i < iend; i++)
|
|
130
|
+
x[i] = rng.rand_int64 ();
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed)
|
|
135
|
+
{
|
|
136
|
+
// only try to parallelize on large enough arrays
|
|
137
|
+
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
138
|
+
|
|
139
|
+
RandomGenerator rng0 (seed);
|
|
140
|
+
int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
|
|
141
|
+
|
|
142
|
+
#pragma omp parallel for
|
|
143
|
+
for (size_t j = 0; j < nblock; j++) {
|
|
144
|
+
|
|
145
|
+
RandomGenerator rng (a0 + j * b0);
|
|
146
|
+
|
|
147
|
+
const size_t istart = j * n / nblock;
|
|
148
|
+
const size_t iend = (j + 1) * n / nblock;
|
|
149
|
+
for (size_t i = istart; i < iend; i++)
|
|
150
|
+
x[i] = rng.rand_int64 () % max;
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
void rand_perm (int *perm, size_t n, int64_t seed)
|
|
156
|
+
{
|
|
157
|
+
for (size_t i = 0; i < n; i++) perm[i] = i;
|
|
158
|
+
|
|
159
|
+
RandomGenerator rng (seed);
|
|
160
|
+
|
|
161
|
+
for (size_t i = 0; i + 1 < n; i++) {
|
|
162
|
+
int i2 = i + rng.rand_int (n - i);
|
|
163
|
+
std::swap(perm[i], perm[i2]);
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
void byte_rand (uint8_t * x, size_t n, int64_t seed)
|
|
171
|
+
{
|
|
172
|
+
// only try to parallelize on large enough arrays
|
|
173
|
+
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
174
|
+
|
|
175
|
+
RandomGenerator rng0 (seed);
|
|
176
|
+
int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
|
|
177
|
+
|
|
178
|
+
#pragma omp parallel for
|
|
179
|
+
for (size_t j = 0; j < nblock; j++) {
|
|
180
|
+
|
|
181
|
+
RandomGenerator rng (a0 + j * b0);
|
|
182
|
+
|
|
183
|
+
const size_t istart = j * n / nblock;
|
|
184
|
+
const size_t iend = (j + 1) * n / nblock;
|
|
185
|
+
|
|
186
|
+
size_t i;
|
|
187
|
+
for (i = istart; i < iend; i++)
|
|
188
|
+
x[i] = rng.rand_int64 ();
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
} // namespace faiss
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
/* Random generators. Implemented here for speed and to make
|
|
11
|
+
* sequences reproducible.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
#pragma once
|
|
15
|
+
|
|
16
|
+
#include <random>
|
|
17
|
+
#include <stdint.h>
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
namespace faiss {
|
|
21
|
+
|
|
22
|
+
/**************************************************
|
|
23
|
+
* Random data generation functions
|
|
24
|
+
**************************************************/
|
|
25
|
+
|
|
26
|
+
/// random generator that can be used in multithreaded contexts
|
|
27
|
+
struct RandomGenerator { // thin wrapper around one std::mt19937 engine
|
|
28
|
+
|
|
29
|
+
std::mt19937 mt; // underlying Mersenne Twister engine; every method below draws from it
|
|
30
|
+
|
|
31
|
+
/// random non-negative integer in [0, 2^31 - 1] (engine output masked with 0x7fffffff)
|
|
32
|
+
int rand_int ();
|
|
33
|
+
|
|
34
|
+
/// random int64_t assembled from two rand_int() draws (62 random bits, never negative)
|
|
35
|
+
int64_t rand_int64 ();
|
|
36
|
+
|
|
37
|
+
/// generate random integer between 0 and max-1 (engine output modulo max, so max must not be 0)
|
|
38
|
+
int rand_int (int max);
|
|
39
|
+
|
|
40
|
+
/// random float between 0 and 1, both ends included (engine output divided by mt.max())
|
|
41
|
+
float rand_float ();
|
|
42
|
+
|
|
43
|
+
double rand_double (); // same as rand_float, computed in double precision
|
|
44
|
+
|
|
45
|
+
explicit RandomGenerator (int64_t seed = 1234); // seed is cast to unsigned int before seeding mt
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
/* Generate an array of uniform random floats / multi-threaded implementation */
|
|
49
|
+
void float_rand (float * x, size_t n, int64_t seed);
|
|
50
|
+
void float_randn (float * x, size_t n, int64_t seed);
|
|
51
|
+
void int64_rand (int64_t * x, size_t n, int64_t seed);
|
|
52
|
+
void byte_rand (uint8_t * x, size_t n, int64_t seed);
|
|
53
|
+
// max is actually the maximum value + 1
|
|
54
|
+
void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed);
|
|
55
|
+
|
|
56
|
+
/* random permutation */
|
|
57
|
+
void rand_perm (int * perm, size_t n, int64_t seed);
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
} // namespace faiss
|