faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/utils/utils.h>
|
|
11
|
+
|
|
12
|
+
#include <cstdio>
|
|
13
|
+
#include <cassert>
|
|
14
|
+
#include <cstring>
|
|
15
|
+
#include <cmath>
|
|
16
|
+
|
|
17
|
+
#include <sys/time.h>
|
|
18
|
+
#include <sys/types.h>
|
|
19
|
+
#include <unistd.h>
|
|
20
|
+
|
|
21
|
+
#include <omp.h>
|
|
22
|
+
|
|
23
|
+
#include <algorithm>
|
|
24
|
+
#include <vector>
|
|
25
|
+
|
|
26
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
27
|
+
#include <faiss/impl/FaissAssert.h>
|
|
28
|
+
#include <faiss/utils/random.h>
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
#ifndef FINTEGER
|
|
33
|
+
#define FINTEGER long
|
|
34
|
+
#endif
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
extern "C" {
|
|
38
|
+
|
|
39
|
+
/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
|
|
40
|
+
|
|
41
|
+
int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
|
|
42
|
+
n, FINTEGER *k, const float *alpha, const float *a,
|
|
43
|
+
FINTEGER *lda, const float *b, FINTEGER *
|
|
44
|
+
ldb, float *beta, float *c, FINTEGER *ldc);
|
|
45
|
+
|
|
46
|
+
/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
|
|
47
|
+
|
|
48
|
+
int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda,
|
|
49
|
+
float *tau, float *work, FINTEGER *lwork, FINTEGER *info);
|
|
50
|
+
|
|
51
|
+
int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a,
|
|
52
|
+
FINTEGER *lda, float *tau, float *work,
|
|
53
|
+
FINTEGER *lwork, FINTEGER *info);
|
|
54
|
+
|
|
55
|
+
int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha,
|
|
56
|
+
const float *a, FINTEGER *lda, const float *x, FINTEGER *incx,
|
|
57
|
+
float *beta, float *y, FINTEGER *incy);
|
|
58
|
+
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
/**************************************************
|
|
63
|
+
* Get some stats about the system
|
|
64
|
+
**************************************************/
|
|
65
|
+
|
|
66
|
+
namespace faiss {
|
|
67
|
+
|
|
68
|
+
/** Current wall-clock time in milliseconds, as reported by gettimeofday().
 *
 * @return seconds and microseconds of the current time, both converted to
 *         milliseconds and summed.
 */
double getmillisecs () {
    struct timeval now;
    gettimeofday (&now, nullptr);
    const double ms_from_seconds = now.tv_sec * 1e3;
    const double ms_from_micros = now.tv_usec * 1e-3;
    return ms_from_seconds + ms_from_micros;
}
|
|
73
|
+
|
|
74
|
+
/** Read the CPU time-stamp counter.
 *
 * On x86-64 this executes `rdtsc` and combines the two 32-bit halves it
 * returns in EDX:EAX into one 64-bit value. On every other architecture
 * the function returns 0.
 */
uint64_t get_cycles () {
#ifdef __x86_64__
    uint32_t upper_half, lower_half;
    asm volatile("rdtsc \n\t"
                 : "=a" (lower_half),
                   "=d" (upper_half));
    const uint64_t shifted_upper = (uint64_t)upper_half << 32;
    return shifted_upper | lower_half;
#else
    return 0;
#endif
}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
#ifdef __linux__
|
|
88
|
+
|
|
89
|
+
size_t get_mem_usage_kb ()
|
|
90
|
+
{
|
|
91
|
+
int pid = getpid ();
|
|
92
|
+
char fname[256];
|
|
93
|
+
snprintf (fname, 256, "/proc/%d/status", pid);
|
|
94
|
+
FILE * f = fopen (fname, "r");
|
|
95
|
+
FAISS_THROW_IF_NOT_MSG (f, "cannot open proc status file");
|
|
96
|
+
size_t sz = 0;
|
|
97
|
+
for (;;) {
|
|
98
|
+
char buf [256];
|
|
99
|
+
if (!fgets (buf, 256, f)) break;
|
|
100
|
+
if (sscanf (buf, "VmRSS: %ld kB", &sz) == 1) break;
|
|
101
|
+
}
|
|
102
|
+
fclose (f);
|
|
103
|
+
return sz;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
#elif __APPLE__
|
|
107
|
+
|
|
108
|
+
size_t get_mem_usage_kb ()
|
|
109
|
+
{
|
|
110
|
+
fprintf(stderr, "WARN: get_mem_usage_kb not implemented on the mac\n");
|
|
111
|
+
return 0;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
#endif
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
/** Apply a sequence of reflections to a set of vectors, in place.
 *
 * For each of the n vectors x_i (d floats each, stored contiguously) and
 * for each of the nu vectors u_l (d floats each, stored contiguously),
 * performs x_i <- x_i - 2 * <u_l, x_i> * u_l, in the order l = 0..nu-1.
 * (This is a geometric reflection when u_l has unit norm; the code does
 * not normalize u.)
 *
 * @param u   nu * d floats, read only
 * @param x   n * d floats, updated in place
 * @param n   number of vectors in x
 * @param d   dimension; may be odd
 * @param nu  number of reflection vectors in u
 */
void reflection (const float * __restrict u,
                 float * __restrict x,
                 size_t n, size_t d, size_t nu)
{
    // largest even number <= d: the part covered by the unrolled loop
    const size_t d_even = d - (d % 2);

    for (size_t i = 0; i < n; i++) {
        const float * up = u;
        for (size_t l = 0; l < nu; l++) {
            // two accumulators: dot product unrolled by 2
            float ip1 = 0, ip2 = 0;

            for (size_t j = 0; j < d_even; j += 2) {
                ip1 += up[j] * x[j];
                ip2 += up[j+1] * x[j+1];
            }
            // odd d: add the last component separately instead of
            // reading one element past the end of both arrays
            if (d_even < d) {
                ip1 += up[d_even] * x[d_even];
            }
            float ip = 2 * (ip1 + ip2);

            for (size_t j = 0; j < d; j++)
                x[j] -= ip * up[j];
            up += d;
        }
        x += d;
    }
}
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
/* Reference implementation (slower): same update as reflection(),
   x_i <- x_i - 2 * <u_l, x_i> * u_l for every vector x_i and every u_l
   in turn, but with the dot product accumulated in double precision
   and without loop unrolling. */
void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu)
{
    for (size_t row = 0; row < n; row++, x += d) {
        const float * axis = u;
        for (size_t r = 0; r < nu; r++, axis += d) {
            // twice the inner product <axis, x>
            double scale = 0;
            for (size_t c = 0; c < d; c++) {
                scale += axis[c] * x[c];
            }
            scale *= 2;

            for (size_t c = 0; c < d; c++) {
                x[c] -= scale * axis[c];
            }
        }
    }
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
/***************************************************************************
|
|
173
|
+
* Some matrix manipulation functions
|
|
174
|
+
***************************************************************************/
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
/* This function exists because the Torch counterpart is extremely slow
   (not multi-threaded + unexpected overhead even in single thread).
   It implements the usual identity |x-y|^2 = |x|^2 + |y|^2 - 2<x|y>:
   on input, dis is an n1 x n2 row-major table of inner products; on
   output each entry (j, i) has been replaced in place by
   nr1[j] + nr2[i] - 2 * dis[j, i]. Rows are processed in parallel. */
void inner_product_to_L2sqr (float * __restrict dis,
                             const float * nr1,
                             const float * nr2,
                             size_t n1, size_t n2)
{

#pragma omp parallel for
    for (size_t row = 0; row < n1; row++) {
        float * out = dis + row * n2;
        for (size_t col = 0; col < n2; col++) {
            const float ip = out[col];
            out[col] = nr1[row] + nr2[col] - 2 * ip;
        }
    }
}
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
void matrix_qr (int m, int n, float *a)
|
|
196
|
+
{
|
|
197
|
+
FAISS_THROW_IF_NOT (m >= n);
|
|
198
|
+
FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
|
|
199
|
+
std::vector<float> tau (ki);
|
|
200
|
+
FINTEGER lwork = -1, info;
|
|
201
|
+
float work_size;
|
|
202
|
+
|
|
203
|
+
sgeqrf_ (&mi, &ni, a, &mi, tau.data(),
|
|
204
|
+
&work_size, &lwork, &info);
|
|
205
|
+
lwork = size_t(work_size);
|
|
206
|
+
std::vector<float> work (lwork);
|
|
207
|
+
|
|
208
|
+
sgeqrf_ (&mi, &ni, a, &mi,
|
|
209
|
+
tau.data(), work.data(), &lwork, &info);
|
|
210
|
+
|
|
211
|
+
sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(),
|
|
212
|
+
work.data(), &lwork, &info);
|
|
213
|
+
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
/***************************************************************************
|
|
218
|
+
* Kmeans subroutine
|
|
219
|
+
***************************************************************************/
|
|
220
|
+
|
|
221
|
+
// a bit above machine epsilon for float16
|
|
222
|
+
|
|
223
|
+
#define EPS (1 / 1024.)
|
|
224
|
+
|
|
225
|
+
/* For k-means, compute centroids given assignment of vectors to centroids */
|
|
226
|
+
int km_update_centroids (const float * x,
|
|
227
|
+
float * centroids,
|
|
228
|
+
int64_t * assign,
|
|
229
|
+
size_t d, size_t k, size_t n,
|
|
230
|
+
size_t k_frozen)
|
|
231
|
+
{
|
|
232
|
+
k -= k_frozen;
|
|
233
|
+
centroids += k_frozen * d;
|
|
234
|
+
|
|
235
|
+
std::vector<size_t> hassign(k);
|
|
236
|
+
memset (centroids, 0, sizeof(*centroids) * d * k);
|
|
237
|
+
|
|
238
|
+
#pragma omp parallel
|
|
239
|
+
{
|
|
240
|
+
int nt = omp_get_num_threads();
|
|
241
|
+
int rank = omp_get_thread_num();
|
|
242
|
+
// this thread is taking care of centroids c0:c1
|
|
243
|
+
size_t c0 = (k * rank) / nt;
|
|
244
|
+
size_t c1 = (k * (rank + 1)) / nt;
|
|
245
|
+
const float *xi = x;
|
|
246
|
+
size_t nacc = 0;
|
|
247
|
+
|
|
248
|
+
for (size_t i = 0; i < n; i++) {
|
|
249
|
+
int64_t ci = assign[i];
|
|
250
|
+
assert (ci >= 0 && ci < k + k_frozen);
|
|
251
|
+
ci -= k_frozen;
|
|
252
|
+
if (ci >= c0 && ci < c1) {
|
|
253
|
+
float * c = centroids + ci * d;
|
|
254
|
+
hassign[ci]++;
|
|
255
|
+
for (size_t j = 0; j < d; j++)
|
|
256
|
+
c[j] += xi[j];
|
|
257
|
+
nacc++;
|
|
258
|
+
}
|
|
259
|
+
xi += d;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
#pragma omp parallel for
|
|
265
|
+
for (size_t ci = 0; ci < k; ci++) {
|
|
266
|
+
float * c = centroids + ci * d;
|
|
267
|
+
float ni = (float) hassign[ci];
|
|
268
|
+
if (ni != 0) {
|
|
269
|
+
for (size_t j = 0; j < d; j++)
|
|
270
|
+
c[j] /= ni;
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/* Take care of void clusters */
|
|
275
|
+
size_t nsplit = 0;
|
|
276
|
+
RandomGenerator rng (1234);
|
|
277
|
+
for (size_t ci = 0; ci < k; ci++) {
|
|
278
|
+
if (hassign[ci] == 0) { /* need to redefine a centroid */
|
|
279
|
+
size_t cj;
|
|
280
|
+
for (cj = 0; 1; cj = (cj + 1) % k) {
|
|
281
|
+
/* probability to pick this cluster for split */
|
|
282
|
+
float p = (hassign[cj] - 1.0) / (float) (n - k);
|
|
283
|
+
float r = rng.rand_float ();
|
|
284
|
+
if (r < p) {
|
|
285
|
+
break; /* found our cluster to be split */
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d);
|
|
289
|
+
|
|
290
|
+
/* small symmetric pertubation. Much better than */
|
|
291
|
+
for (size_t j = 0; j < d; j++) {
|
|
292
|
+
if (j % 2 == 0) {
|
|
293
|
+
centroids[ci * d + j] *= 1 + EPS;
|
|
294
|
+
centroids[cj * d + j] *= 1 - EPS;
|
|
295
|
+
} else {
|
|
296
|
+
centroids[ci * d + j] *= 1 - EPS;
|
|
297
|
+
centroids[cj * d + j] *= 1 + EPS;
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/* assume even split of the cluster */
|
|
302
|
+
hassign[ci] = hassign[cj] / 2;
|
|
303
|
+
hassign[cj] -= hassign[ci];
|
|
304
|
+
nsplit++;
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
return nsplit;
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
#undef EPS
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
/***************************************************************************
|
|
316
|
+
* Result list routines
|
|
317
|
+
***************************************************************************/
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
/** Put the labels of tied results into ascending order.
 *
 * Walks the k distances in order; every maximal run of consecutive equal
 * distances has its corresponding labels in `idx` sorted in increasing
 * order. Distances are not modified.
 *
 * Unlike the previous version, the last run (the one that reaches the end
 * of the list) is sorted as well, and no sentinel distance is used, so a
 * first distance of -1e38 can no longer lead to sorting from `idx - 1`.
 *
 * @param k    number of results
 * @param idx  k labels, reordered in place
 * @param dis  k distances
 */
void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
{
    int run_begin = 0;
    for (int i = 1; i <= k; i++) {
        // a run ends at the end of the list or when the distance changes
        const bool run_ends = (i == k) || (dis[i] != dis[run_begin]);
        if (!run_ends) continue;
        if (i - run_begin > 1) {
            std::sort (idx + run_begin, idx + i);
        }
        run_begin = i;
    }
}
|
|
335
|
+
|
|
336
|
+
/** Merge a second result table into a first one, row by row.
 *
 * Both tables hold n rows of k (label, distance) pairs. Each row is
 * expected to be ordered best-first, with unused slots (label < 0) at the
 * end. Row i of (I0, D0) is overwritten with the k best entries of the
 * union of row i of both tables.
 *
 * An entry is considered valid when its label is >= 0. Validity used to
 * be tested on the distance of table 1, which dropped valid entries with
 * a negative distance (e.g. inner products) and gave padding slots of
 * table 1 the label `translation - 1`.
 *
 * @param keep_min     true: smaller distances are better; false: larger
 * @param translation  added to every label taken from table 1
 * @return total number of entries taken from table 1
 *
 * On equal distances the entry of table 1 is taken. Slots that cannot be
 * filled get label -1 and distance NaN.
 */
size_t merge_result_table_with (size_t n, size_t k,
                                int64_t *I0, float *D0,
                                const int64_t *I1, const float *D1,
                                bool keep_min,
                                int64_t translation)
{
    size_t n1 = 0;

#pragma omp parallel reduction(+:n1)
    {
        // per-thread scratch row
        std::vector<int64_t> tmpI (k);
        std::vector<float> tmpD (k);

#pragma omp for
        for (size_t i = 0; i < n; i++) {
            int64_t *lI0 = I0 + i * k;
            float *lD0 = D0 + i * k;
            const int64_t *lI1 = I1 + i * k;
            const float *lD1 = D1 + i * k;
            // read cursors; r0 + r1 <= j < k, so both stay in bounds
            size_t r0 = 0;
            size_t r1 = 0;

            for (size_t j = 0; j < k; j++) {
                const bool valid0 = lI0[r0] >= 0;
                const bool valid1 = lI1[r1] >= 0;

                bool take0 = valid0;
                if (valid0 && valid1) {
                    take0 = keep_min ? lD0[r0] < lD1[r1]
                                     : lD0[r0] > lD1[r1];
                }

                if (take0) {
                    tmpD[j] = lD0[r0];
                    tmpI[j] = lI0[r0];
                    r0++;
                } else if (valid1) {
                    tmpD[j] = lD1[r1];
                    tmpI[j] = lI1[r1] + translation;
                    r1++;
                } else { // both tables exhausted
                    tmpD[j] = NAN;
                    tmpI[j] = -1;
                }
            }

            n1 += r1;
            memcpy (lD0, tmpD.data(), sizeof (lD0[0]) * k);
            memcpy (lI0, tmpI.data(), sizeof (lI0[0]) * k);
        }
    }

    return n1;
}
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
/** Count how many entries of one label list also occur in the other.
 *
 * The shorter list is copied, sorted and de-duplicated; each entry of the
 * longer list is then looked up by binary search. A matched entry of the
 * copy is tagged so that it is counted only once.
 *
 * @param k1, v1     first list (size and labels)
 * @param k2, v2_in  second list (size and labels)
 * @return number of matches; 0 when either list is empty
 *
 * NOTE(review): bit 60 of the stored labels is used as the "already
 * counted" tag, so the logic presumably expects labels in [0, 2^60); a
 * leading run of -1 labels in the shorter list is dropped by the de-dup
 * step.
 */
size_t ranklist_intersection_size (size_t k1, const int64_t *v1,
                                   size_t k2, const int64_t *v2_in)
{
    // make sure the second list is the shorter one
    if (k2 > k1) return ranklist_intersection_size (k2, v2_in, k1, v1);

    // private, sorted copy (owned by the vector, released on any exit)
    std::vector<int64_t> v2 (v2_in, v2_in + k2);
    std::sort (v2.begin(), v2.end());
    { // de-dup v2
        int64_t prev = -1;
        size_t wp = 0;
        for (size_t i = 0; i < k2; i++) {
            if (v2 [i] != prev) {
                v2[wp++] = prev = v2 [i];
            }
        }
        k2 = wp;
    }
    // nothing to match against: the search below would read v2[0]
    if (k2 == 0) return 0;

    // 64-bit shift regardless of the width of long
    const int64_t seen_flag = int64_t(1) << 60;
    size_t count = 0;
    for (size_t i = 0; i < k1; i++) {
        int64_t q = v1 [i];
        // binary search for the last entry <= q (tag masked out)
        size_t i0 = 0, i1 = k2;
        while (i0 + 1 < i1) {
            size_t imed = (i1 + i0) / 2;
            int64_t piv = v2 [imed] & ~seen_flag;
            if (piv <= q) i0 = imed;
            else i1 = imed;
        }
        // a tagged entry no longer compares equal, so it is not recounted
        if (v2 [i0] == q) {
            count++;
            v2 [i0] |= seen_flag;
        }
    }

    return count;
}
|
|
438
|
+
|
|
439
|
+
/** Imbalance factor of a histogram.
 *
 * Computes k * sum(hist[i]^2) / (sum(hist[i]))^2, which equals 1 when all
 * k bins hold the same count and grows as the counts get more uneven.
 *
 * @param k     number of bins
 * @param hist  k bin counts
 */
double imbalance_factor (int k, const int *hist) {
    double total = 0;
    double sum_of_squares = 0;

    for (int bin = 0; bin < k; bin++) {
        const int count = hist[bin];
        total += count;
        sum_of_squares += count * (double) count;
    }

    const double scaled = sum_of_squares * k;
    return scaled / (total * total);
}


/** Imbalance factor of an assignment.
 *
 * Builds the histogram of the n values in `assign` over k bins and
 * returns its imbalance factor. Values are used directly as bin indices,
 * without range checking.
 */
double imbalance_factor (int n, int k, const int64_t *assign) {
    std::vector<int> counts (k, 0);
    for (int i = 0; i < n; i++) {
        ++counts[assign[i]];
    }

    return imbalance_factor (k, counts.data());
}
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
/** Histogram of an integer vector.
 *
 * @param n     number of values in v
 * @param v     values to count
 * @param vmax  number of bins; values outside [0, vmax) are not counted
 * @param hist  output, vmax bins, reset to zero before counting
 * @return number of values that fell outside [0, vmax)
 */
int ivec_hist (size_t n, const int * v, int vmax, int *hist) {
    memset (hist, 0, sizeof(hist[0]) * vmax);

    int out_of_range = 0;
    for (size_t i = 0; i < n; i++) {
        const int value = v[i];
        const bool in_range = value >= 0 && value < vmax;
        if (in_range) {
            hist[value]++;
        } else {
            out_of_range++;
        }
    }
    return out_of_range;
}
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist)
|
|
475
|
+
{
|
|
476
|
+
FAISS_THROW_IF_NOT (nbits % 8 == 0);
|
|
477
|
+
size_t d = nbits / 8;
|
|
478
|
+
std::vector<int> accu(d * 256);
|
|
479
|
+
const uint8_t *c = codes;
|
|
480
|
+
for (size_t i = 0; i < n; i++)
|
|
481
|
+
for(int j = 0; j < d; j++)
|
|
482
|
+
accu[j * 256 + *c++]++;
|
|
483
|
+
memset (hist, 0, sizeof(*hist) * nbits);
|
|
484
|
+
for (int i = 0; i < d; i++) {
|
|
485
|
+
const int *ai = accu.data() + i * 256;
|
|
486
|
+
int * hi = hist + i * 8;
|
|
487
|
+
for (int j = 0; j < 256; j++)
|
|
488
|
+
for (int k = 0; k < 8; k++)
|
|
489
|
+
if ((j >> k) & 1)
|
|
490
|
+
hi[k] += ai[j];
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
/** Simple multiplicative checksum of an integer vector.
 *
 * Starting from 112909, processes the values from last to first with
 * cs = cs * 65713 + a[i] * 1686049.
 *
 * The per-element product is evaluated in unsigned 32-bit arithmetic and
 * then reinterpreted as a signed 32-bit value. This is what the plain
 * `int` multiplication yields on two's-complement targets when it wraps,
 * but without relying on signed overflow, which is undefined behaviour.
 * (Assumes `int` is 32 bits wide.)
 *
 * @param n  number of values
 * @param a  values
 */
size_t ivec_checksum (size_t n, const int *a)
{
    size_t cs = 112909;
    while (n--) {
        const uint32_t wrapped = static_cast<uint32_t>(a[n]) * 1686049u;
        const int32_t term = static_cast<int32_t>(wrapped);
        cs = cs * 65713 + term;
    }
    return cs;
}
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
namespace {

/// Orders indices by the float value they point to in `vals`.
struct ArgsortComparator {
    const float *vals;
    bool operator() (const size_t a, const size_t b) const {
        return vals[a] < vals[b];
    }
};

/// Half-open range [i0, i1) in a permutation array.
struct SegmentS {
    size_t i0; // begin pointer in the permutation array
    size_t i1; // end
    size_t len() const {
        return i1 - i0;
    }
};

// see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge
// extended to > 1 merge thread

/** Merges 2 sorted ranges that should be consecutive on the source into
 * the union of the two on the destination.
 *
 * The larger range is cut into nt equal parts; the smaller one is cut at
 * the positions where the first element of each following part would be
 * inserted. Part t of both ranges is then merged into its own slice of
 * dst, so the nt merges are independent.
 *
 * On return both s1 and s2 are set to the union range.
 *
 * @param src   source permutation, sorted (w.r.t. comp) inside s1 and s2
 * @param dst   destination, written on the union of s1 and s2
 * @param nt    number of parts / threads (values < 1 are treated as 1)
 * @param comp  ordering of the elements
 */
template<typename T>
void parallel_merge (const T *src, T *dst,
                     SegmentS &s1, SegmentS & s2, int nt,
                     const ArgsortComparator & comp) {
    if (s2.len() > s1.len()) { // make sure that s1 larger than s2
        std::swap(s1, s2);
    }

    if (s1.len() == 0) {
        // both ranges are empty: nothing to copy, and no pivot to read
        s1.i0 = std::min(s1.i0, s2.i0);
        s1.i1 = std::max(s1.i1, s2.i1);
        s2 = s1;
        return;
    }
    if (nt < 1) {
        nt = 1;
    }

    // compute sub-ranges for each thread
    // (std::vector instead of variable-length arrays, which are not C++)
    std::vector<SegmentS> s1s (nt), s2s (nt), sws (nt);
    s2s[0].i0 = s2.i0;
    s2s[nt - 1].i1 = s2.i1;

    // not sure parallel actually helps here
#pragma omp parallel for num_threads(nt)
    for (int t = 0; t < nt; t++) {
        s1s[t].i0 = s1.i0 + s1.len() * t / nt;
        s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;

        if (t + 1 < nt) {
            // first element of the next part of s1
            T pivot = src[s1s[t].i1];
            // cut s2 at the first element that is greater than the pivot.
            // The previous hand-written search never examined the first
            // element of s2, so that element always landed in part 0 even
            // when it belonged to a later part.
            const T *cut = std::upper_bound (src + s2.i0, src + s2.i1,
                                             pivot, comp);
            s2s[t].i1 = s2s[t + 1].i0 = size_t (cut - src);
        }
    }

    // the output covers the union of both input ranges
    s1.i0 = std::min(s1.i0, s2.i0);
    s1.i1 = std::max(s1.i1, s2.i1);
    s2 = s1;

    // destination slices: consecutive, sized by the two parts they receive
    sws[0].i0 = s1.i0;
    for (int t = 0; t < nt; t++) {
        sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();
        if (t + 1 < nt) {
            sws[t + 1].i0 = sws[t].i1;
        }
    }
    assert(sws[nt - 1].i1 == s1.i1);

    // do the actual merging
#pragma omp parallel for num_threads(nt)
    for (int t = 0; t < nt; t++) {
        SegmentS sw = sws[t];
        SegmentS s1t = s1s[t];
        SegmentS s2t = s2s[t];
        if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {
            for (;;) {
                // assert (sw.len() == s1t.len() + s2t.len());
                if (comp(src[s1t.i0], src[s2t.i0])) {
                    dst[sw.i0++] = src[s1t.i0++];
                    if (s1t.i0 == s1t.i1) break;
                } else {
                    dst[sw.i0++] = src[s2t.i0++];
                    if (s2t.i0 == s2t.i1) break;
                }
            }
        }
        // one of the two parts is exhausted: copy the rest of the other
        if (s1t.len() > 0) {
            assert(s1t.len() == sw.len());
            memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0]));
        } else if (s2t.len() > 0) {
            assert(s2t.len() == sw.len());
            memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0]));
        }
    }
}

}
|
|
597
|
+
|
|
598
|
+
/** Indirect sort of a float array.
 *
 * @param n     number of values
 * @param vals  values to order (not modified)
 * @param perm  output, n entries: indices 0..n-1 arranged so that
 *              vals[perm[0]] <= vals[perm[1]] <= ...
 */
void fvec_argsort (size_t n, const float *vals,
                   size_t *perm)
{
    // start from the identity permutation
    size_t next = 0;
    while (next < n) {
        perm[next] = next;
        ++next;
    }

    // order the indices by the value they refer to
    std::sort (perm, perm + n,
               [vals] (const size_t a, const size_t b) {
                   return vals[a] < vals[b];
               });
}
|
|
605
|
+
|
|
606
|
+
void fvec_argsort_parallel (size_t n, const float *vals,
|
|
607
|
+
size_t *perm)
|
|
608
|
+
{
|
|
609
|
+
size_t * perm2 = new size_t[n];
|
|
610
|
+
// 2 result tables, during merging, flip between them
|
|
611
|
+
size_t *permB = perm2, *permA = perm;
|
|
612
|
+
|
|
613
|
+
int nt = omp_get_max_threads();
|
|
614
|
+
{ // prepare correct permutation so that the result ends in perm
|
|
615
|
+
// at final iteration
|
|
616
|
+
int nseg = nt;
|
|
617
|
+
while (nseg > 1) {
|
|
618
|
+
nseg = (nseg + 1) / 2;
|
|
619
|
+
std::swap (permA, permB);
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
#pragma omp parallel
|
|
624
|
+
for (size_t i = 0; i < n; i++) permA[i] = i;
|
|
625
|
+
|
|
626
|
+
ArgsortComparator comp = {vals};
|
|
627
|
+
|
|
628
|
+
SegmentS segs[nt];
|
|
629
|
+
|
|
630
|
+
// independent sorts
|
|
631
|
+
#pragma omp parallel for
|
|
632
|
+
for (int t = 0; t < nt; t++) {
|
|
633
|
+
size_t i0 = t * n / nt;
|
|
634
|
+
size_t i1 = (t + 1) * n / nt;
|
|
635
|
+
SegmentS seg = {i0, i1};
|
|
636
|
+
std::sort (permA + seg.i0, permA + seg.i1, comp);
|
|
637
|
+
segs[t] = seg;
|
|
638
|
+
}
|
|
639
|
+
int prev_nested = omp_get_nested();
|
|
640
|
+
omp_set_nested(1);
|
|
641
|
+
|
|
642
|
+
int nseg = nt;
|
|
643
|
+
while (nseg > 1) {
|
|
644
|
+
int nseg1 = (nseg + 1) / 2;
|
|
645
|
+
int sub_nt = nseg % 2 == 0 ? nt : nt - 1;
|
|
646
|
+
int sub_nseg1 = nseg / 2;
|
|
647
|
+
|
|
648
|
+
#pragma omp parallel for num_threads(nseg1)
|
|
649
|
+
for (int s = 0; s < nseg; s += 2) {
|
|
650
|
+
if (s + 1 == nseg) { // otherwise isolated segment
|
|
651
|
+
memcpy(permB + segs[s].i0, permA + segs[s].i0,
|
|
652
|
+
segs[s].len() * sizeof(size_t));
|
|
653
|
+
} else {
|
|
654
|
+
int t0 = s * sub_nt / sub_nseg1;
|
|
655
|
+
int t1 = (s + 1) * sub_nt / sub_nseg1;
|
|
656
|
+
printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
|
|
657
|
+
parallel_merge(permA, permB, segs[s], segs[s + 1],
|
|
658
|
+
t1 - t0, comp);
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
for (int s = 0; s < nseg; s += 2)
|
|
662
|
+
segs[s / 2] = segs[s];
|
|
663
|
+
nseg = nseg1;
|
|
664
|
+
std::swap (permA, permB);
|
|
665
|
+
}
|
|
666
|
+
assert (permA == perm);
|
|
667
|
+
omp_set_nested(prev_nested);
|
|
668
|
+
delete [] perm2;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
const float *fvecs_maybe_subsample (
|
|
689
|
+
size_t d, size_t *n, size_t nmax, const float *x,
|
|
690
|
+
bool verbose, int64_t seed)
|
|
691
|
+
{
|
|
692
|
+
|
|
693
|
+
if (*n <= nmax) return x; // nothing to do
|
|
694
|
+
|
|
695
|
+
size_t n2 = nmax;
|
|
696
|
+
if (verbose) {
|
|
697
|
+
printf (" Input training set too big (max size is %ld), sampling "
|
|
698
|
+
"%ld / %ld vectors\n", nmax, n2, *n);
|
|
699
|
+
}
|
|
700
|
+
std::vector<int> subset (*n);
|
|
701
|
+
rand_perm (subset.data (), *n, seed);
|
|
702
|
+
float *x_subset = new float[n2 * d];
|
|
703
|
+
for (int64_t i = 0; i < n2; i++)
|
|
704
|
+
memcpy (&x_subset[i * d],
|
|
705
|
+
&x[subset[i] * size_t(d)],
|
|
706
|
+
sizeof (x[0]) * d);
|
|
707
|
+
*n = n2;
|
|
708
|
+
return x_subset;
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
/** Expand a bit vector into a +1 / -1 float vector.
 *
 * Bit i is read from byte i / 8 of x_in, least significant bit first.
 * A set bit gives +1, a cleared bit gives -1.
 *
 * @param d      number of bits to convert
 * @param x_in   packed input bits
 * @param x_out  output, d floats
 */
void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) {
    size_t pos = 0;
    while (pos < d) {
        const uint8_t byte = x_in[pos / 8];
        const bool bit_set = ((byte >> (pos % 8)) & 1) != 0;
        x_out[pos] = bit_set ? 1.0f : -1.0f;
        ++pos;
    }
}
|
|
717
|
+
|
|
718
|
+
/** Pack the signs of a float vector into bits.
 *
 * Output byte k holds inputs 8k .. 8k+7, input 8k+j going to bit j; the bit
 * is set when the input is strictly positive. Only d / 8 whole bytes are
 * written, so any trailing d % 8 inputs are not converted.
 *
 * @param d      number of input floats
 * @param x_in   input values, size d
 * @param x_out  output, d / 8 bytes
 */
void real_to_binary(size_t d, const float *x_in, uint8_t *x_out) {
    const size_t n_bytes = d / 8;
    for (size_t k = 0; k < n_bytes; ++k) {
        const float *group = x_in + 8 * k;
        unsigned packed = 0;
        // walk from the top bit down so that group[0] ends up in bit 0
        for (int j = 7; j >= 0; --j) {
            packed = (packed << 1) | (group[j] > 0 ? 1u : 0u);
        }
        x_out[k] = static_cast<uint8_t>(packed);
    }
}
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
// from Python's stringobject.c
/** Hash a byte buffer.
 *
 * The state starts from the first byte shifted left by 7 bits, every byte is
 * then folded in as x = (1000003 * x) ^ byte, and the length is xor-ed in at
 * the end.
 *
 * The first byte is only read when n > 0, so an empty buffer (including a
 * null pointer with n == 0) is valid input and hashes to 0.
 *
 * @param bytes  buffer to hash, at least n bytes
 * @param n      number of bytes
 * @return       64-bit hash value
 */
uint64_t hash_bytes (const uint8_t *bytes, int64_t n) {
    const uint8_t *p = bytes;
    // guard the seed read: with n <= 0 there is no first byte to look at
    uint64_t x = n > 0 ? (uint64_t)(*p) << 7 : 0;
    int64_t len = n;
    while (--len >= 0) {
        x = (1000003*x) ^ *p++;
    }
    x ^= n;
    return x;
}
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
bool check_openmp() {
|
|
745
|
+
omp_set_num_threads(10);
|
|
746
|
+
|
|
747
|
+
if (omp_get_max_threads() != 10) {
|
|
748
|
+
return false;
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
std::vector<int> nt_per_thread(10);
|
|
752
|
+
size_t sum = 0;
|
|
753
|
+
bool in_parallel = true;
|
|
754
|
+
#pragma omp parallel reduction(+: sum)
|
|
755
|
+
{
|
|
756
|
+
if (!omp_in_parallel()) {
|
|
757
|
+
in_parallel = false;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
int nt = omp_get_num_threads();
|
|
761
|
+
int rank = omp_get_thread_num();
|
|
762
|
+
|
|
763
|
+
nt_per_thread[rank] = nt;
|
|
764
|
+
#pragma omp for
|
|
765
|
+
for(int i = 0; i < 1000 * 1000 * 10; i++) {
|
|
766
|
+
sum += i;
|
|
767
|
+
}
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
if (!in_parallel) {
|
|
771
|
+
return false;
|
|
772
|
+
}
|
|
773
|
+
if (nt_per_thread[0] != 10) {
|
|
774
|
+
return false;
|
|
775
|
+
}
|
|
776
|
+
if (sum == 0) {
|
|
777
|
+
return false;
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
return true;
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
} // namespace faiss
|