faiss 0.2.0 → 0.2.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
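
Besides the widespread reformatting, the vendored Faiss sources gain several new components that show up above as +N -0 entries: graph-based indexes (IndexNSG, IndexNNDescent and their impl/NSG, impl/NNDescent counterparts), additive/residual/local-search quantizers (impl/AdditiveQuantizer, impl/ResidualQuantizer, impl/LocalSearchQuantizer, IndexResidual), and a NEON backend for simdlib. A minimal C++ sketch of how one of the new graph indexes might be exercised follows; the IndexNSGFlat class name and the meaning of the R (graph degree) parameter are taken from upstream Faiss and are assumptions, since only the header's line counts appear in this diff.

// Sketch only: assumes IndexNSGFlat(d, R) from the newly vendored IndexNSG.h.
#include <faiss/IndexNSG.h>

#include <cstdint>
#include <random>
#include <vector>

int main() {
    int d = 64;        // vector dimensionality
    int R = 32;        // NSG graph degree (assumed parameter meaning)
    size_t nb = 10000; // database size

    // generate a random database
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> dis;
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = dis(rng);
    }

    faiss::IndexNSGFlat index(d, R); // flat storage with an NSG graph on top
    index.add(nb, xb.data());        // builds the graph over all vectors

    int64_t k = 5;
    std::vector<float> distances(k);
    std::vector<int64_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}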
--- 0.2.0/data/vendor/faiss/faiss/utils/utils.cpp
+++ 0.2.1/data/vendor/faiss/faiss/utils/utils.cpp
@@ -9,10 +9,10 @@
 
 #include <faiss/utils/utils.h>
 
-#include <cstdio>
 #include <cassert>
-#include <cstring>
 #include <cmath>
+#include <cstdio>
+#include <cstring>
 
 #include <sys/types.h>
 
@@ -32,46 +32,94 @@
 
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/platform_macros.h>
 #include <faiss/utils/random.h>
 
-
-
 #ifndef FINTEGER
 #define FINTEGER long
 #endif
 
-
 extern "C" {
 
 /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
 
-int sgemm_
-
-
-
+int sgemm_(
+        const char* transa,
+        const char* transb,
+        FINTEGER* m,
+        FINTEGER* n,
+        FINTEGER* k,
+        const float* alpha,
+        const float* a,
+        FINTEGER* lda,
+        const float* b,
+        FINTEGER* ldb,
+        float* beta,
+        float* c,
+        FINTEGER* ldc);
 
 /* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
 
-int sgeqrf_
-
-
-
-
-
-
-
-
-
+int sgeqrf_(
+        FINTEGER* m,
+        FINTEGER* n,
+        float* a,
+        FINTEGER* lda,
+        float* tau,
+        float* work,
+        FINTEGER* lwork,
+        FINTEGER* info);
+
+int sorgqr_(
+        FINTEGER* m,
+        FINTEGER* n,
+        FINTEGER* k,
+        float* a,
+        FINTEGER* lda,
+        float* tau,
+        float* work,
+        FINTEGER* lwork,
+        FINTEGER* info);
+
+int sgemv_(
+        const char* trans,
+        FINTEGER* m,
+        FINTEGER* n,
+        float* alpha,
+        const float* a,
+        FINTEGER* lda,
+        const float* x,
+        FINTEGER* incx,
+        float* beta,
+        float* y,
+        FINTEGER* incy);
 }
 
-
 /**************************************************
  * Get some stats about the system
  **************************************************/
 
 namespace faiss {
 
+std::string get_compile_options() {
+    std::string options;
+
+    // this flag is set by GCC and Clang
+#ifdef __OPTIMIZE__
+    options += "OPTIMIZE ";
+#endif
+
+#ifdef __AVX2__
+    options += "AVX2";
+#elif defined(__aarch64__)
+    options += "NEON";
+#else
+    options += "GENERIC";
+#endif
+
+    return options;
+}
+
 #ifdef _MSC_VER
 double getmillisecs() {
     LARGE_INTEGER ts;
@@ -81,73 +129,69 @@ double getmillisecs() {
 
     return (ts.QuadPart * 1e3) / freq.QuadPart;
 }
-#else
-double getmillisecs
+#else // _MSC_VER
+double getmillisecs() {
     struct timeval tv;
-gettimeofday
+    gettimeofday(&tv, nullptr);
     return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3;
 }
 #endif // _MSC_VER
 
-uint64_t get_cycles
-#ifdef
+uint64_t get_cycles() {
+#ifdef __x86_64__
     uint32_t high, low;
-asm volatile("rdtsc \n\t"
-: "=a" (low),
-"=d" (high));
+    asm volatile("rdtsc \n\t" : "=a"(low), "=d"(high));
     return ((uint64_t)high << 32) | (low);
 #else
     return 0;
 #endif
 }
 
-
 #ifdef __linux__
 
-size_t get_mem_usage_kb
-
-int pid = getpid ();
+size_t get_mem_usage_kb() {
+    int pid = getpid();
    char fname[256];
-snprintf
-FILE
-FAISS_THROW_IF_NOT_MSG
+    snprintf(fname, 256, "/proc/%d/status", pid);
+    FILE* f = fopen(fname, "r");
+    FAISS_THROW_IF_NOT_MSG(f, "cannot open proc status file");
    size_t sz = 0;
    for (;;) {
-char buf
-if (!fgets
-
+        char buf[256];
+        if (!fgets(buf, 256, f))
+            break;
+        if (sscanf(buf, "VmRSS: %ld kB", &sz) == 1)
+            break;
    }
-fclose
+    fclose(f);
    return sz;
 }
 
 #else
 
-size_t get_mem_usage_kb
-
-
+size_t get_mem_usage_kb() {
+    fprintf(stderr,
+            "WARN: get_mem_usage_kb not implemented on current architecture\n");
     return 0;
 }
 
 #endif
 
-
-
-
-
-
-
-size_t n, size_t d, size_t nu)
-{
+void reflection(
+        const float* __restrict u,
+        float* __restrict x,
+        size_t n,
+        size_t d,
+        size_t nu) {
     size_t i, j, l;
     for (i = 0; i < n; i++) {
-const float
+        const float* up = u;
         for (l = 0; l < nu; l++) {
             float ip1 = 0, ip2 = 0;
 
-for (j = 0; j < d; j+=2) {
+            for (j = 0; j < d; j += 2) {
                 ip1 += up[j] * x[j];
-ip2 += up[j+1] * x[j+1];
+                ip2 += up[j + 1] * x[j + 1];
             }
             float ip = 2 * (ip1 + ip2);
 
@@ -159,13 +203,11 @@ void reflection (const float * __restrict u,
     }
 }
 
-
 /* Reference implementation (slower) */
-void reflection_ref
-{
+void reflection_ref(const float* u, float* x, size_t n, size_t d, size_t nu) {
     size_t i, j, l;
     for (i = 0; i < n; i++) {
-const float
+        const float* up = u;
         for (l = 0; l < nu; l++) {
             double ip = 0;
 
@@ -182,53 +224,38 @@ void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu)
     }
 }
 
-
-
-
-
-
 /***************************************************************************
  * Some matrix manipulation functions
  ***************************************************************************/
 
-void matrix_qr
-
-FAISS_THROW_IF_NOT (m >= n);
+void matrix_qr(int m, int n, float* a) {
+    FAISS_THROW_IF_NOT(m >= n);
     FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
-std::vector<float> tau
+    std::vector<float> tau(ki);
     FINTEGER lwork = -1, info;
     float work_size;
 
-sgeqrf_
-&work_size, &lwork, &info);
+    sgeqrf_(&mi, &ni, a, &mi, tau.data(), &work_size, &lwork, &info);
     lwork = size_t(work_size);
-std::vector<float> work
-
-sgeqrf_ (&mi, &ni, a, &mi,
-tau.data(), work.data(), &lwork, &info);
+    std::vector<float> work(lwork);
 
-
-work.data(), &lwork, &info);
+    sgeqrf_(&mi, &ni, a, &mi, tau.data(), work.data(), &lwork, &info);
 
+    sorgqr_(&mi, &ni, &ki, a, &mi, tau.data(), work.data(), &lwork, &info);
 }
 
-
-
-
 /***************************************************************************
  * Result list routines
  ***************************************************************************/
 
-
-void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
-{
+void ranklist_handle_ties(int k, int64_t* idx, const float* dis) {
     float prev_dis = -1e38;
     int prev_i = -1;
     for (int i = 0; i < k; i++) {
         if (dis[i] != prev_dis) {
             if (i > prev_i + 1) {
                 // sort between prev_i and i - 1
-std::sort
+                std::sort(idx + prev_i, idx + i);
             }
             prev_i = i;
             prev_dis = dis[i];
@@ -236,31 +263,33 @@ void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
     }
 }
 
-size_t merge_result_table_with
-
-
-
-
-
+size_t merge_result_table_with(
+        size_t n,
+        size_t k,
+        int64_t* I0,
+        float* D0,
+        const int64_t* I1,
+        const float* D1,
+        bool keep_min,
+        int64_t translation) {
     size_t n1 = 0;
 
-#pragma omp parallel reduction(
+#pragma omp parallel reduction(+ : n1)
     {
-std::vector<int64_t> tmpI
-std::vector<float> tmpD
+        std::vector<int64_t> tmpI(k);
+        std::vector<float> tmpD(k);
 
 #pragma omp for
         for (int64_t i = 0; i < n; i++) {
-int64_t
-float
-const int64_t
-const float
+            int64_t* lI0 = I0 + i * k;
+            float* lD0 = D0 + i * k;
+            const int64_t* lI1 = I1 + i * k;
+            const float* lD1 = D1 + i * k;
             size_t r0 = 0;
             size_t r1 = 0;
 
             if (keep_min) {
                 for (size_t j = 0; j < k; j++) {
-
                     if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) {
                         tmpD[j] = lD0[r0];
                         tmpI[j] = lI0[r0];
@@ -291,29 +320,30 @@ size_t merge_result_table_with (size_t n, size_t k,
                 }
             }
             n1 += r1;
-memcpy
-memcpy
+            memcpy(lD0, tmpD.data(), sizeof(lD0[0]) * k);
+            memcpy(lI0, tmpI.data(), sizeof(lI0[0]) * k);
         }
     }
 
     return n1;
 }
 
-
-
-
-
-{
-if (k2 > k1)
-
-
-
+size_t ranklist_intersection_size(
+        size_t k1,
+        const int64_t* v1,
+        size_t k2,
+        const int64_t* v2_in) {
+    if (k2 > k1)
+        return ranklist_intersection_size(k2, v2_in, k1, v1);
+    int64_t* v2 = new int64_t[k2];
+    memcpy(v2, v2_in, sizeof(int64_t) * k2);
+    std::sort(v2, v2 + k2);
     { // de-dup v2
         int64_t prev = -1;
         size_t wp = 0;
         for (size_t i = 0; i < k2; i++) {
-if (v2
-v2[wp++] = prev = v2
+            if (v2[i] != prev) {
+                v2[wp++] = prev = v2[i];
             }
         }
         k2 = wp;
@@ -321,195 +351,196 @@ size_t ranklist_intersection_size (size_t k1, const int64_t *v1,
     const int64_t seen_flag = int64_t{1} << 60;
     size_t count = 0;
     for (size_t i = 0; i < k1; i++) {
-int64_t q = v1
+        int64_t q = v1[i];
         size_t i0 = 0, i1 = k2;
         while (i0 + 1 < i1) {
             size_t imed = (i1 + i0) / 2;
-int64_t piv = v2
-if (piv <= q)
-
+            int64_t piv = v2[imed] & ~seen_flag;
+            if (piv <= q)
+                i0 = imed;
+            else
+                i1 = imed;
         }
-if (v2
+        if (v2[i0] == q) {
             count++;
-v2
+            v2[i0] |= seen_flag;
         }
     }
-delete
+    delete[] v2;
 
     return count;
 }
 
-double imbalance_factor
+double imbalance_factor(int k, const int* hist) {
     double tot = 0, uf = 0;
 
-for (int i = 0
+    for (int i = 0; i < k; i++) {
         tot += hist[i];
-uf += hist[i] * (double)
+        uf += hist[i] * (double)hist[i];
     }
     uf = uf * k / (tot * tot);
 
     return uf;
 }
 
-
-double imbalance_factor (int n, int k, const int64_t *assign) {
+double imbalance_factor(int n, int k, const int64_t* assign) {
     std::vector<int> hist(k, 0);
     for (int i = 0; i < n; i++) {
         hist[assign[i]]++;
     }
 
-return imbalance_factor
+    return imbalance_factor(k, hist.data());
 }
 
-
-
-int ivec_hist (size_t n, const int * v, int vmax, int *hist) {
-memset (hist, 0, sizeof(hist[0]) * vmax);
+int ivec_hist(size_t n, const int* v, int vmax, int* hist) {
+    memset(hist, 0, sizeof(hist[0]) * vmax);
     int nout = 0;
     while (n--) {
-if (v[n] < 0 || v[n] >= vmax)
-
+        if (v[n] < 0 || v[n] >= vmax)
+            nout++;
+        else
+            hist[v[n]]++;
     }
     return nout;
 }
 
-
-
-{
-FAISS_THROW_IF_NOT (nbits % 8 == 0);
+void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist) {
+    FAISS_THROW_IF_NOT(nbits % 8 == 0);
     size_t d = nbits / 8;
     std::vector<int> accu(d * 256);
-const uint8_t
+    const uint8_t* c = codes;
     for (size_t i = 0; i < n; i++)
-for(int j = 0; j < d; j++)
+        for (int j = 0; j < d; j++)
             accu[j * 256 + *c++]++;
-memset
+    memset(hist, 0, sizeof(*hist) * nbits);
     for (int i = 0; i < d; i++) {
-const int
-int
+        const int* ai = accu.data() + i * 256;
+        int* hi = hist + i * 8;
         for (int j = 0; j < 256; j++)
             for (int k = 0; k < 8; k++)
                 if ((j >> k) & 1)
                     hi[k] += ai[j];
     }
-
 }
 
-
-
-size_t ivec_checksum (size_t n, const int *a)
-{
+size_t ivec_checksum(size_t n, const int* a) {
     size_t cs = 112909;
-while (n--)
+    while (n--)
+        cs = cs * 65713 + a[n] * 1686049;
     return cs;
 }
 
-
 namespace {
-
-
-
-
-
-
+struct ArgsortComparator {
+    const float* vals;
+    bool operator()(const size_t a, const size_t b) const {
+        return vals[a] < vals[b];
+    }
+};
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+struct SegmentS {
+    size_t i0; // begin pointer in the permutation array
+    size_t i1; // end
+    size_t len() const {
+        return i1 - i0;
+    }
+};
+
+// see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge
+// extended to > 1 merge thread
+
+// merges 2 ranges that should be consecutive on the source into
+// the union of the two on the destination
+template <typename T>
+void parallel_merge(
+        const T* src,
+        T* dst,
+        SegmentS& s1,
+        SegmentS& s2,
+        int nt,
+        const ArgsortComparator& comp) {
+    if (s2.len() > s1.len()) { // make sure that s1 larger than s2
+        std::swap(s1, s2);
+    }
 
-
-
-
-
+    // compute sub-ranges for each thread
+    std::vector<SegmentS> s1s(nt), s2s(nt), sws(nt);
+    s2s[0].i0 = s2.i0;
+    s2s[nt - 1].i1 = s2.i1;
 
-
+    // not sure parallel actually helps here
 #pragma omp parallel for num_threads(nt)
-
-
-
-
-
-
-
-
-
-
-
+    for (int t = 0; t < nt; t++) {
+        s1s[t].i0 = s1.i0 + s1.len() * t / nt;
+        s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;
+
+        if (t + 1 < nt) {
+            T pivot = src[s1s[t].i1];
+            size_t i0 = s2.i0, i1 = s2.i1;
+            while (i0 + 1 < i1) {
+                size_t imed = (i1 + i0) / 2;
+                if (comp(pivot, src[imed])) {
+                    i1 = imed;
+                } else {
+                    i0 = imed;
                 }
-s2s[t].i1 = s2s[t + 1].i0 = i1;
             }
+            s2s[t].i1 = s2s[t + 1].i0 = i1;
         }
-
-
-
-
-
-
-
-
-
+    }
+    s1.i0 = std::min(s1.i0, s2.i0);
+    s1.i1 = std::max(s1.i1, s2.i1);
+    s2 = s1;
+    sws[0].i0 = s1.i0;
+    for (int t = 0; t < nt; t++) {
+        sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();
+        if (t + 1 < nt) {
+            sws[t + 1].i0 = sws[t].i1;
         }
-
+    }
+    assert(sws[nt - 1].i1 == s1.i1);
 
-
+    // do the actual merging
 #pragma omp parallel for num_threads(nt)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    for (int t = 0; t < nt; t++) {
+        SegmentS sw = sws[t];
+        SegmentS s1t = s1s[t];
+        SegmentS s2t = s2s[t];
+        if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {
+            for (;;) {
+                // assert (sw.len() == s1t.len() + s2t.len());
+                if (comp(src[s1t.i0], src[s2t.i0])) {
+                    dst[sw.i0++] = src[s1t.i0++];
+                    if (s1t.i0 == s1t.i1)
+                        break;
+                } else {
+                    dst[sw.i0++] = src[s2t.i0++];
+                    if (s2t.i0 == s2t.i1)
+                        break;
                 }
             }
-
-
-
-
-
-
-
+        }
+        if (s1t.len() > 0) {
+            assert(s1t.len() == sw.len());
+            memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0]));
+        } else if (s2t.len() > 0) {
+            assert(s2t.len() == sw.len());
+            memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0]));
         }
     }
+}
 
-};
+}; // namespace
 
-void fvec_argsort
-
-
-for (size_t i = 0; i < n; i++) perm[i] = i;
+void fvec_argsort(size_t n, const float* vals, size_t* perm) {
+    for (size_t i = 0; i < n; i++)
+        perm[i] = i;
     ArgsortComparator comp = {vals};
-std::sort
+    std::sort(perm, perm + n, comp);
 }
 
-void fvec_argsort_parallel
-
-{
-size_t * perm2 = new size_t[n];
+void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
+    size_t* perm2 = new size_t[n];
     // 2 result tables, during merging, flip between them
     size_t *permB = perm2, *permA = perm;
 
@@ -519,12 +550,13 @@ void fvec_argsort_parallel (size_t n, const float *vals,
         int nseg = nt;
         while (nseg > 1) {
             nseg = (nseg + 1) / 2;
-std::swap
+            std::swap(permA, permB);
         }
     }
 
 #pragma omp parallel
-for (size_t i = 0; i < n; i++)
+    for (size_t i = 0; i < n; i++)
+        permA[i] = i;
 
     ArgsortComparator comp = {vals};
 
@@ -536,7 +568,7 @@ void fvec_argsort_parallel (size_t n, const float *vals,
         size_t i0 = t * n / nt;
         size_t i1 = (t + 1) * n / nt;
         SegmentS seg = {i0, i1};
-std::sort
+        std::sort(permA + seg.i0, permA + seg.i1, comp);
         segs[t] = seg;
     }
     int prev_nested = omp_get_nested();
@@ -551,99 +583,84 @@ void fvec_argsort_parallel (size_t n, const float *vals,
 #pragma omp parallel for num_threads(nseg1)
         for (int s = 0; s < nseg; s += 2) {
             if (s + 1 == nseg) { // otherwise isolated segment
-memcpy(permB + segs[s].i0,
+                memcpy(permB + segs[s].i0,
+                       permA + segs[s].i0,
                        segs[s].len() * sizeof(size_t));
             } else {
                 int t0 = s * sub_nt / sub_nseg1;
                 int t1 = (s + 1) * sub_nt / sub_nseg1;
                 printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
-parallel_merge(
-
+                parallel_merge(
+                        permA, permB, segs[s], segs[s + 1], t1 - t0, comp);
             }
         }
         for (int s = 0; s < nseg; s += 2)
             segs[s / 2] = segs[s];
         nseg = nseg1;
-std::swap
+        std::swap(permA, permB);
     }
-assert
+    assert(permA == perm);
     omp_set_nested(prev_nested);
-delete
+    delete[] perm2;
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-const float *fvecs_maybe_subsample (
-size_t d, size_t *n, size_t nmax, const float *x,
-bool verbose, int64_t seed)
-{
-
-if (*n <= nmax) return x; // nothing to do
+const float* fvecs_maybe_subsample(
+        size_t d,
+        size_t* n,
+        size_t nmax,
+        const float* x,
+        bool verbose,
+        int64_t seed) {
+    if (*n <= nmax)
+        return x; // nothing to do
 
     size_t n2 = nmax;
     if (verbose) {
-printf
-
+        printf(" Input training set too big (max size is %zd), sampling "
+               "%zd / %zd vectors\n",
+               nmax,
+               n2,
+               *n);
     }
-std::vector<int> subset
-rand_perm
-float
+    std::vector<int> subset(*n);
+    rand_perm(subset.data(), *n, seed);
+    float* x_subset = new float[n2 * d];
     for (int64_t i = 0; i < n2; i++)
-memcpy
-&x[subset[i] * size_t(d)],
-sizeof (x[0]) * d);
+        memcpy(&x_subset[i * d], &x[subset[i] * size_t(d)], sizeof(x[0]) * d);
     *n = n2;
     return x_subset;
 }
 
-
-void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) {
+void binary_to_real(size_t d, const uint8_t* x_in, float* x_out) {
     for (size_t i = 0; i < d; ++i) {
         x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1;
     }
 }
 
-void real_to_binary(size_t d, const float
-
-
-
-
-
-
+void real_to_binary(size_t d, const float* x_in, uint8_t* x_out) {
+    for (size_t i = 0; i < d / 8; ++i) {
+        uint8_t b = 0;
+        for (int j = 0; j < 8; ++j) {
+            if (x_in[8 * i + j] > 0) {
+                b |= (1 << j);
+            }
+        }
+        x_out[i] = b;
     }
-x_out[i] = b;
-}
 }
 
-
 // from Python's stringobject.c
-uint64_t hash_bytes
-const uint8_t
+uint64_t hash_bytes(const uint8_t* bytes, int64_t n) {
+    const uint8_t* p = bytes;
     uint64_t x = (uint64_t)(*p) << 7;
     int64_t len = n;
     while (--len >= 0) {
-x = (1000003*x) ^ *p++;
+        x = (1000003 * x) ^ *p++;
     }
     x ^= n;
     return x;
 }
 
-
 bool check_openmp() {
     omp_set_num_threads(10);
 
@@ -654,7 +671,7 @@ bool check_openmp() {
     std::vector<int> nt_per_thread(10);
     size_t sum = 0;
     bool in_parallel = true;
-#pragma omp parallel reduction(
+#pragma omp parallel reduction(+ : sum)
     {
         if (!omp_in_parallel()) {
             in_parallel = false;
@@ -665,7 +682,7 @@ bool check_openmp() {
 
         nt_per_thread[rank] = nt;
 #pragma omp for
-for(int i = 0; i < 1000 * 1000 * 10; i++) {
+        for (int i = 0; i < 1000 * 1000 * 10; i++) {
             sum += i;
         }
     }