faiss 0.4.1 → 0.4.2
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +39 -29
- data/vendor/faiss/faiss/Clustering.cpp +4 -2
- data/vendor/faiss/faiss/IVFlib.cpp +14 -7
- data/vendor/faiss/faiss/Index.h +72 -3
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/IndexBinary.h +46 -3
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
- data/vendor/faiss/faiss/IndexHNSW.h +10 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
- data/vendor/faiss/faiss/IndexIDMap.h +20 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
- data/vendor/faiss/faiss/IndexIVF.h +16 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
- data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
- data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
- data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
- data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
- data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -0
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
- data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
- data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
- data/vendor/faiss/faiss/IndexShards.cpp +7 -6
- data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
- data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
- data/vendor/faiss/faiss/MetricType.h +5 -3
- data/vendor/faiss/faiss/clone_index.cpp +2 -4
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
- data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
- data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
- data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
- data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
- data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
- data/vendor/faiss/faiss/impl/io.cpp +9 -5
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
- data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +49 -33
- data/vendor/faiss/faiss/index_factory.h +8 -2
- data/vendor/faiss/faiss/index_io.h +0 -3
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
- data/vendor/faiss/faiss/utils/Heap.h +23 -12
- data/vendor/faiss/faiss/utils/distances.cpp +42 -21
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
- data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
- data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
- data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
- data/vendor/faiss/faiss/utils/random.cpp +14 -7
- data/vendor/faiss/faiss/utils/utils.cpp +0 -3
- metadata +5 -2
@@ -77,8 +77,9 @@ void fvec_norms_L2sqr(
|
|
77
77
|
size_t d,
|
78
78
|
size_t nx) {
|
79
79
|
#pragma omp parallel for if (nx > 10000)
|
80
|
-
for (int64_t i = 0; i < nx; i++)
|
80
|
+
for (int64_t i = 0; i < nx; i++) {
|
81
81
|
nr[i] = fvec_norm_L2sqr(x + i * d, d);
|
82
|
+
}
|
82
83
|
}
|
83
84
|
|
84
85
|
// The following is a workaround to a problem
|
@@ -208,8 +209,9 @@ void exhaustive_inner_product_blas(
|
|
208
209
|
size_t ny,
|
209
210
|
BlockResultHandler& res) {
|
210
211
|
// BLAS does not like empty matrices
|
211
|
-
if (nx == 0 || ny == 0)
|
212
|
+
if (nx == 0 || ny == 0) {
|
212
213
|
return;
|
214
|
+
}
|
213
215
|
|
214
216
|
/* block sizes */
|
215
217
|
const size_t bs_x = distance_compute_blas_query_bs;
|
@@ -218,15 +220,17 @@ void exhaustive_inner_product_blas(
|
|
218
220
|
|
219
221
|
for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
|
220
222
|
size_t i1 = i0 + bs_x;
|
221
|
-
if (i1 > nx)
|
223
|
+
if (i1 > nx) {
|
222
224
|
i1 = nx;
|
225
|
+
}
|
223
226
|
|
224
227
|
res.begin_multiple(i0, i1);
|
225
228
|
|
226
229
|
for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
|
227
230
|
size_t j1 = j0 + bs_y;
|
228
|
-
if (j1 > ny)
|
231
|
+
if (j1 > ny) {
|
229
232
|
j1 = ny;
|
233
|
+
}
|
230
234
|
/* compute the actual dot products */
|
231
235
|
{
|
232
236
|
float one = 1, zero = 0;
|
@@ -265,8 +269,9 @@ void exhaustive_L2sqr_blas_default_impl(
|
|
265
269
|
BlockResultHandler& res,
|
266
270
|
const float* y_norms = nullptr) {
|
267
271
|
// BLAS does not like empty matrices
|
268
|
-
if (nx == 0 || ny == 0)
|
272
|
+
if (nx == 0 || ny == 0) {
|
269
273
|
return;
|
274
|
+
}
|
270
275
|
|
271
276
|
/* block sizes */
|
272
277
|
const size_t bs_x = distance_compute_blas_query_bs;
|
@@ -287,15 +292,17 @@ void exhaustive_L2sqr_blas_default_impl(
|
|
287
292
|
|
288
293
|
for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
|
289
294
|
size_t i1 = i0 + bs_x;
|
290
|
-
if (i1 > nx)
|
295
|
+
if (i1 > nx) {
|
291
296
|
i1 = nx;
|
297
|
+
}
|
292
298
|
|
293
299
|
res.begin_multiple(i0, i1);
|
294
300
|
|
295
301
|
for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
|
296
302
|
size_t j1 = j0 + bs_y;
|
297
|
-
if (j1 > ny)
|
303
|
+
if (j1 > ny) {
|
298
304
|
j1 = ny;
|
305
|
+
}
|
299
306
|
/* compute the actual dot products */
|
300
307
|
{
|
301
308
|
float one = 1, zero = 0;
|
@@ -327,8 +334,9 @@ void exhaustive_L2sqr_blas_default_impl(
|
|
327
334
|
}
|
328
335
|
// negative values can occur for identical vectors
|
329
336
|
// due to roundoff errors
|
330
|
-
if (dis < 0)
|
337
|
+
if (dis < 0) {
|
331
338
|
dis = 0;
|
339
|
+
}
|
332
340
|
|
333
341
|
*ip_line = dis;
|
334
342
|
ip_line++;
|
@@ -363,8 +371,9 @@ void exhaustive_L2sqr_blas_cmax_avx2(
|
|
363
371
|
Top1BlockResultHandler<CMax<float, int64_t>>& res,
|
364
372
|
const float* y_norms) {
|
365
373
|
// BLAS does not like empty matrices
|
366
|
-
if (nx == 0 || ny == 0)
|
374
|
+
if (nx == 0 || ny == 0) {
|
367
375
|
return;
|
376
|
+
}
|
368
377
|
|
369
378
|
/* block sizes */
|
370
379
|
const size_t bs_x = distance_compute_blas_query_bs;
|
@@ -385,15 +394,17 @@ void exhaustive_L2sqr_blas_cmax_avx2(
|
|
385
394
|
|
386
395
|
for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
|
387
396
|
size_t i1 = i0 + bs_x;
|
388
|
-
if (i1 > nx)
|
397
|
+
if (i1 > nx) {
|
389
398
|
i1 = nx;
|
399
|
+
}
|
390
400
|
|
391
401
|
res.begin_multiple(i0, i1);
|
392
402
|
|
393
403
|
for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
|
394
404
|
size_t j1 = j0 + bs_y;
|
395
|
-
if (j1 > ny)
|
405
|
+
if (j1 > ny) {
|
396
406
|
j1 = ny;
|
407
|
+
}
|
397
408
|
/* compute the actual dot products */
|
398
409
|
{
|
399
410
|
float one = 1, zero = 0;
|
@@ -519,8 +530,9 @@ void exhaustive_L2sqr_blas_cmax_avx2(
|
|
519
530
|
|
520
531
|
// negative values can occur for identical vectors
|
521
532
|
// due to roundoff errors.
|
522
|
-
if (distance_candidate < 0)
|
533
|
+
if (distance_candidate < 0) {
|
523
534
|
distance_candidate = 0;
|
535
|
+
}
|
524
536
|
|
525
537
|
int64_t index_candidate = min_indices_scalar[jv] + j0;
|
526
538
|
|
@@ -540,8 +552,9 @@ void exhaustive_L2sqr_blas_cmax_avx2(
|
|
540
552
|
float dis = x_norms[i] + y_norms[idx_j + j0] - 2 * ip;
|
541
553
|
// negative values can occur for identical vectors
|
542
554
|
// due to roundoff errors.
|
543
|
-
if (dis < 0)
|
555
|
+
if (dis < 0) {
|
544
556
|
dis = 0;
|
557
|
+
}
|
545
558
|
|
546
559
|
if (current_min_distance > dis) {
|
547
560
|
current_min_distance = dis;
|
@@ -1142,33 +1155,40 @@ void pairwise_L2sqr(
|
|
1142
1155
|
int64_t ldq,
|
1143
1156
|
int64_t ldb,
|
1144
1157
|
int64_t ldd) {
|
1145
|
-
if (nq == 0 || nb == 0)
|
1158
|
+
if (nq == 0 || nb == 0) {
|
1146
1159
|
return;
|
1147
|
-
|
1160
|
+
}
|
1161
|
+
if (ldq == -1) {
|
1148
1162
|
ldq = d;
|
1149
|
-
|
1163
|
+
}
|
1164
|
+
if (ldb == -1) {
|
1150
1165
|
ldb = d;
|
1151
|
-
|
1166
|
+
}
|
1167
|
+
if (ldd == -1) {
|
1152
1168
|
ldd = nb;
|
1169
|
+
}
|
1153
1170
|
|
1154
1171
|
// store in beginning of distance matrix to avoid malloc
|
1155
1172
|
float* b_norms = dis;
|
1156
1173
|
|
1157
1174
|
#pragma omp parallel for if (nb > 1)
|
1158
|
-
for (int64_t i = 0; i < nb; i++)
|
1175
|
+
for (int64_t i = 0; i < nb; i++) {
|
1159
1176
|
b_norms[i] = fvec_norm_L2sqr(xb + i * ldb, d);
|
1177
|
+
}
|
1160
1178
|
|
1161
1179
|
#pragma omp parallel for
|
1162
1180
|
for (int64_t i = 1; i < nq; i++) {
|
1163
1181
|
float q_norm = fvec_norm_L2sqr(xq + i * ldq, d);
|
1164
|
-
for (int64_t j = 0; j < nb; j++)
|
1182
|
+
for (int64_t j = 0; j < nb; j++) {
|
1165
1183
|
dis[i * ldd + j] = q_norm + b_norms[j];
|
1184
|
+
}
|
1166
1185
|
}
|
1167
1186
|
|
1168
1187
|
{
|
1169
1188
|
float q_norm = fvec_norm_L2sqr(xq, d);
|
1170
|
-
for (int64_t j = 0; j < nb; j++)
|
1189
|
+
for (int64_t j = 0; j < nb; j++) {
|
1171
1190
|
dis[j] += q_norm;
|
1191
|
+
}
|
1172
1192
|
}
|
1173
1193
|
|
1174
1194
|
{
|
@@ -1200,8 +1220,9 @@ void inner_product_to_L2sqr(
|
|
1200
1220
|
#pragma omp parallel for
|
1201
1221
|
for (int64_t j = 0; j < n1; j++) {
|
1202
1222
|
float* disj = dis + j * n2;
|
1203
|
-
for (size_t i = 0; i < n2; i++)
|
1223
|
+
for (size_t i = 0; i < n2; i++) {
|
1204
1224
|
disj[i] = nr1[j] + nr2[i] - 2 * disj[i];
|
1225
|
+
}
|
1205
1226
|
}
|
1206
1227
|
}
|
1207
1228
|
|
@@ -7,9 +7,9 @@
|
|
7
7
|
|
8
8
|
#include <faiss/utils/distances_fused/distances_fused.h>
|
9
9
|
|
10
|
-
#include <faiss/impl/platform_macros.h>
|
10
|
+
#include <faiss/impl/platform_macros.h> // NOLINT
|
11
11
|
|
12
|
-
#include <faiss/utils/distances_fused/avx512.h>
|
12
|
+
#include <faiss/utils/distances_fused/avx512.h> // NOLINT
|
13
13
|
#include <faiss/utils/distances_fused/simdlib_based.h>
|
14
14
|
|
15
15
|
namespace faiss {
|
@@ -260,7 +260,7 @@ void exhaustive_L2sqr_fused_cmax(
|
|
260
260
|
const size_t nx_p = (nx / NX_POINTS_PER_LOOP) * NX_POINTS_PER_LOOP;
|
261
261
|
// the main loop.
|
262
262
|
#pragma omp parallel for schedule(dynamic)
|
263
|
-
for (
|
263
|
+
for (int64_t i = 0; i < nx_p; i += NX_POINTS_PER_LOOP) {
|
264
264
|
kernel<DIM, NX_POINTS_PER_LOOP, NY_POINTS_PER_LOOP>(
|
265
265
|
x, y, y_transposed.data(), ny, res, y_norms, i);
|
266
266
|
}
|
@@ -3252,8 +3252,9 @@ void fvec_inner_products_ny(
|
|
3252
3252
|
float bf,
|
3253
3253
|
const float* b,
|
3254
3254
|
float* c) {
|
3255
|
-
for (size_t i = 0; i < n; i++)
|
3255
|
+
for (size_t i = 0; i < n; i++) {
|
3256
3256
|
c[i] = a[i] + bf * b[i];
|
3257
|
+
}
|
3257
3258
|
}
|
3258
3259
|
|
3259
3260
|
#if defined(__AVX512F__)
|
@@ -3536,10 +3537,11 @@ int fvec_madd_and_argmin(
|
|
3536
3537
|
float bf,
|
3537
3538
|
const float* b,
|
3538
3539
|
float* c) {
|
3539
|
-
if ((n & 3) == 0 && ((((long)a) | ((long)b) | ((long)c)) & 15) == 0)
|
3540
|
+
if ((n & 3) == 0 && ((((long)a) | ((long)b) | ((long)c)) & 15) == 0) {
|
3540
3541
|
return fvec_madd_and_argmin_sse(n, a, bf, b, c);
|
3541
|
-
else
|
3542
|
+
} else {
|
3542
3543
|
return fvec_madd_and_argmin_ref(n, a, bf, b, c);
|
3544
|
+
}
|
3543
3545
|
}
|
3544
3546
|
|
3545
3547
|
#elif defined(__aarch64__)
|
@@ -153,14 +153,37 @@ inline float VectorDistance<METRIC_NaNEuclidean>::operator()(
|
|
153
153
|
}
|
154
154
|
|
155
155
|
template <>
|
156
|
-
inline float VectorDistance<
|
156
|
+
inline float VectorDistance<METRIC_GOWER>::operator()(
|
157
157
|
const float* x,
|
158
158
|
const float* y) const {
|
159
159
|
float accu = 0;
|
160
|
+
size_t valid_dims = 0;
|
161
|
+
|
160
162
|
for (size_t i = 0; i < d; i++) {
|
161
|
-
|
163
|
+
if (std::isnan(x[i]) || std::isnan(y[i])) {
|
164
|
+
continue;
|
165
|
+
}
|
166
|
+
|
167
|
+
if (x[i] >= 0 && y[i] >= 0) {
|
168
|
+
if (x[i] > 1 || y[i] > 1) {
|
169
|
+
return std::numeric_limits<float>::quiet_NaN();
|
170
|
+
}
|
171
|
+
// Numeric dimensions are in [0,1]
|
172
|
+
accu += fabs(x[i] - y[i]);
|
173
|
+
} else if (x[i] < 0 && y[i] < 0) {
|
174
|
+
// Categorical dimensions are negative values
|
175
|
+
accu += float(int(x[i] != y[i]));
|
176
|
+
} else {
|
177
|
+
// Invalid representation
|
178
|
+
return std::numeric_limits<float>::quiet_NaN();
|
179
|
+
}
|
180
|
+
valid_dims++;
|
162
181
|
}
|
163
|
-
|
182
|
+
|
183
|
+
if (valid_dims == 0) {
|
184
|
+
return std::numeric_limits<float>::quiet_NaN();
|
185
|
+
}
|
186
|
+
return accu / valid_dims;
|
164
187
|
}
|
165
188
|
|
166
189
|
/***************************************************************************
|
@@ -193,7 +216,7 @@ typename Consumer::T dispatch_VectorDistance(
|
|
193
216
|
DISPATCH_VD(METRIC_JensenShannon);
|
194
217
|
DISPATCH_VD(METRIC_Jaccard);
|
195
218
|
DISPATCH_VD(METRIC_NaNEuclidean);
|
196
|
-
DISPATCH_VD(
|
219
|
+
DISPATCH_VD(METRIC_GOWER);
|
197
220
|
default:
|
198
221
|
FAISS_THROW_FMT("Invalid metric %d", metric);
|
199
222
|
}
|
@@ -155,14 +155,18 @@ void pairwise_extra_distances(
|
|
155
155
|
int64_t ldq,
|
156
156
|
int64_t ldb,
|
157
157
|
int64_t ldd) {
|
158
|
-
if (nq == 0 || nb == 0)
|
158
|
+
if (nq == 0 || nb == 0) {
|
159
159
|
return;
|
160
|
-
|
160
|
+
}
|
161
|
+
if (ldq == -1) {
|
161
162
|
ldq = d;
|
162
|
-
|
163
|
+
}
|
164
|
+
if (ldb == -1) {
|
163
165
|
ldb = d;
|
164
|
-
|
166
|
+
}
|
167
|
+
if (ldd == -1) {
|
165
168
|
ldd = nb;
|
169
|
+
}
|
166
170
|
|
167
171
|
Run_pairwise_extra_distances run;
|
168
172
|
dispatch_VectorDistance(
|
@@ -53,8 +53,9 @@ void hammings(
|
|
53
53
|
for (i = 0; i < n1; i++) {
|
54
54
|
const uint64_t* __restrict bs1_ = bs1 + i * nwords;
|
55
55
|
hamdis_t* __restrict dis_ = dis + i * n2;
|
56
|
-
for (j = 0; j < n2; j++)
|
56
|
+
for (j = 0; j < n2; j++) {
|
57
57
|
dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
|
58
|
+
}
|
58
59
|
}
|
59
60
|
}
|
60
61
|
|
@@ -70,8 +71,9 @@ void hammings(
|
|
70
71
|
for (i = 0; i < n1; i++) {
|
71
72
|
const uint64_t* __restrict bs1_ = bs1 + i * nwords;
|
72
73
|
hamdis_t* __restrict dis_ = dis + i * n2;
|
73
|
-
for (j = 0; j < n2; j++)
|
74
|
+
for (j = 0; j < n2; j++) {
|
74
75
|
dis_[j] = hamming(bs1_, bs2 + j * nwords, nwords);
|
76
|
+
}
|
75
77
|
}
|
76
78
|
}
|
77
79
|
|
@@ -92,8 +94,9 @@ void hamming_count_thres(
|
|
92
94
|
bs2 = bs2_;
|
93
95
|
for (j = 0; j < n2; j++) {
|
94
96
|
/* collect the match only if this satisfies the threshold */
|
95
|
-
if (hamming<nbits>(bs1, bs2) <= ht)
|
97
|
+
if (hamming<nbits>(bs1, bs2) <= ht) {
|
96
98
|
posm++;
|
99
|
+
}
|
97
100
|
bs2 += nwords;
|
98
101
|
}
|
99
102
|
bs1 += nwords; /* next signature */
|
@@ -114,8 +117,9 @@ void crosshamming_count_thres(
|
|
114
117
|
const uint64_t* bs2 = bs1 + 2;
|
115
118
|
for (j = i + 1; j < n; j++) {
|
116
119
|
/* collect the match only if this satisfies the threshold */
|
117
|
-
if (hamming<nbits>(bs1, bs2) <= ht)
|
120
|
+
if (hamming<nbits>(bs1, bs2) <= ht) {
|
118
121
|
posm++;
|
122
|
+
}
|
119
123
|
bs2 += nwords;
|
120
124
|
}
|
121
125
|
bs1 += nwords;
|
@@ -175,8 +179,9 @@ void hammings_knn_hc(
|
|
175
179
|
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK,
|
176
180
|
const faiss::IDSelector* sel = nullptr) {
|
177
181
|
size_t k = ha->k;
|
178
|
-
if (init_heap)
|
182
|
+
if (init_heap) {
|
179
183
|
ha->heapify();
|
184
|
+
}
|
180
185
|
|
181
186
|
const size_t block_size = hamming_batch_size;
|
182
187
|
for (size_t j0 = 0; j0 < n2; j0 += block_size) {
|
@@ -229,8 +234,9 @@ void hammings_knn_hc(
|
|
229
234
|
}
|
230
235
|
}
|
231
236
|
}
|
232
|
-
if (order)
|
237
|
+
if (order) {
|
233
238
|
ha->reorder();
|
239
|
+
}
|
234
240
|
}
|
235
241
|
|
236
242
|
/* Return closest neighbors w.r.t Hamming distance, using max count. */
|
@@ -364,8 +370,9 @@ void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) {
|
|
364
370
|
uint8_t mask = 1;
|
365
371
|
int nj = i + 8 <= d ? 8 : d - i;
|
366
372
|
for (int j = 0; j < nj; j++) {
|
367
|
-
if (x[i + j] >= 0)
|
373
|
+
if (x[i + j] >= 0) {
|
368
374
|
w |= mask;
|
375
|
+
}
|
369
376
|
mask <<= 1;
|
370
377
|
}
|
371
378
|
*b = w;
|
@@ -382,8 +389,9 @@ void fvecs2bitvecs(
|
|
382
389
|
size_t n) {
|
383
390
|
const int64_t ncodes = ((d + 7) / 8);
|
384
391
|
#pragma omp parallel for if (n > 100000)
|
385
|
-
for (int64_t i = 0; i < n; i++)
|
392
|
+
for (int64_t i = 0; i < n; i++) {
|
386
393
|
fvec2bitvec(x + i * d, b + i * ncodes, d);
|
394
|
+
}
|
387
395
|
}
|
388
396
|
|
389
397
|
void bitvecs2fvecs(
|
@@ -667,8 +675,9 @@ void generalized_hammings_knn_hc(
|
|
667
675
|
int na = ha->nh;
|
668
676
|
int k = ha->k;
|
669
677
|
|
670
|
-
if (ordered)
|
678
|
+
if (ordered) {
|
671
679
|
ha->heapify();
|
680
|
+
}
|
672
681
|
|
673
682
|
#pragma omp parallel for
|
674
683
|
for (int i = 0; i < na; i++) {
|
@@ -698,8 +707,9 @@ void generalized_hammings_knn_hc(
|
|
698
707
|
}
|
699
708
|
}
|
700
709
|
|
701
|
-
if (ordered)
|
710
|
+
if (ordered) {
|
702
711
|
ha->reorder();
|
712
|
+
}
|
703
713
|
}
|
704
714
|
|
705
715
|
void pack_bitstrings(
|
@@ -1193,12 +1193,14 @@ void simd_histogram_8(
|
|
1193
1193
|
|
1194
1194
|
// complete with remaining bins
|
1195
1195
|
for (int i = (n & ~15); i < n; i++) {
|
1196
|
-
if (data[i] < min)
|
1196
|
+
if (data[i] < min) {
|
1197
1197
|
continue;
|
1198
|
+
}
|
1198
1199
|
uint16_t v = data[i] - min;
|
1199
1200
|
v >>= shift;
|
1200
|
-
if (v < 8)
|
1201
|
+
if (v < 8) {
|
1201
1202
|
hist[v]++;
|
1203
|
+
}
|
1202
1204
|
}
|
1203
1205
|
}
|
1204
1206
|
|
@@ -1247,12 +1249,14 @@ void simd_histogram_16(
|
|
1247
1249
|
}
|
1248
1250
|
|
1249
1251
|
for (int i = (n & ~15); i < n; i++) {
|
1250
|
-
if (data[i] < min)
|
1252
|
+
if (data[i] < min) {
|
1251
1253
|
continue;
|
1254
|
+
}
|
1252
1255
|
uint16_t v = data[i] - min;
|
1253
1256
|
v >>= shift;
|
1254
|
-
if (v < 16)
|
1257
|
+
if (v < 16) {
|
1255
1258
|
hist[v]++;
|
1259
|
+
}
|
1256
1260
|
}
|
1257
1261
|
}
|
1258
1262
|
|
@@ -28,8 +28,9 @@ namespace {
|
|
28
28
|
float tab_min(const float* tab, size_t n) {
|
29
29
|
float min = HUGE_VAL;
|
30
30
|
for (int i = 0; i < n; i++) {
|
31
|
-
if (tab[i] < min)
|
31
|
+
if (tab[i] < min) {
|
32
32
|
min = tab[i];
|
33
|
+
}
|
33
34
|
}
|
34
35
|
return min;
|
35
36
|
}
|
@@ -37,8 +38,9 @@ float tab_min(const float* tab, size_t n) {
|
|
37
38
|
float tab_max(const float* tab, size_t n) {
|
38
39
|
float max = -HUGE_VAL;
|
39
40
|
for (int i = 0; i < n; i++) {
|
40
|
-
if (tab[i] > max)
|
41
|
+
if (tab[i] > max) {
|
41
42
|
max = tab[i];
|
43
|
+
}
|
42
44
|
}
|
43
45
|
return max;
|
44
46
|
}
|
@@ -79,10 +81,12 @@ void round_uint8_per_column(
|
|
79
81
|
b += mins[i];
|
80
82
|
round_tab(tab + i * d, d, a, mins[i]);
|
81
83
|
}
|
82
|
-
if (a_out)
|
84
|
+
if (a_out) {
|
83
85
|
*a_out = a;
|
84
|
-
|
86
|
+
}
|
87
|
+
if (b_out) {
|
85
88
|
*b_out = b;
|
89
|
+
}
|
86
90
|
}
|
87
91
|
|
88
92
|
void round_uint8_per_column_multi(
|
@@ -115,14 +119,16 @@ void round_uint8_per_column_multi(
|
|
115
119
|
round_tab(tab + (j * n + i) * d, d, a, mins[i]);
|
116
120
|
}
|
117
121
|
}
|
118
|
-
if (a_out)
|
122
|
+
if (a_out) {
|
119
123
|
*a_out = a;
|
120
|
-
|
124
|
+
}
|
125
|
+
if (b_out) {
|
121
126
|
*b_out = b;
|
127
|
+
}
|
122
128
|
}
|
123
129
|
|
124
130
|
// translation of
|
125
|
-
// https://github.com/
|
131
|
+
// https://gist.github.com/mdouze/f3a05bff5186c1874a77356452297357#file-lut_quantization-ipynb
|
126
132
|
void quantize_LUT_and_bias(
|
127
133
|
size_t nprobe,
|
128
134
|
size_t M,
|
@@ -264,10 +270,12 @@ void quantize_LUT_and_bias(
|
|
264
270
|
ij_2 += M2 - M;
|
265
271
|
}
|
266
272
|
}
|
267
|
-
if (a_out)
|
273
|
+
if (a_out) {
|
268
274
|
*a_out = a;
|
269
|
-
|
275
|
+
}
|
276
|
+
if (b_out) {
|
270
277
|
*b_out = b;
|
278
|
+
}
|
271
279
|
}
|
272
280
|
|
273
281
|
void aq_quantize_LUT_and_bias(
|