faiss 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +39 -29
- data/vendor/faiss/faiss/Clustering.cpp +4 -2
- data/vendor/faiss/faiss/IVFlib.cpp +14 -7
- data/vendor/faiss/faiss/Index.h +72 -3
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/IndexBinary.h +46 -3
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
- data/vendor/faiss/faiss/IndexHNSW.h +10 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
- data/vendor/faiss/faiss/IndexIDMap.h +20 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
- data/vendor/faiss/faiss/IndexIVF.h +16 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
- data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
- data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
- data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
- data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
- data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -0
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
- data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
- data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
- data/vendor/faiss/faiss/IndexShards.cpp +7 -6
- data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
- data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
- data/vendor/faiss/faiss/MetricType.h +5 -3
- data/vendor/faiss/faiss/clone_index.cpp +2 -4
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
- data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
- data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
- data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
- data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
- data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
- data/vendor/faiss/faiss/impl/io.cpp +9 -5
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
- data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +49 -33
- data/vendor/faiss/faiss/index_factory.h +8 -2
- data/vendor/faiss/faiss/index_io.h +0 -3
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
- data/vendor/faiss/faiss/utils/Heap.h +23 -12
- data/vendor/faiss/faiss/utils/distances.cpp +42 -21
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
- data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
- data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
- data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
- data/vendor/faiss/faiss/utils/random.cpp +14 -7
- data/vendor/faiss/faiss/utils/utils.cpp +0 -3
- metadata +5 -2
@@ -15,8 +15,6 @@
|
|
15
15
|
#include <faiss/impl/ResultHandler.h>
|
16
16
|
#include <faiss/utils/prefetch.h>
|
17
17
|
|
18
|
-
#include <faiss/impl/platform_macros.h>
|
19
|
-
|
20
18
|
#ifdef __AVX2__
|
21
19
|
#include <immintrin.h>
|
22
20
|
|
@@ -78,8 +76,9 @@ void HNSW::set_default_probas(int M, float levelMult) {
|
|
78
76
|
cum_nneighbor_per_level.push_back(0);
|
79
77
|
for (int level = 0;; level++) {
|
80
78
|
float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
|
81
|
-
if (proba < 1e-9)
|
79
|
+
if (proba < 1e-9) {
|
82
80
|
break;
|
81
|
+
}
|
83
82
|
assign_probas.push_back(proba);
|
84
83
|
nn += level == 0 ? M * 2 : M;
|
85
84
|
cum_nneighbor_per_level.push_back(nn);
|
@@ -120,8 +119,9 @@ void HNSW::print_neighbor_stats(int level) const {
|
|
120
119
|
neighbor_range(i, level, &begin, &end);
|
121
120
|
std::unordered_set<int> neighset;
|
122
121
|
for (size_t j = begin; j < end; j++) {
|
123
|
-
if (neighbors[j] < 0)
|
122
|
+
if (neighbors[j] < 0) {
|
124
123
|
break;
|
124
|
+
}
|
125
125
|
neighset.insert(neighbors[j]);
|
126
126
|
}
|
127
127
|
int n_neigh = neighset.size();
|
@@ -129,15 +129,17 @@ void HNSW::print_neighbor_stats(int level) const {
|
|
129
129
|
int n_reciprocal = 0;
|
130
130
|
for (size_t j = begin; j < end; j++) {
|
131
131
|
storage_idx_t i2 = neighbors[j];
|
132
|
-
if (i2 < 0)
|
132
|
+
if (i2 < 0) {
|
133
133
|
break;
|
134
|
+
}
|
134
135
|
FAISS_ASSERT(i2 != i);
|
135
136
|
size_t begin2, end2;
|
136
137
|
neighbor_range(i2, level, &begin2, &end2);
|
137
138
|
for (size_t j2 = begin2; j2 < end2; j2++) {
|
138
139
|
storage_idx_t i3 = neighbors[j2];
|
139
|
-
if (i3 < 0)
|
140
|
+
if (i3 < 0) {
|
140
141
|
break;
|
142
|
+
}
|
141
143
|
if (i3 == i) {
|
142
144
|
n_reciprocal++;
|
143
145
|
continue;
|
@@ -178,8 +180,9 @@ void HNSW::fill_with_random_links(size_t n) {
|
|
178
180
|
}
|
179
181
|
printf("linking %zd elements in level %d\n", elts.size(), level);
|
180
182
|
|
181
|
-
if (elts.size() == 1)
|
183
|
+
if (elts.size() == 1) {
|
182
184
|
continue;
|
185
|
+
}
|
183
186
|
|
184
187
|
for (int ii = 0; ii < elts.size(); ii++) {
|
185
188
|
int i = elts[ii];
|
@@ -213,8 +216,9 @@ int HNSW::prepare_level_tab(size_t n, bool preset_levels) {
|
|
213
216
|
int max_level_2 = 0;
|
214
217
|
for (int i = 0; i < n; i++) {
|
215
218
|
int pt_level = levels[i + n0] - 1;
|
216
|
-
if (pt_level > max_level_2)
|
219
|
+
if (pt_level > max_level_2) {
|
217
220
|
max_level_2 = pt_level;
|
221
|
+
}
|
218
222
|
offsets.push_back(offsets.back() + cum_nb_neighbors(pt_level + 1));
|
219
223
|
}
|
220
224
|
neighbors.resize(offsets.back(), -1);
|
@@ -319,8 +323,9 @@ void add_link(
|
|
319
323
|
// there is enough room, find a slot to add it
|
320
324
|
size_t i = end;
|
321
325
|
while (i > begin) {
|
322
|
-
if (hnsw.neighbors[i - 1] != -1)
|
326
|
+
if (hnsw.neighbors[i - 1] != -1) {
|
323
327
|
break;
|
328
|
+
}
|
324
329
|
i--;
|
325
330
|
}
|
326
331
|
hnsw.neighbors[i] = dest;
|
@@ -397,10 +402,12 @@ void search_neighbors_to_add(
|
|
397
402
|
// a reference version
|
398
403
|
for (size_t i = begin; i < end; i++) {
|
399
404
|
storage_idx_t nodeId = hnsw.neighbors[i];
|
400
|
-
if (nodeId < 0)
|
405
|
+
if (nodeId < 0) {
|
401
406
|
break;
|
402
|
-
|
407
|
+
}
|
408
|
+
if (vt.get(nodeId)) {
|
403
409
|
continue;
|
410
|
+
}
|
404
411
|
vt.set(nodeId);
|
405
412
|
|
406
413
|
float dis = qdis(nodeId);
|
@@ -436,8 +443,9 @@ void search_neighbors_to_add(
|
|
436
443
|
|
437
444
|
for (size_t j = begin; j < end; j++) {
|
438
445
|
storage_idx_t nodeId = hnsw.neighbors[j];
|
439
|
-
if (nodeId < 0)
|
446
|
+
if (nodeId < 0) {
|
440
447
|
break;
|
448
|
+
}
|
441
449
|
if (vt.get(nodeId)) {
|
442
450
|
continue;
|
443
451
|
}
|
@@ -647,8 +655,9 @@ int search_from_candidates(
|
|
647
655
|
size_t jmax = begin;
|
648
656
|
for (size_t j = begin; j < end; j++) {
|
649
657
|
int v1 = hnsw.neighbors[j];
|
650
|
-
if (v1 < 0)
|
658
|
+
if (v1 < 0) {
|
651
659
|
break;
|
660
|
+
}
|
652
661
|
|
653
662
|
prefetch_L2(vt.visited.data() + v1);
|
654
663
|
jmax += 1;
|
@@ -761,8 +770,9 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
761
770
|
size_t jmax = begin;
|
762
771
|
for (size_t j = begin; j < end; j++) {
|
763
772
|
int v1 = hnsw.neighbors[j];
|
764
|
-
if (v1 < 0)
|
773
|
+
if (v1 < 0) {
|
765
774
|
break;
|
775
|
+
}
|
766
776
|
|
767
777
|
prefetch_L2(vt->visited.data() + v1);
|
768
778
|
jmax += 1;
|
@@ -864,8 +874,9 @@ HNSWStats greedy_update_nearest(
|
|
864
874
|
|
865
875
|
for (size_t j = begin; j < end; j++) {
|
866
876
|
storage_idx_t v = hnsw.neighbors[j];
|
867
|
-
if (v < 0)
|
877
|
+
if (v < 0) {
|
868
878
|
break;
|
879
|
+
}
|
869
880
|
ndis += 1;
|
870
881
|
|
871
882
|
buffered_ids[n_buffered] = v;
|
@@ -1013,11 +1024,13 @@ void HNSW::search_level_0(
|
|
1013
1024
|
for (int j = 0; j < nprobe; j++) {
|
1014
1025
|
storage_idx_t cj = nearest_i[j];
|
1015
1026
|
|
1016
|
-
if (cj < 0)
|
1027
|
+
if (cj < 0) {
|
1017
1028
|
break;
|
1029
|
+
}
|
1018
1030
|
|
1019
|
-
if (vt.get(cj))
|
1031
|
+
if (vt.get(cj)) {
|
1020
1032
|
continue;
|
1033
|
+
}
|
1021
1034
|
|
1022
1035
|
int candidates_size = std::max(efSearch, k);
|
1023
1036
|
MinimaxHeap candidates(candidates_size);
|
@@ -1044,8 +1057,9 @@ void HNSW::search_level_0(
|
|
1044
1057
|
for (int j = 0; j < nprobe; j++) {
|
1045
1058
|
storage_idx_t cj = nearest_i[j];
|
1046
1059
|
|
1047
|
-
if (cj < 0)
|
1060
|
+
if (cj < 0) {
|
1048
1061
|
break;
|
1062
|
+
}
|
1049
1063
|
candidates.push(cj, nearest_d[j]);
|
1050
1064
|
}
|
1051
1065
|
|
@@ -1093,8 +1107,9 @@ void HNSW::permute_entries(const idx_t* map) {
|
|
1093
1107
|
|
1094
1108
|
void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
|
1095
1109
|
if (k == n) {
|
1096
|
-
if (v >= dis[0])
|
1110
|
+
if (v >= dis[0]) {
|
1097
1111
|
return;
|
1112
|
+
}
|
1098
1113
|
if (ids[0] != -1) {
|
1099
1114
|
--nvalid;
|
1100
1115
|
}
|
@@ -71,8 +71,9 @@ IDSelectorArray::IDSelectorArray(size_t n, const idx_t* ids) : n(n), ids(ids) {}
|
|
71
71
|
|
72
72
|
bool IDSelectorArray::is_member(idx_t id) const {
|
73
73
|
for (idx_t i = 0; i < n; i++) {
|
74
|
-
if (ids[i] == id)
|
74
|
+
if (ids[i] == id) {
|
75
75
|
return true;
|
76
|
+
}
|
76
77
|
}
|
77
78
|
return false;
|
78
79
|
}
|
@@ -14,17 +14,16 @@
|
|
14
14
|
#include <random>
|
15
15
|
|
16
16
|
#include <algorithm>
|
17
|
+
#include <utility>
|
17
18
|
|
18
19
|
#include <faiss/impl/AuxIndexStructures.h>
|
19
20
|
#include <faiss/impl/FaissAssert.h>
|
20
21
|
#include <faiss/utils/distances.h>
|
21
|
-
#include <faiss/utils/hamming.h> // BitstringWriter
|
22
22
|
#include <faiss/utils/utils.h>
|
23
23
|
|
24
24
|
#include <faiss/utils/approx_topk/approx_topk.h>
|
25
25
|
|
26
26
|
// this is needed for prefetching
|
27
|
-
#include <faiss/impl/platform_macros.h>
|
28
27
|
|
29
28
|
#ifdef __AVX2__
|
30
29
|
#include <xmmintrin.h>
|
@@ -825,7 +824,7 @@ void LSQTimer::reset() {
|
|
825
824
|
}
|
826
825
|
|
827
826
|
LSQTimerScope::LSQTimerScope(LSQTimer* timer, std::string name)
|
828
|
-
: timer(timer), name(name), finished(false) {
|
827
|
+
: timer(timer), name(std::move(name)), finished(false) {
|
829
828
|
t0 = getmillisecs();
|
830
829
|
}
|
831
830
|
|
@@ -55,11 +55,13 @@ Nhood::Nhood(const Nhood& other) {
|
|
55
55
|
/// Insert a point into the candidate pool
|
56
56
|
void Nhood::insert(int id, float dist) {
|
57
57
|
LockGuard guard(lock);
|
58
|
-
if (dist > pool.front().distance)
|
58
|
+
if (dist > pool.front().distance) {
|
59
59
|
return;
|
60
|
+
}
|
60
61
|
for (int i = 0; i < pool.size(); i++) {
|
61
|
-
if (id == pool[i].id)
|
62
|
+
if (id == pool[i].id) {
|
62
63
|
return;
|
64
|
+
}
|
63
65
|
}
|
64
66
|
if (pool.size() < pool.capacity()) {
|
65
67
|
pool.push_back(Neighbor(id, dist, true));
|
@@ -118,22 +120,26 @@ int insert_into_pool(Neighbor* addr, int size, Neighbor nn) {
|
|
118
120
|
}
|
119
121
|
while (left < right - 1) {
|
120
122
|
int mid = (left + right) / 2;
|
121
|
-
if (addr[mid].distance > nn.distance)
|
123
|
+
if (addr[mid].distance > nn.distance) {
|
122
124
|
right = mid;
|
123
|
-
else
|
125
|
+
} else {
|
124
126
|
left = mid;
|
127
|
+
}
|
125
128
|
}
|
126
129
|
// check equal ID
|
127
130
|
|
128
131
|
while (left > 0) {
|
129
|
-
if (addr[left].distance < nn.distance)
|
132
|
+
if (addr[left].distance < nn.distance) {
|
130
133
|
break;
|
131
|
-
|
134
|
+
}
|
135
|
+
if (addr[left].id == nn.id) {
|
132
136
|
return size + 1;
|
137
|
+
}
|
133
138
|
left--;
|
134
139
|
}
|
135
|
-
if (addr[left].id == nn.id || addr[right].id == nn.id)
|
140
|
+
if (addr[left].id == nn.id || addr[right].id == nn.id) {
|
136
141
|
return size + 1;
|
142
|
+
}
|
137
143
|
memmove((char*)&addr[right + 1],
|
138
144
|
&addr[right],
|
139
145
|
(size - right) * sizeof(Neighbor));
|
@@ -191,8 +197,9 @@ void NNDescent::update() {
|
|
191
197
|
auto& nn = graph[n];
|
192
198
|
std::sort(nn.pool.begin(), nn.pool.end());
|
193
199
|
|
194
|
-
if (nn.pool.size() > L)
|
200
|
+
if (nn.pool.size() > L) {
|
195
201
|
nn.pool.resize(L);
|
202
|
+
}
|
196
203
|
nn.pool.reserve(L); // keep the pool size be L
|
197
204
|
|
198
205
|
int maxl = std::min(nn.M + S, (int)nn.pool.size());
|
@@ -470,8 +477,9 @@ void NNDescent::search(
|
|
470
477
|
Neighbor nn(id, dist, true);
|
471
478
|
int r = insert_into_pool(retset.data(), L_2, nn);
|
472
479
|
|
473
|
-
if (r < nk)
|
480
|
+
if (r < nk) {
|
474
481
|
nk = r;
|
482
|
+
}
|
475
483
|
}
|
476
484
|
}
|
477
485
|
if (nk <= k) {
|
@@ -42,8 +42,9 @@ double PermutationObjective::cost_update(const int* perm, int iw, int jw)
|
|
42
42
|
double orig_cost = compute_cost(perm);
|
43
43
|
|
44
44
|
std::vector<int> perm2(n);
|
45
|
-
for (int i = 0; i < n; i++)
|
45
|
+
for (int i = 0; i < n; i++) {
|
46
46
|
perm2[i] = perm[i];
|
47
|
+
}
|
47
48
|
perm2[iw] = perm[jw];
|
48
49
|
perm2[jw] = perm[iw];
|
49
50
|
|
@@ -73,8 +74,9 @@ double SimulatedAnnealingOptimizer::run_optimization(int* best_perm) {
|
|
73
74
|
// just do a few runs of the annealing and keep the lowest output cost
|
74
75
|
for (int it = 0; it < n_redo; it++) {
|
75
76
|
std::vector<int> perm(n);
|
76
|
-
for (int i = 0; i < n; i++)
|
77
|
+
for (int i = 0; i < n; i++) {
|
77
78
|
perm[i] = i;
|
79
|
+
}
|
78
80
|
if (init_random) {
|
79
81
|
for (int i = 0; i < n; i++) {
|
80
82
|
int j = i + rnd->rand_int(n - i);
|
@@ -82,8 +84,9 @@ double SimulatedAnnealingOptimizer::run_optimization(int* best_perm) {
|
|
82
84
|
}
|
83
85
|
}
|
84
86
|
float cost = optimize(perm.data());
|
85
|
-
if (logfile)
|
87
|
+
if (logfile) {
|
86
88
|
fprintf(logfile, "\n");
|
89
|
+
}
|
87
90
|
if (verbose > 1) {
|
88
91
|
printf(" optimization run %d: cost=%g %s\n",
|
89
92
|
it,
|
@@ -103,8 +106,9 @@ double SimulatedAnnealingOptimizer::run_optimization(int* best_perm) {
|
|
103
106
|
double SimulatedAnnealingOptimizer::optimize(int* perm) {
|
104
107
|
double cost = init_cost = obj->compute_cost(perm);
|
105
108
|
int log2n = 0;
|
106
|
-
while (!(n <= (1 << log2n)))
|
109
|
+
while (!(n <= (1 << log2n))) {
|
107
110
|
log2n++;
|
111
|
+
}
|
108
112
|
double temperature = init_temperature;
|
109
113
|
int n_swap = 0, n_hot = 0;
|
110
114
|
for (int it = 0; it < n_iter; it++) {
|
@@ -116,16 +120,18 @@ double SimulatedAnnealingOptimizer::optimize(int* perm) {
|
|
116
120
|
} else {
|
117
121
|
iw = rnd->rand_int(n);
|
118
122
|
jw = rnd->rand_int(n - 1);
|
119
|
-
if (jw == iw)
|
123
|
+
if (jw == iw) {
|
120
124
|
jw++;
|
125
|
+
}
|
121
126
|
}
|
122
127
|
double delta_cost = obj->cost_update(perm, iw, jw);
|
123
128
|
if (delta_cost < 0 || rnd->rand_float() < temperature) {
|
124
129
|
std::swap(perm[iw], perm[jw]);
|
125
130
|
cost += delta_cost;
|
126
131
|
n_swap++;
|
127
|
-
if (delta_cost >= 0)
|
132
|
+
if (delta_cost >= 0) {
|
128
133
|
n_hot++;
|
134
|
+
}
|
129
135
|
}
|
130
136
|
if (verbose > 2 || (verbose > 1 && it % 10000 == 0)) {
|
131
137
|
printf(" iteration %d cost %g temp %g n_swap %d "
|
@@ -147,8 +153,9 @@ double SimulatedAnnealingOptimizer::optimize(int* perm) {
|
|
147
153
|
n_hot);
|
148
154
|
}
|
149
155
|
}
|
150
|
-
if (verbose > 1)
|
156
|
+
if (verbose > 1) {
|
151
157
|
printf("\n");
|
158
|
+
}
|
152
159
|
return cost;
|
153
160
|
}
|
154
161
|
|
@@ -467,8 +474,9 @@ struct Score3Computer : PermutationObjective {
|
|
467
474
|
*/
|
468
475
|
Taccu compute_update(const int* perm, int iw, int jw) const {
|
469
476
|
assert(iw != jw);
|
470
|
-
if (iw > jw)
|
477
|
+
if (iw > jw) {
|
471
478
|
std::swap(iw, jw);
|
479
|
+
}
|
472
480
|
|
473
481
|
Taccu accu = 0;
|
474
482
|
const Ttab* n_gt_i = n_gt.data();
|
@@ -480,8 +488,9 @@ struct Score3Computer : PermutationObjective {
|
|
480
488
|
|
481
489
|
accu += update_i_cross(perm, iw, jw, ip0, ip, n_gt_i);
|
482
490
|
|
483
|
-
if (ip != ip0)
|
491
|
+
if (ip != ip0) {
|
484
492
|
accu += update_i_plane(perm, iw, jw, ip0, ip, n_gt_i);
|
493
|
+
}
|
485
494
|
|
486
495
|
n_gt_i += nc * nc;
|
487
496
|
}
|
@@ -585,8 +594,9 @@ struct Score3Computer : PermutationObjective {
|
|
585
594
|
const Ttab* n_gt_ij) const {
|
586
595
|
Taccu accu = 0;
|
587
596
|
for (int k = 0; k < nc; k++) {
|
588
|
-
if (k == iw || k == jw)
|
597
|
+
if (k == iw || k == jw) {
|
589
598
|
continue;
|
599
|
+
}
|
590
600
|
int kp = perm[k];
|
591
601
|
Ttab ng = n_gt_ij[k];
|
592
602
|
if (hamming_dis(ip, jp) < hamming_dis(ip, kp)) {
|
@@ -617,8 +627,9 @@ struct Score3Computer : PermutationObjective {
|
|
617
627
|
accu += update_k(perm, iw, jw, ip0, ip, jp0, jp, iw, n_gt_ij);
|
618
628
|
accu += update_k(perm, iw, jw, ip0, ip, jp0, jp, jw, n_gt_ij);
|
619
629
|
|
620
|
-
if (jp != jp0)
|
630
|
+
if (jp != jp0) {
|
621
631
|
accu += update_j_line(perm, iw, jw, ip0, ip, jp0, jp, n_gt_ij);
|
632
|
+
}
|
622
633
|
|
623
634
|
n_gt_ij += nc;
|
624
635
|
}
|
@@ -721,8 +732,9 @@ struct RankingScore2 : Score3Computer<float, double> {
|
|
721
732
|
|
722
733
|
{ // build rank table
|
723
734
|
IndirectSort s = {gtd};
|
724
|
-
for (int j = 0; j < nb; j++)
|
735
|
+
for (int j = 0; j < nb; j++) {
|
725
736
|
ranks[j] = j;
|
737
|
+
}
|
726
738
|
std::sort(ranks, ranks + nb, s);
|
727
739
|
}
|
728
740
|
|
@@ -814,17 +826,20 @@ void PolysemousTraining::optimize_reproduce_distances(
|
|
814
826
|
final_cost);
|
815
827
|
}
|
816
828
|
|
817
|
-
if (log_pattern.size())
|
829
|
+
if (log_pattern.size()) {
|
818
830
|
fclose(optim.logfile);
|
831
|
+
}
|
819
832
|
|
820
833
|
std::vector<float> centroids_copy;
|
821
|
-
for (int i = 0; i < dsub * n; i++)
|
834
|
+
for (int i = 0; i < dsub * n; i++) {
|
822
835
|
centroids_copy.push_back(centroids[i]);
|
836
|
+
}
|
823
837
|
|
824
|
-
for (int i = 0; i < n; i++)
|
838
|
+
for (int i = 0; i < n; i++) {
|
825
839
|
memcpy(centroids + perm[i] * dsub,
|
826
840
|
centroids_copy.data() + i * dsub,
|
827
841
|
dsub * sizeof(centroids[0]));
|
842
|
+
}
|
828
843
|
}
|
829
844
|
}
|
830
845
|
|
@@ -853,14 +868,16 @@ void PolysemousTraining::optimize_ranking(
|
|
853
868
|
|
854
869
|
if (n > 0) {
|
855
870
|
std::vector<float> xtrain(n * dsub);
|
856
|
-
for (int i = 0; i < n; i++)
|
871
|
+
for (int i = 0; i < n; i++) {
|
857
872
|
memcpy(xtrain.data() + i * dsub,
|
858
873
|
x + i * pq.d + m * dsub,
|
859
874
|
sizeof(float) * dsub);
|
875
|
+
}
|
860
876
|
|
861
877
|
codes.resize(n);
|
862
|
-
for (int i = 0; i < n; i++)
|
878
|
+
for (int i = 0; i < n; i++) {
|
863
879
|
codes[i] = all_codes[i * pq.code_size + m];
|
880
|
+
}
|
864
881
|
|
865
882
|
nq = n / 4;
|
866
883
|
nb = n - nq;
|
@@ -873,8 +890,9 @@ void PolysemousTraining::optimize_ranking(
|
|
873
890
|
} else {
|
874
891
|
nq = nb = pq.ksub;
|
875
892
|
codes.resize(2 * nq);
|
876
|
-
for (int i = 0; i < nq; i++)
|
893
|
+
for (int i = 0; i < nq; i++) {
|
877
894
|
codes[i] = codes[i + nq] = i;
|
895
|
+
}
|
878
896
|
|
879
897
|
gt_distances.resize(nq * nb);
|
880
898
|
|
@@ -921,19 +939,22 @@ void PolysemousTraining::optimize_ranking(
|
|
921
939
|
optim.init_cost,
|
922
940
|
final_cost);
|
923
941
|
|
924
|
-
if (log_pattern.size())
|
942
|
+
if (log_pattern.size()) {
|
925
943
|
fclose(optim.logfile);
|
944
|
+
}
|
926
945
|
|
927
946
|
float* centroids = pq.get_centroids(m, 0);
|
928
947
|
|
929
948
|
std::vector<float> centroids_copy;
|
930
|
-
for (int i = 0; i < dsub * pq.ksub; i++)
|
949
|
+
for (int i = 0; i < dsub * pq.ksub; i++) {
|
931
950
|
centroids_copy.push_back(centroids[i]);
|
951
|
+
}
|
932
952
|
|
933
|
-
for (int i = 0; i < pq.ksub; i++)
|
953
|
+
for (int i = 0; i < pq.ksub; i++) {
|
934
954
|
memcpy(centroids + perm[i] * dsub,
|
935
955
|
centroids_copy.data() + i * dsub,
|
936
956
|
dsub * sizeof(centroids[0]));
|
957
|
+
}
|
937
958
|
}
|
938
959
|
}
|
939
960
|
|
@@ -7,6 +7,9 @@
|
|
7
7
|
|
8
8
|
#include <faiss/impl/RaBitQuantizer.h>
|
9
9
|
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
11
|
+
#include <faiss/utils/distances.h>
|
12
|
+
#include <faiss/utils/rabitq_simd.h>
|
10
13
|
#include <algorithm>
|
11
14
|
#include <cmath>
|
12
15
|
#include <cstring>
|
@@ -14,9 +17,6 @@
|
|
14
17
|
#include <memory>
|
15
18
|
#include <vector>
|
16
19
|
|
17
|
-
#include <faiss/impl/FaissAssert.h>
|
18
|
-
#include <faiss/utils/distances.h>
|
19
|
-
|
20
20
|
namespace faiss {
|
21
21
|
|
22
22
|
struct FactorsData {
|
@@ -351,27 +351,9 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
|
|
351
351
|
const size_t di_8b = (d + 7) / 8;
|
352
352
|
const size_t di_64b = (di_8b / 8) * 8;
|
353
353
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
// process 64-bit popcounts
|
359
|
-
uint64_t count_dot = 0;
|
360
|
-
for (size_t i = 0; i < di_64b; i += 8) {
|
361
|
-
const auto qv = *(const uint64_t*)(query_j + i);
|
362
|
-
const auto yv = *(const uint64_t*)(binary_data + i);
|
363
|
-
count_dot += __builtin_popcountll(qv & yv);
|
364
|
-
}
|
365
|
-
|
366
|
-
// process leftovers
|
367
|
-
for (size_t i = di_64b; i < di_8b; i++) {
|
368
|
-
const auto qv = *(query_j + i);
|
369
|
-
const auto yv = *(binary_data + i);
|
370
|
-
count_dot += __builtin_popcount(qv & yv);
|
371
|
-
}
|
372
|
-
|
373
|
-
dot_qo += (count_dot << j);
|
374
|
-
}
|
354
|
+
// Use the optimized popcount function from rabitq_simd.h
|
355
|
+
float dot_qo =
|
356
|
+
rabitq_dp_popcnt(rearranged_rotated_qq.data(), binary_data, d, qb);
|
375
357
|
|
376
358
|
// It was a willful decision (after the discussion) to not to pre-cache
|
377
359
|
// the sum of all bits, just in order to reduce the overhead per vector.
|