faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -14,9 +14,9 @@
|
|
|
14
14
|
#include <unordered_set>
|
|
15
15
|
|
|
16
16
|
#include <sys/mman.h>
|
|
17
|
-
#include <unistd.h>
|
|
18
|
-
#include <sys/types.h>
|
|
19
17
|
#include <sys/stat.h>
|
|
18
|
+
#include <sys/types.h>
|
|
19
|
+
#include <unistd.h>
|
|
20
20
|
|
|
21
21
|
#include <faiss/impl/FaissAssert.h>
|
|
22
22
|
#include <faiss/utils/utils.h>
|
|
@@ -24,15 +24,12 @@
|
|
|
24
24
|
#include <faiss/impl/io.h>
|
|
25
25
|
#include <faiss/impl/io_macros.h>
|
|
26
26
|
|
|
27
|
-
|
|
28
27
|
namespace faiss {
|
|
29
28
|
|
|
30
|
-
|
|
31
29
|
/**********************************************
|
|
32
30
|
* LockLevels
|
|
33
31
|
**********************************************/
|
|
34
32
|
|
|
35
|
-
|
|
36
33
|
struct LockLevels {
|
|
37
34
|
/* There n times lock1(n), one lock2 and one lock3
|
|
38
35
|
* Invariants:
|
|
@@ -47,8 +44,8 @@ struct LockLevels {
|
|
|
47
44
|
pthread_cond_t level3_cv;
|
|
48
45
|
|
|
49
46
|
std::unordered_set<int> level1_holders; // which level1 locks are held
|
|
50
|
-
int n_level2;
|
|
51
|
-
bool level3_in_use;
|
|
47
|
+
int n_level2; // nb threads that wait on level2
|
|
48
|
+
bool level3_in_use; // a threads waits on level3
|
|
52
49
|
bool level2_in_use;
|
|
53
50
|
|
|
54
51
|
LockLevels() {
|
|
@@ -91,7 +88,7 @@ struct LockLevels {
|
|
|
91
88
|
|
|
92
89
|
void lock_2() {
|
|
93
90
|
pthread_mutex_lock(&mutex1);
|
|
94
|
-
n_level2
|
|
91
|
+
n_level2++;
|
|
95
92
|
if (level3_in_use) { // tell waiting level3 that we are blocked
|
|
96
93
|
pthread_cond_signal(&level3_cv);
|
|
97
94
|
}
|
|
@@ -105,7 +102,7 @@ struct LockLevels {
|
|
|
105
102
|
void unlock_2() {
|
|
106
103
|
pthread_mutex_lock(&mutex1);
|
|
107
104
|
level2_in_use = false;
|
|
108
|
-
n_level2
|
|
105
|
+
n_level2--;
|
|
109
106
|
pthread_cond_signal(&level2_cv);
|
|
110
107
|
pthread_mutex_unlock(&mutex1);
|
|
111
108
|
}
|
|
@@ -128,17 +125,17 @@ struct LockLevels {
|
|
|
128
125
|
pthread_mutex_unlock(&mutex1);
|
|
129
126
|
}
|
|
130
127
|
|
|
131
|
-
void print
|
|
128
|
+
void print() {
|
|
132
129
|
pthread_mutex_lock(&mutex1);
|
|
133
130
|
printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
|
|
134
|
-
int(level3_in_use),
|
|
131
|
+
int(level3_in_use),
|
|
132
|
+
n_level2);
|
|
135
133
|
for (int k : level1_holders) {
|
|
136
134
|
printf("%d ", k);
|
|
137
135
|
}
|
|
138
136
|
printf("]\n");
|
|
139
137
|
pthread_mutex_unlock(&mutex1);
|
|
140
138
|
}
|
|
141
|
-
|
|
142
139
|
};
|
|
143
140
|
|
|
144
141
|
/**********************************************
|
|
@@ -146,27 +143,27 @@ struct LockLevels {
|
|
|
146
143
|
**********************************************/
|
|
147
144
|
|
|
148
145
|
struct OnDiskInvertedLists::OngoingPrefetch {
|
|
149
|
-
|
|
150
146
|
struct Thread {
|
|
151
147
|
pthread_t pth;
|
|
152
|
-
OngoingPrefetch
|
|
148
|
+
OngoingPrefetch* pf;
|
|
153
149
|
|
|
154
|
-
bool one_list
|
|
150
|
+
bool one_list() {
|
|
155
151
|
idx_t list_no = pf->get_next_list();
|
|
156
|
-
if(list_no == -1)
|
|
157
|
-
|
|
158
|
-
od->
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
const
|
|
152
|
+
if (list_no == -1)
|
|
153
|
+
return false;
|
|
154
|
+
const OnDiskInvertedLists* od = pf->od;
|
|
155
|
+
od->locks->lock_1(list_no);
|
|
156
|
+
size_t n = od->list_size(list_no);
|
|
157
|
+
const Index::idx_t* idx = od->get_ids(list_no);
|
|
158
|
+
const uint8_t* codes = od->get_codes(list_no);
|
|
162
159
|
int cs = 0;
|
|
163
|
-
for (size_t i = 0; i < n;i++) {
|
|
160
|
+
for (size_t i = 0; i < n; i++) {
|
|
164
161
|
cs += idx[i];
|
|
165
162
|
}
|
|
166
|
-
const idx_t
|
|
163
|
+
const idx_t* codes8 = (const idx_t*)codes;
|
|
167
164
|
idx_t n8 = n * od->code_size / 8;
|
|
168
165
|
|
|
169
|
-
for (size_t i = 0; i < n8;i++) {
|
|
166
|
+
for (size_t i = 0; i < n8; i++) {
|
|
170
167
|
cs += codes8[i];
|
|
171
168
|
}
|
|
172
169
|
od->locks->unlock_1(list_no);
|
|
@@ -174,7 +171,6 @@ struct OnDiskInvertedLists::OngoingPrefetch {
|
|
|
174
171
|
global_cs += cs & 1;
|
|
175
172
|
return true;
|
|
176
173
|
}
|
|
177
|
-
|
|
178
174
|
};
|
|
179
175
|
|
|
180
176
|
std::vector<Thread> threads;
|
|
@@ -189,125 +185,123 @@ struct OnDiskInvertedLists::OngoingPrefetch {
|
|
|
189
185
|
// pretext to avoid code below to be optimized out
|
|
190
186
|
static int global_cs;
|
|
191
187
|
|
|
192
|
-
const OnDiskInvertedLists
|
|
188
|
+
const OnDiskInvertedLists* od;
|
|
193
189
|
|
|
194
|
-
explicit OngoingPrefetch
|
|
195
|
-
|
|
196
|
-
pthread_mutex_init
|
|
197
|
-
pthread_mutex_init (&list_ids_mutex, nullptr);
|
|
190
|
+
explicit OngoingPrefetch(const OnDiskInvertedLists* od) : od(od) {
|
|
191
|
+
pthread_mutex_init(&mutex, nullptr);
|
|
192
|
+
pthread_mutex_init(&list_ids_mutex, nullptr);
|
|
198
193
|
cur_list = 0;
|
|
199
194
|
}
|
|
200
195
|
|
|
201
|
-
static void* prefetch_list
|
|
202
|
-
Thread
|
|
196
|
+
static void* prefetch_list(void* arg) {
|
|
197
|
+
Thread* th = static_cast<Thread*>(arg);
|
|
203
198
|
|
|
204
|
-
while (th->one_list())
|
|
199
|
+
while (th->one_list())
|
|
200
|
+
;
|
|
205
201
|
|
|
206
202
|
return nullptr;
|
|
207
203
|
}
|
|
208
204
|
|
|
209
|
-
idx_t get_next_list
|
|
205
|
+
idx_t get_next_list() {
|
|
210
206
|
idx_t list_no = -1;
|
|
211
|
-
pthread_mutex_lock
|
|
207
|
+
pthread_mutex_lock(&list_ids_mutex);
|
|
212
208
|
if (cur_list >= 0 && cur_list < list_ids.size()) {
|
|
213
209
|
list_no = list_ids[cur_list++];
|
|
214
210
|
}
|
|
215
|
-
pthread_mutex_unlock
|
|
211
|
+
pthread_mutex_unlock(&list_ids_mutex);
|
|
216
212
|
return list_no;
|
|
217
213
|
}
|
|
218
214
|
|
|
219
|
-
void prefetch_lists
|
|
220
|
-
pthread_mutex_lock
|
|
221
|
-
pthread_mutex_lock
|
|
222
|
-
list_ids.clear
|
|
223
|
-
pthread_mutex_unlock
|
|
224
|
-
for (auto
|
|
225
|
-
pthread_join
|
|
215
|
+
void prefetch_lists(const idx_t* list_nos, int n) {
|
|
216
|
+
pthread_mutex_lock(&mutex);
|
|
217
|
+
pthread_mutex_lock(&list_ids_mutex);
|
|
218
|
+
list_ids.clear();
|
|
219
|
+
pthread_mutex_unlock(&list_ids_mutex);
|
|
220
|
+
for (auto& th : threads) {
|
|
221
|
+
pthread_join(th.pth, nullptr);
|
|
226
222
|
}
|
|
227
223
|
|
|
228
|
-
threads.resize
|
|
224
|
+
threads.resize(0);
|
|
229
225
|
cur_list = 0;
|
|
230
|
-
int nt = std::min
|
|
226
|
+
int nt = std::min(n, od->prefetch_nthread);
|
|
231
227
|
|
|
232
228
|
if (nt > 0) {
|
|
233
229
|
// prepare tasks
|
|
234
230
|
for (int i = 0; i < n; i++) {
|
|
235
231
|
idx_t list_no = list_nos[i];
|
|
236
232
|
if (list_no >= 0 && od->list_size(list_no) > 0) {
|
|
237
|
-
list_ids.push_back
|
|
233
|
+
list_ids.push_back(list_no);
|
|
238
234
|
}
|
|
239
235
|
}
|
|
240
236
|
// prepare threads
|
|
241
|
-
threads.resize
|
|
242
|
-
for (Thread
|
|
237
|
+
threads.resize(nt);
|
|
238
|
+
for (Thread& th : threads) {
|
|
243
239
|
th.pf = this;
|
|
244
|
-
pthread_create
|
|
240
|
+
pthread_create(&th.pth, nullptr, prefetch_list, &th);
|
|
245
241
|
}
|
|
246
242
|
}
|
|
247
|
-
pthread_mutex_unlock
|
|
243
|
+
pthread_mutex_unlock(&mutex);
|
|
248
244
|
}
|
|
249
245
|
|
|
250
|
-
~OngoingPrefetch
|
|
251
|
-
pthread_mutex_lock
|
|
252
|
-
for (auto
|
|
253
|
-
pthread_join
|
|
246
|
+
~OngoingPrefetch() {
|
|
247
|
+
pthread_mutex_lock(&mutex);
|
|
248
|
+
for (auto& th : threads) {
|
|
249
|
+
pthread_join(th.pth, nullptr);
|
|
254
250
|
}
|
|
255
|
-
pthread_mutex_unlock
|
|
256
|
-
pthread_mutex_destroy
|
|
257
|
-
pthread_mutex_destroy
|
|
251
|
+
pthread_mutex_unlock(&mutex);
|
|
252
|
+
pthread_mutex_destroy(&mutex);
|
|
253
|
+
pthread_mutex_destroy(&list_ids_mutex);
|
|
258
254
|
}
|
|
259
|
-
|
|
260
255
|
};
|
|
261
256
|
|
|
262
257
|
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
|
|
263
258
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
{
|
|
267
|
-
pf->prefetch_lists (list_nos, n);
|
|
259
|
+
void OnDiskInvertedLists::prefetch_lists(const idx_t* list_nos, int n) const {
|
|
260
|
+
pf->prefetch_lists(list_nos, n);
|
|
268
261
|
}
|
|
269
262
|
|
|
270
|
-
|
|
271
|
-
|
|
272
263
|
/**********************************************
|
|
273
264
|
* OnDiskInvertedLists: mmapping
|
|
274
265
|
**********************************************/
|
|
275
266
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
{
|
|
279
|
-
const char *rw_flags = read_only ? "r" : "r+";
|
|
267
|
+
void OnDiskInvertedLists::do_mmap() {
|
|
268
|
+
const char* rw_flags = read_only ? "r" : "r+";
|
|
280
269
|
int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
|
|
281
|
-
FILE
|
|
282
|
-
FAISS_THROW_IF_NOT_FMT
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
270
|
+
FILE* f = fopen(filename.c_str(), rw_flags);
|
|
271
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
272
|
+
f,
|
|
273
|
+
"could not open %s in mode %s: %s",
|
|
274
|
+
filename.c_str(),
|
|
275
|
+
rw_flags,
|
|
276
|
+
strerror(errno));
|
|
277
|
+
|
|
278
|
+
uint8_t* ptro =
|
|
279
|
+
(uint8_t*)mmap(nullptr, totsize, prot, MAP_SHARED, fileno(f), 0);
|
|
280
|
+
|
|
281
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
282
|
+
ptro != MAP_FAILED,
|
|
283
|
+
"could not mmap %s: %s",
|
|
284
|
+
filename.c_str(),
|
|
285
|
+
strerror(errno));
|
|
292
286
|
ptr = ptro;
|
|
293
|
-
fclose
|
|
294
|
-
|
|
287
|
+
fclose(f);
|
|
295
288
|
}
|
|
296
289
|
|
|
297
|
-
void OnDiskInvertedLists::update_totsize
|
|
298
|
-
{
|
|
299
|
-
|
|
290
|
+
void OnDiskInvertedLists::update_totsize(size_t new_size) {
|
|
300
291
|
// unmap file
|
|
301
292
|
if (ptr != nullptr) {
|
|
302
|
-
int err = munmap
|
|
303
|
-
FAISS_THROW_IF_NOT_FMT
|
|
293
|
+
int err = munmap(ptr, totsize);
|
|
294
|
+
FAISS_THROW_IF_NOT_FMT(err == 0, "munmap error: %s", strerror(errno));
|
|
304
295
|
}
|
|
305
296
|
if (totsize == 0) {
|
|
306
297
|
// must create file before truncating it
|
|
307
|
-
FILE
|
|
308
|
-
FAISS_THROW_IF_NOT_FMT
|
|
309
|
-
|
|
310
|
-
|
|
298
|
+
FILE* f = fopen(filename.c_str(), "w");
|
|
299
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
300
|
+
f,
|
|
301
|
+
"could not open %s in mode W: %s",
|
|
302
|
+
filename.c_str(),
|
|
303
|
+
strerror(errno));
|
|
304
|
+
fclose(f);
|
|
311
305
|
}
|
|
312
306
|
|
|
313
307
|
if (new_size > totsize) {
|
|
@@ -315,7 +309,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
|
315
309
|
slots.back().offset + slots.back().capacity == totsize) {
|
|
316
310
|
slots.back().capacity += new_size - totsize;
|
|
317
311
|
} else {
|
|
318
|
-
slots.push_back
|
|
312
|
+
slots.push_back(Slot(totsize, new_size - totsize));
|
|
319
313
|
}
|
|
320
314
|
} else {
|
|
321
315
|
assert(!"not implemented");
|
|
@@ -324,89 +318,69 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
|
324
318
|
totsize = new_size;
|
|
325
319
|
|
|
326
320
|
// create file
|
|
327
|
-
printf
|
|
321
|
+
printf("resizing %s to %zd bytes\n", filename.c_str(), totsize);
|
|
328
322
|
|
|
329
|
-
int err = truncate
|
|
323
|
+
int err = truncate(filename.c_str(), totsize);
|
|
330
324
|
|
|
331
|
-
FAISS_THROW_IF_NOT_FMT
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
325
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
326
|
+
err == 0,
|
|
327
|
+
"truncate %s to %ld: %s",
|
|
328
|
+
filename.c_str(),
|
|
329
|
+
totsize,
|
|
330
|
+
strerror(errno));
|
|
331
|
+
do_mmap();
|
|
335
332
|
}
|
|
336
333
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
334
|
/**********************************************
|
|
343
335
|
* OnDiskInvertedLists
|
|
344
336
|
**********************************************/
|
|
345
337
|
|
|
346
338
|
#define INVALID_OFFSET (size_t)(-1)
|
|
347
339
|
|
|
348
|
-
OnDiskOneList::OnDiskOneList ()
|
|
349
|
-
size (0), capacity (0), offset (INVALID_OFFSET)
|
|
350
|
-
{}
|
|
351
|
-
|
|
352
|
-
OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
|
|
353
|
-
offset (offset), capacity (capacity)
|
|
354
|
-
{}
|
|
340
|
+
OnDiskOneList::OnDiskOneList() : size(0), capacity(0), offset(INVALID_OFFSET) {}
|
|
355
341
|
|
|
356
|
-
OnDiskInvertedLists::Slot::Slot
|
|
357
|
-
|
|
358
|
-
{}
|
|
342
|
+
OnDiskInvertedLists::Slot::Slot(size_t offset, size_t capacity)
|
|
343
|
+
: offset(offset), capacity(capacity) {}
|
|
359
344
|
|
|
345
|
+
OnDiskInvertedLists::Slot::Slot() : offset(0), capacity(0) {}
|
|
360
346
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
size_t
|
|
364
|
-
const char
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
lists.resize (nlist);
|
|
347
|
+
OnDiskInvertedLists::OnDiskInvertedLists(
|
|
348
|
+
size_t nlist,
|
|
349
|
+
size_t code_size,
|
|
350
|
+
const char* filename)
|
|
351
|
+
: InvertedLists(nlist, code_size),
|
|
352
|
+
filename(filename),
|
|
353
|
+
totsize(0),
|
|
354
|
+
ptr(nullptr),
|
|
355
|
+
read_only(false),
|
|
356
|
+
locks(new LockLevels()),
|
|
357
|
+
pf(new OngoingPrefetch(this)),
|
|
358
|
+
prefetch_nthread(32) {
|
|
359
|
+
lists.resize(nlist);
|
|
375
360
|
|
|
376
361
|
// slots starts empty
|
|
377
362
|
}
|
|
378
363
|
|
|
379
|
-
OnDiskInvertedLists::OnDiskInvertedLists ()
|
|
380
|
-
OnDiskInvertedLists (0, 0, "")
|
|
381
|
-
{
|
|
382
|
-
}
|
|
364
|
+
OnDiskInvertedLists::OnDiskInvertedLists() : OnDiskInvertedLists(0, 0, "") {}
|
|
383
365
|
|
|
384
|
-
OnDiskInvertedLists::~OnDiskInvertedLists
|
|
385
|
-
{
|
|
366
|
+
OnDiskInvertedLists::~OnDiskInvertedLists() {
|
|
386
367
|
delete pf;
|
|
387
368
|
|
|
388
369
|
// unmap all lists
|
|
389
370
|
if (ptr != nullptr) {
|
|
390
|
-
int err = munmap
|
|
371
|
+
int err = munmap(ptr, totsize);
|
|
391
372
|
if (err != 0) {
|
|
392
|
-
fprintf(stderr, "mumap error: %s",
|
|
393
|
-
strerror(errno));
|
|
373
|
+
fprintf(stderr, "mumap error: %s", strerror(errno));
|
|
394
374
|
}
|
|
395
375
|
}
|
|
396
376
|
delete locks;
|
|
397
377
|
}
|
|
398
378
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
size_t OnDiskInvertedLists::list_size(size_t list_no) const
|
|
403
|
-
{
|
|
379
|
+
size_t OnDiskInvertedLists::list_size(size_t list_no) const {
|
|
404
380
|
return lists[list_no].size;
|
|
405
381
|
}
|
|
406
382
|
|
|
407
|
-
|
|
408
|
-
const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
|
|
409
|
-
{
|
|
383
|
+
const uint8_t* OnDiskInvertedLists::get_codes(size_t list_no) const {
|
|
410
384
|
if (lists[list_no].offset == INVALID_OFFSET) {
|
|
411
385
|
return nullptr;
|
|
412
386
|
}
|
|
@@ -414,68 +388,65 @@ const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
|
|
|
414
388
|
return ptr + lists[list_no].offset;
|
|
415
389
|
}
|
|
416
390
|
|
|
417
|
-
const Index::idx_t
|
|
418
|
-
{
|
|
391
|
+
const Index::idx_t* OnDiskInvertedLists::get_ids(size_t list_no) const {
|
|
419
392
|
if (lists[list_no].offset == INVALID_OFFSET) {
|
|
420
393
|
return nullptr;
|
|
421
394
|
}
|
|
422
395
|
|
|
423
|
-
return (
|
|
424
|
-
|
|
396
|
+
return (
|
|
397
|
+
const idx_t*)(ptr + lists[list_no].offset + code_size * lists[list_no].capacity);
|
|
425
398
|
}
|
|
426
399
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
memcpy
|
|
400
|
+
void OnDiskInvertedLists::update_entries(
|
|
401
|
+
size_t list_no,
|
|
402
|
+
size_t offset,
|
|
403
|
+
size_t n_entry,
|
|
404
|
+
const idx_t* ids_in,
|
|
405
|
+
const uint8_t* codes_in) {
|
|
406
|
+
FAISS_THROW_IF_NOT(!read_only);
|
|
407
|
+
if (n_entry == 0)
|
|
408
|
+
return;
|
|
409
|
+
const List& l = lists[list_no];
|
|
410
|
+
assert(n_entry + offset <= l.size);
|
|
411
|
+
idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
|
|
412
|
+
memcpy(ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
|
|
413
|
+
uint8_t* codes = const_cast<uint8_t*>(get_codes(list_no));
|
|
414
|
+
memcpy(codes + offset * code_size, codes_in, code_size * n_entry);
|
|
440
415
|
}
|
|
441
416
|
|
|
442
|
-
size_t OnDiskInvertedLists::add_entries
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
417
|
+
size_t OnDiskInvertedLists::add_entries(
|
|
418
|
+
size_t list_no,
|
|
419
|
+
size_t n_entry,
|
|
420
|
+
const idx_t* ids,
|
|
421
|
+
const uint8_t* code) {
|
|
422
|
+
FAISS_THROW_IF_NOT(!read_only);
|
|
423
|
+
locks->lock_1(list_no);
|
|
424
|
+
size_t o = list_size(list_no);
|
|
425
|
+
resize_locked(list_no, n_entry + o);
|
|
426
|
+
update_entries(list_no, o, n_entry, ids, code);
|
|
427
|
+
locks->unlock_1(list_no);
|
|
452
428
|
return o;
|
|
453
429
|
}
|
|
454
430
|
|
|
455
|
-
void OnDiskInvertedLists::resize
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
locks->unlock_1 (list_no);
|
|
431
|
+
void OnDiskInvertedLists::resize(size_t list_no, size_t new_size) {
|
|
432
|
+
FAISS_THROW_IF_NOT(!read_only);
|
|
433
|
+
locks->lock_1(list_no);
|
|
434
|
+
resize_locked(list_no, new_size);
|
|
435
|
+
locks->unlock_1(list_no);
|
|
461
436
|
}
|
|
462
437
|
|
|
438
|
+
void OnDiskInvertedLists::resize_locked(size_t list_no, size_t new_size) {
|
|
439
|
+
List& l = lists[list_no];
|
|
463
440
|
|
|
464
|
-
|
|
465
|
-
void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
|
|
466
|
-
{
|
|
467
|
-
List & l = lists[list_no];
|
|
468
|
-
|
|
469
|
-
if (new_size <= l.capacity &&
|
|
470
|
-
new_size > l.capacity / 2) {
|
|
441
|
+
if (new_size <= l.capacity && new_size > l.capacity / 2) {
|
|
471
442
|
l.size = new_size;
|
|
472
443
|
return;
|
|
473
444
|
}
|
|
474
445
|
|
|
475
446
|
// otherwise we release the current slot, and find a new one
|
|
476
447
|
|
|
477
|
-
locks->lock_2
|
|
478
|
-
free_slot
|
|
448
|
+
locks->lock_2();
|
|
449
|
+
free_slot(l.offset, l.capacity);
|
|
479
450
|
|
|
480
451
|
List new_l;
|
|
481
452
|
|
|
@@ -487,25 +458,26 @@ void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
|
|
|
487
458
|
while (new_l.capacity < new_size) {
|
|
488
459
|
new_l.capacity *= 2;
|
|
489
460
|
}
|
|
490
|
-
new_l.offset =
|
|
491
|
-
|
|
461
|
+
new_l.offset =
|
|
462
|
+
allocate_slot(new_l.capacity * (sizeof(idx_t) + code_size));
|
|
492
463
|
}
|
|
493
464
|
|
|
494
465
|
// copy common data
|
|
495
466
|
if (l.offset != new_l.offset) {
|
|
496
|
-
size_t n = std::min
|
|
467
|
+
size_t n = std::min(new_size, l.size);
|
|
497
468
|
if (n > 0) {
|
|
498
|
-
memcpy
|
|
499
|
-
memcpy
|
|
500
|
-
|
|
469
|
+
memcpy(ptr + new_l.offset, get_codes(list_no), n * code_size);
|
|
470
|
+
memcpy(ptr + new_l.offset + new_l.capacity * code_size,
|
|
471
|
+
get_ids(list_no),
|
|
472
|
+
n * sizeof(idx_t));
|
|
501
473
|
}
|
|
502
474
|
}
|
|
503
475
|
|
|
504
476
|
lists[list_no] = new_l;
|
|
505
|
-
locks->unlock_2
|
|
477
|
+
locks->unlock_2();
|
|
506
478
|
}
|
|
507
479
|
|
|
508
|
-
size_t OnDiskInvertedLists::allocate_slot
|
|
480
|
+
size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
|
|
509
481
|
// should hold lock2
|
|
510
482
|
|
|
511
483
|
auto it = slots.begin();
|
|
@@ -519,19 +491,19 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
|
|
|
519
491
|
while (new_size - totsize < capacity) {
|
|
520
492
|
new_size *= 2;
|
|
521
493
|
}
|
|
522
|
-
locks->lock_3
|
|
494
|
+
locks->lock_3();
|
|
523
495
|
update_totsize(new_size);
|
|
524
|
-
locks->unlock_3
|
|
496
|
+
locks->unlock_3();
|
|
525
497
|
it = slots.begin();
|
|
526
498
|
while (it != slots.end() && it->capacity < capacity) {
|
|
527
499
|
it++;
|
|
528
500
|
}
|
|
529
|
-
assert
|
|
501
|
+
assert(it != slots.end());
|
|
530
502
|
}
|
|
531
503
|
|
|
532
504
|
size_t o = it->offset;
|
|
533
505
|
if (it->capacity == capacity) {
|
|
534
|
-
slots.erase
|
|
506
|
+
slots.erase(it);
|
|
535
507
|
} else {
|
|
536
508
|
// take from beginning of slot
|
|
537
509
|
it->capacity -= capacity;
|
|
@@ -541,12 +513,10 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
|
|
|
541
513
|
return o;
|
|
542
514
|
}
|
|
543
515
|
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
|
|
547
|
-
|
|
516
|
+
void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
|
|
548
517
|
// should hold lock2
|
|
549
|
-
if (capacity == 0)
|
|
518
|
+
if (capacity == 0)
|
|
519
|
+
return;
|
|
550
520
|
|
|
551
521
|
auto it = slots.begin();
|
|
552
522
|
while (it != slots.end() && it->offset <= offset) {
|
|
@@ -567,15 +537,15 @@ void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
|
|
|
567
537
|
begin_next = it->offset;
|
|
568
538
|
}
|
|
569
539
|
|
|
570
|
-
assert
|
|
571
|
-
assert
|
|
540
|
+
assert(end_prev == inf || offset >= end_prev);
|
|
541
|
+
assert(offset + capacity <= begin_next);
|
|
572
542
|
|
|
573
543
|
if (offset == end_prev) {
|
|
574
544
|
auto prev = it;
|
|
575
545
|
prev--;
|
|
576
546
|
if (offset + capacity == begin_next) {
|
|
577
547
|
prev->capacity += capacity + it->capacity;
|
|
578
|
-
slots.erase
|
|
548
|
+
slots.erase(it);
|
|
579
549
|
} else {
|
|
580
550
|
prev->capacity += capacity;
|
|
581
551
|
}
|
|
@@ -584,36 +554,37 @@ void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
|
|
|
584
554
|
it->offset -= capacity;
|
|
585
555
|
it->capacity += capacity;
|
|
586
556
|
} else {
|
|
587
|
-
slots.insert
|
|
557
|
+
slots.insert(it, Slot(offset, capacity));
|
|
588
558
|
}
|
|
589
559
|
}
|
|
590
560
|
|
|
591
561
|
// TODO shrink global storage if needed
|
|
592
562
|
}
|
|
593
563
|
|
|
594
|
-
|
|
595
564
|
/*****************************************
|
|
596
565
|
* Compact form
|
|
597
566
|
*****************************************/
|
|
598
567
|
|
|
599
|
-
size_t OnDiskInvertedLists::merge_from
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
568
|
+
size_t OnDiskInvertedLists::merge_from(
|
|
569
|
+
const InvertedLists** ils,
|
|
570
|
+
int n_il,
|
|
571
|
+
bool verbose) {
|
|
572
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
573
|
+
totsize == 0, "works only on an empty InvertedLists");
|
|
603
574
|
|
|
604
|
-
std::vector<size_t> sizes
|
|
575
|
+
std::vector<size_t> sizes(nlist);
|
|
605
576
|
for (int i = 0; i < n_il; i++) {
|
|
606
|
-
const InvertedLists
|
|
607
|
-
FAISS_THROW_IF_NOT
|
|
577
|
+
const InvertedLists* il = ils[i];
|
|
578
|
+
FAISS_THROW_IF_NOT(il->nlist == nlist && il->code_size == code_size);
|
|
608
579
|
|
|
609
|
-
for (size_t j = 0; j < nlist; j++)
|
|
610
|
-
sizes
|
|
580
|
+
for (size_t j = 0; j < nlist; j++) {
|
|
581
|
+
sizes[j] += il->list_size(j);
|
|
611
582
|
}
|
|
612
583
|
}
|
|
613
584
|
|
|
614
585
|
size_t cums = 0;
|
|
615
586
|
size_t ntotal = 0;
|
|
616
|
-
for (size_t j = 0; j < nlist; j++)
|
|
587
|
+
for (size_t j = 0; j < nlist; j++) {
|
|
617
588
|
ntotal += sizes[j];
|
|
618
589
|
lists[j].size = 0;
|
|
619
590
|
lists[j].capacity = sizes[j];
|
|
@@ -621,24 +592,26 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
|
|
|
621
592
|
cums += lists[j].capacity * (sizeof(idx_t) + code_size);
|
|
622
593
|
}
|
|
623
594
|
|
|
624
|
-
update_totsize
|
|
625
|
-
|
|
595
|
+
update_totsize(cums);
|
|
626
596
|
|
|
627
597
|
size_t nmerged = 0;
|
|
628
598
|
double t0 = getmillisecs(), last_t = t0;
|
|
629
599
|
|
|
630
600
|
#pragma omp parallel for
|
|
631
601
|
for (size_t j = 0; j < nlist; j++) {
|
|
632
|
-
List
|
|
602
|
+
List& l = lists[j];
|
|
633
603
|
for (int i = 0; i < n_il; i++) {
|
|
634
|
-
const InvertedLists
|
|
604
|
+
const InvertedLists* il = ils[i];
|
|
635
605
|
size_t n_entry = il->list_size(j);
|
|
636
606
|
l.size += n_entry;
|
|
637
|
-
update_entries
|
|
638
|
-
|
|
639
|
-
|
|
607
|
+
update_entries(
|
|
608
|
+
j,
|
|
609
|
+
l.size - n_entry,
|
|
610
|
+
n_entry,
|
|
611
|
+
ScopedIds(il, j).get(),
|
|
612
|
+
ScopedCodes(il, j).get());
|
|
640
613
|
}
|
|
641
|
-
assert
|
|
614
|
+
assert(l.size == l.capacity);
|
|
642
615
|
if (verbose) {
|
|
643
616
|
#pragma omp critical
|
|
644
617
|
{
|
|
@@ -646,72 +619,64 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
|
|
|
646
619
|
double t1 = getmillisecs();
|
|
647
620
|
if (t1 - last_t > 500) {
|
|
648
621
|
printf("merged %zd lists in %.3f s\r",
|
|
649
|
-
nmerged,
|
|
622
|
+
nmerged,
|
|
623
|
+
(t1 - t0) / 1000.0);
|
|
650
624
|
fflush(stdout);
|
|
651
625
|
last_t = t1;
|
|
652
626
|
}
|
|
653
627
|
}
|
|
654
628
|
}
|
|
655
629
|
}
|
|
656
|
-
if(verbose) {
|
|
630
|
+
if (verbose) {
|
|
657
631
|
printf("\n");
|
|
658
632
|
}
|
|
659
633
|
|
|
660
634
|
return ntotal;
|
|
661
635
|
}
|
|
662
636
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
{
|
|
666
|
-
return merge_from
|
|
637
|
+
size_t OnDiskInvertedLists::merge_from_1(
|
|
638
|
+
const InvertedLists* ils,
|
|
639
|
+
bool verbose) {
|
|
640
|
+
return merge_from(&ils, 1, verbose);
|
|
667
641
|
}
|
|
668
642
|
|
|
669
|
-
|
|
670
|
-
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
|
671
|
-
{
|
|
643
|
+
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1) {
|
|
672
644
|
FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
|
|
673
645
|
|
|
674
|
-
std::vector<List> new_lists
|
|
675
|
-
memcpy
|
|
646
|
+
std::vector<List> new_lists(l1 - l0);
|
|
647
|
+
memcpy(new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
|
|
676
648
|
|
|
677
649
|
lists.swap(new_lists);
|
|
678
650
|
|
|
679
651
|
nlist = l1 - l0;
|
|
680
652
|
}
|
|
681
653
|
|
|
682
|
-
|
|
683
|
-
void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
|
|
684
|
-
{
|
|
654
|
+
void OnDiskInvertedLists::set_all_lists_sizes(const size_t* sizes) {
|
|
685
655
|
size_t ofs = 0;
|
|
686
656
|
for (size_t i = 0; i < nlist; i++) {
|
|
687
657
|
lists[i].offset = ofs;
|
|
688
658
|
lists[i].capacity = lists[i].size = sizes[i];
|
|
689
659
|
ofs += sizes[i] * (sizeof(idx_t) + code_size);
|
|
690
660
|
}
|
|
691
|
-
|
|
692
661
|
}
|
|
693
662
|
|
|
694
663
|
/*******************************************************
|
|
695
664
|
* I/O support via callbacks
|
|
696
665
|
*******************************************************/
|
|
697
666
|
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
OnDiskInvertedListsIOHook::
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
WRITE1 (h);
|
|
710
|
-
WRITE1 (ils->nlist);
|
|
711
|
-
WRITE1 (ils->code_size);
|
|
712
|
-
const OnDiskInvertedLists *od = dynamic_cast<const OnDiskInvertedLists*> (ils);
|
|
667
|
+
OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook()
|
|
668
|
+
: InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name()) {}
|
|
669
|
+
|
|
670
|
+
void OnDiskInvertedListsIOHook::write(const InvertedLists* ils, IOWriter* f)
|
|
671
|
+
const {
|
|
672
|
+
uint32_t h = fourcc("ilod");
|
|
673
|
+
WRITE1(h);
|
|
674
|
+
WRITE1(ils->nlist);
|
|
675
|
+
WRITE1(ils->code_size);
|
|
676
|
+
const OnDiskInvertedLists* od =
|
|
677
|
+
dynamic_cast<const OnDiskInvertedLists*>(ils);
|
|
713
678
|
// this is a POD object
|
|
714
|
-
WRITEVECTOR
|
|
679
|
+
WRITEVECTOR(od->lists);
|
|
715
680
|
|
|
716
681
|
{
|
|
717
682
|
std::vector<OnDiskInvertedLists::Slot> v(
|
|
@@ -725,14 +690,14 @@ void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) con
|
|
|
725
690
|
WRITE1(od->totsize);
|
|
726
691
|
}
|
|
727
692
|
|
|
728
|
-
InvertedLists
|
|
729
|
-
{
|
|
730
|
-
OnDiskInvertedLists
|
|
693
|
+
InvertedLists* OnDiskInvertedListsIOHook::read(IOReader* f, int io_flags)
|
|
694
|
+
const {
|
|
695
|
+
OnDiskInvertedLists* od = new OnDiskInvertedLists();
|
|
731
696
|
od->read_only = io_flags & IO_FLAG_READ_ONLY;
|
|
732
|
-
READ1
|
|
733
|
-
READ1
|
|
697
|
+
READ1(od->nlist);
|
|
698
|
+
READ1(od->code_size);
|
|
734
699
|
// this is a POD object
|
|
735
|
-
READVECTOR
|
|
700
|
+
READVECTOR(od->lists);
|
|
736
701
|
{
|
|
737
702
|
std::vector<OnDiskInvertedLists::Slot> v;
|
|
738
703
|
READVECTOR(v);
|
|
@@ -744,9 +709,10 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
|
|
744
709
|
od->filename.assign(x.begin(), x.end());
|
|
745
710
|
|
|
746
711
|
if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
|
|
747
|
-
FileIOReader
|
|
748
|
-
FAISS_THROW_IF_NOT_MSG
|
|
749
|
-
reader,
|
|
712
|
+
FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
|
|
713
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
714
|
+
reader,
|
|
715
|
+
"IO_FLAG_ONDISK_SAME_DIR only supported "
|
|
750
716
|
"when reading from file");
|
|
751
717
|
std::string indexname = reader->name;
|
|
752
718
|
std::string dirname = "./";
|
|
@@ -762,10 +728,10 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
|
|
762
728
|
filename = dirname + filename;
|
|
763
729
|
printf("IO_FLAG_ONDISK_SAME_DIR: "
|
|
764
730
|
"updating ondisk filename from %s to %s\n",
|
|
765
|
-
od->filename.c_str(),
|
|
731
|
+
od->filename.c_str(),
|
|
732
|
+
filename.c_str());
|
|
766
733
|
od->filename = filename;
|
|
767
734
|
}
|
|
768
|
-
|
|
769
735
|
}
|
|
770
736
|
READ1(od->totsize);
|
|
771
737
|
if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
@@ -775,53 +741,51 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
|
|
775
741
|
}
|
|
776
742
|
|
|
777
743
|
/** read from a ArrayInvertedLists into this invertedlist type */
|
|
778
|
-
InvertedLists
|
|
779
|
-
IOReader
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
744
|
+
InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
|
|
745
|
+
IOReader* f,
|
|
746
|
+
int /* io_flags */,
|
|
747
|
+
size_t nlist,
|
|
748
|
+
size_t code_size,
|
|
749
|
+
const std::vector<size_t>& sizes) const {
|
|
750
|
+
auto ails = new OnDiskInvertedLists();
|
|
784
751
|
ails->nlist = nlist;
|
|
785
752
|
ails->code_size = code_size;
|
|
786
753
|
ails->read_only = true;
|
|
787
|
-
ails->lists.resize
|
|
754
|
+
ails->lists.resize(nlist);
|
|
788
755
|
|
|
789
|
-
FileIOReader
|
|
756
|
+
FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
|
|
790
757
|
FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
|
|
791
|
-
FILE
|
|
758
|
+
FILE* fdesc = reader->f;
|
|
792
759
|
size_t o0 = ftell(fdesc);
|
|
793
760
|
size_t o = o0;
|
|
794
761
|
{ // do the mmap
|
|
795
762
|
struct stat buf;
|
|
796
|
-
int ret = fstat
|
|
797
|
-
FAISS_THROW_IF_NOT_FMT
|
|
798
|
-
"fstat failed: %s", strerror(errno));
|
|
763
|
+
int ret = fstat(fileno(fdesc), &buf);
|
|
764
|
+
FAISS_THROW_IF_NOT_FMT(ret == 0, "fstat failed: %s", strerror(errno));
|
|
799
765
|
ails->totsize = buf.st_size;
|
|
800
|
-
ails->ptr = (uint8_t*)mmap
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
766
|
+
ails->ptr = (uint8_t*)mmap(
|
|
767
|
+
nullptr,
|
|
768
|
+
ails->totsize,
|
|
769
|
+
PROT_READ,
|
|
770
|
+
MAP_SHARED,
|
|
771
|
+
fileno(fdesc),
|
|
772
|
+
0);
|
|
773
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
774
|
+
ails->ptr != MAP_FAILED, "could not mmap: %s", strerror(errno));
|
|
806
775
|
}
|
|
807
776
|
|
|
808
777
|
FAISS_THROW_IF_NOT(o <= ails->totsize);
|
|
809
778
|
|
|
810
779
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
811
|
-
OnDiskInvertedLists::List
|
|
780
|
+
OnDiskInvertedLists::List& l = ails->lists[i];
|
|
812
781
|
l.size = l.capacity = sizes[i];
|
|
813
782
|
l.offset = o;
|
|
814
|
-
o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
|
|
815
|
-
ails->code_size);
|
|
783
|
+
o += l.size * (sizeof(OnDiskInvertedLists::idx_t) + ails->code_size);
|
|
816
784
|
}
|
|
817
785
|
// resume normal reading of file
|
|
818
|
-
fseek
|
|
786
|
+
fseek(fdesc, o, SEEK_SET);
|
|
819
787
|
|
|
820
788
|
return ails;
|
|
821
789
|
}
|
|
822
790
|
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
791
|
} // namespace faiss
|