faiss 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +2 -0
- data/ext/faiss/index.cpp +33 -6
- data/ext/faiss/index_binary.cpp +17 -4
- data/ext/faiss/kmeans.cpp +6 -6
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -3
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +2 -2
- data/vendor/faiss/faiss/Clustering.h +2 -2
- data/vendor/faiss/faiss/IVFlib.cpp +26 -51
- data/vendor/faiss/faiss/IVFlib.h +1 -1
- data/vendor/faiss/faiss/Index.cpp +11 -0
- data/vendor/faiss/faiss/Index.h +34 -11
- data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexBinary.h +7 -7
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +8 -2
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
- data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
- data/vendor/faiss/faiss/IndexFastScan.h +102 -7
- data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
- data/vendor/faiss/faiss/IndexFlat.h +81 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +93 -2
- data/vendor/faiss/faiss/IndexHNSW.h +58 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
- data/vendor/faiss/faiss/IndexIDMap.h +6 -6
- data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.h +5 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
- data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +251 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +99 -8
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +828 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +252 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -1
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
- data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -13
- data/vendor/faiss/faiss/IndexRaBitQ.h +11 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +731 -0
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +175 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
- data/vendor/faiss/faiss/IndexRefine.h +17 -0
- data/vendor/faiss/faiss/IndexShards.cpp +1 -1
- data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
- data/vendor/faiss/faiss/MetricType.h +1 -1
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +5 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +11 -7
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +77 -6
- data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +295 -16
- data/vendor/faiss/faiss/impl/HNSW.h +35 -6
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
- data/vendor/faiss/faiss/impl/Panorama.h +204 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
- data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +294 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +330 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +304 -223
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +72 -4
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +7 -10
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +238 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +212 -19
- data/vendor/faiss/faiss/impl/io.cpp +2 -2
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
- data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
- data/vendor/faiss/faiss/impl/svs_io.h +67 -0
- data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
- data/vendor/faiss/faiss/index_factory.cpp +217 -8
- data/vendor/faiss/faiss/index_factory.h +1 -1
- data/vendor/faiss/faiss/index_io.h +1 -1
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +115 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.h +46 -0
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
- data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +3 -3
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
- data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
- data/vendor/faiss/faiss/utils/distances.cpp +0 -3
- data/vendor/faiss/faiss/utils/distances.h +2 -2
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
- data/vendor/faiss/faiss/utils/hamming.h +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
- data/vendor/faiss/faiss/utils/random.cpp +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +9 -2
- data/vendor/faiss/faiss/utils/utils.h +2 -2
- metadata +29 -1
|
@@ -150,7 +150,7 @@ inline void heap_replace_top(
|
|
|
150
150
|
bh_ids[i] = id;
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
/* Partial
|
|
153
|
+
/* Partial instantiation for heaps with TI = int64_t */
|
|
154
154
|
|
|
155
155
|
template <typename T>
|
|
156
156
|
inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
|
|
@@ -393,7 +393,7 @@ inline void heap_addn(
|
|
|
393
393
|
}
|
|
394
394
|
}
|
|
395
395
|
|
|
396
|
-
/* Partial
|
|
396
|
+
/* Partial instantiation for heaps with TI = int64_t */
|
|
397
397
|
|
|
398
398
|
template <typename T>
|
|
399
399
|
inline void minheap_addn(
|
|
@@ -489,7 +489,7 @@ struct HeapArray {
|
|
|
489
489
|
return val + key * k;
|
|
490
490
|
}
|
|
491
491
|
|
|
492
|
-
///
|
|
492
|
+
/// Corresponding identifiers
|
|
493
493
|
TI* get_ids(size_t key) {
|
|
494
494
|
return ids + key * k;
|
|
495
495
|
}
|
|
@@ -75,7 +75,7 @@ struct Embedding {
|
|
|
75
75
|
};
|
|
76
76
|
|
|
77
77
|
/// Feed forward layer that expands to a hidden dimension, applies a ReLU non
|
|
78
|
-
/// linearity and maps back to the
|
|
78
|
+
/// linearity and maps back to the original dimension
|
|
79
79
|
struct FFN {
|
|
80
80
|
Linear linear1, linear2;
|
|
81
81
|
|
|
@@ -103,7 +103,7 @@ struct QINCoStep {
|
|
|
103
103
|
return residual_blocks[i];
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
-
/** encode a set of vectors x with
|
|
106
|
+
/** encode a set of vectors x with initial estimate xhat. Optionally return
|
|
107
107
|
* the delta to be added to xhat to form the new xhat */
|
|
108
108
|
nn::Int32Tensor2D encode(
|
|
109
109
|
const nn::Tensor2D& xhat,
|
|
@@ -141,7 +141,7 @@ struct QINCo : NeuralNetCodec {
|
|
|
141
141
|
|
|
142
142
|
nn::Int32Tensor2D encode(const nn::Tensor2D& x) const override;
|
|
143
143
|
|
|
144
|
-
virtual ~QINCo() {}
|
|
144
|
+
virtual ~QINCo() override {}
|
|
145
145
|
};
|
|
146
146
|
|
|
147
147
|
} // namespace faiss
|
|
@@ -50,8 +50,8 @@
|
|
|
50
50
|
// for j in range(0, NBUCKETS):
|
|
51
51
|
// idx = beam * n + i * NBUCKETS + j
|
|
52
52
|
// if distances[idx] < local_min_distances[j]:
|
|
53
|
-
// local_min_distances[
|
|
54
|
-
// local_min_indices[
|
|
53
|
+
// local_min_distances[j] = distances[idx]
|
|
54
|
+
// local_min_indices[j] = indices[idx]
|
|
55
55
|
//
|
|
56
56
|
// for j in range(0, NBUCKETS):
|
|
57
57
|
// heap.push(local_min_distances[j], local_min_indices[j])
|
|
@@ -106,7 +106,7 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
|
|
|
106
106
|
distance_candidate,
|
|
107
107
|
_CMP_LE_OS);
|
|
108
108
|
|
|
109
|
-
// // blend seems to be slower
|
|
109
|
+
// // blend seems to be slower than min
|
|
110
110
|
// const __m256 min_distances_new = _mm256_blendv_ps(
|
|
111
111
|
// distance_candidate,
|
|
112
112
|
// min_distances_i[j][p],
|
|
@@ -120,7 +120,7 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
|
|
|
120
120
|
min_indices_i[j][p]),
|
|
121
121
|
comparison));
|
|
122
122
|
|
|
123
|
-
// // blend seems to be slower
|
|
123
|
+
// // blend seems to be slower than min
|
|
124
124
|
// const __m256 max_distances_new = _mm256_blendv_ps(
|
|
125
125
|
// min_distances_i[j][p],
|
|
126
126
|
// distance_candidate,
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
/// It seems that only the limited number of combinations are
|
|
22
22
|
/// meaningful, because of the limited supply of SIMD registers.
|
|
23
23
|
/// Also, certain combinations, such as B32_D1 and B16_D1, were concluded
|
|
24
|
-
/// to be not very precise in benchmarks, so
|
|
24
|
+
/// to be not very precise in benchmarks, so they were not introduced.
|
|
25
25
|
///
|
|
26
26
|
/// TODO: Consider d-ary SIMD heap.
|
|
27
27
|
|
|
@@ -321,7 +321,6 @@ void exhaustive_L2sqr_blas_default_impl(
|
|
|
321
321
|
ip_block.get(),
|
|
322
322
|
&nyi);
|
|
323
323
|
}
|
|
324
|
-
#pragma omp parallel for
|
|
325
324
|
for (int64_t i = i0; i < i1; i++) {
|
|
326
325
|
float* ip_line = ip_block.get() + (i - i0) * (j1 - j0);
|
|
327
326
|
|
|
@@ -423,7 +422,6 @@ void exhaustive_L2sqr_blas_cmax_avx2(
|
|
|
423
422
|
ip_block.get(),
|
|
424
423
|
&nyi);
|
|
425
424
|
}
|
|
426
|
-
#pragma omp parallel for
|
|
427
425
|
for (int64_t i = i0; i < i1; i++) {
|
|
428
426
|
float* ip_line = ip_block.get() + (i - i0) * (j1 - j0);
|
|
429
427
|
|
|
@@ -633,7 +631,6 @@ void exhaustive_L2sqr_blas_cmax_sve(
|
|
|
633
631
|
ip_block.get(),
|
|
634
632
|
&nyi);
|
|
635
633
|
}
|
|
636
|
-
#pragma omp parallel for
|
|
637
634
|
for (int64_t i = i0; i < i1; i++) {
|
|
638
635
|
const size_t count = j1 - j0;
|
|
639
636
|
float* ip_line = ip_block.get() + (i - i0) * count;
|
|
@@ -324,7 +324,7 @@ void knn_inner_product(
|
|
|
324
324
|
* vector y, for the L2 distance
|
|
325
325
|
* @param x query vectors, size nx * d
|
|
326
326
|
* @param y database vectors, size ny * d
|
|
327
|
-
* @param res result heap
|
|
327
|
+
* @param res result heap structure, which also provides k. Sorted on output
|
|
328
328
|
* @param y_norm2 (optional) norms for the y vectors (nullptr or size ny)
|
|
329
329
|
* @param sel search in this subset of vectors
|
|
330
330
|
*/
|
|
@@ -389,7 +389,7 @@ void knn_inner_products_by_idx(
|
|
|
389
389
|
* @param x query vectors, size nx * d
|
|
390
390
|
* @param y database vectors, size (max(ids) + 1) * d
|
|
391
391
|
* @param subset subset of database vectors to consider, size (nx, nsubset)
|
|
392
|
-
* @param res
|
|
392
|
+
* @param res result structure
|
|
393
393
|
* @param ld_subset stride for the subset array. -1: use nsubset, 0: all queries
|
|
394
394
|
* process the same subset
|
|
395
395
|
*/
|
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
8
10
|
/** In this file are the implementations of extra metrics beyond L2
|
|
9
11
|
* and inner product */
|
|
10
12
|
|
|
@@ -188,7 +190,7 @@ inline float VectorDistance<METRIC_GOWER>::operator()(
|
|
|
188
190
|
|
|
189
191
|
/***************************************************************************
|
|
190
192
|
* Dispatching function that takes a metric type and a consumer object
|
|
191
|
-
* the consumer object should contain a
|
|
193
|
+
* the consumer object should contain a return type T and an operation template
|
|
192
194
|
* function f() that is called to perform the operation. The first argument
|
|
193
195
|
* of the function is the VectorDistance object. The rest are passed in as is.
|
|
194
196
|
**************************************************************************/
|
|
@@ -257,12 +257,13 @@ void hammings_knn_mc(
|
|
|
257
257
|
|
|
258
258
|
std::vector<HCounterState<HammingComputer>> cs;
|
|
259
259
|
for (size_t i = 0; i < na; ++i) {
|
|
260
|
-
cs.push_back(
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
260
|
+
cs.push_back(
|
|
261
|
+
HCounterState<HammingComputer>(
|
|
262
|
+
all_counters.data() + i * nBuckets,
|
|
263
|
+
all_ids_per_dis.get() + i * nBuckets * k,
|
|
264
|
+
a + i * bytes_per_code,
|
|
265
|
+
8 * bytes_per_code,
|
|
266
|
+
k));
|
|
266
267
|
}
|
|
267
268
|
|
|
268
269
|
const size_t block_size = hamming_batch_size;
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* fvecs2bitvecs).
|
|
15
15
|
*
|
|
16
16
|
* User-defined type hamdis_t is used for distances because at this time
|
|
17
|
-
* it is still
|
|
17
|
+
* it is still unclear how we will need to balance
|
|
18
18
|
* - flexibility in vector size (may need 16- or even 8-bit vectors)
|
|
19
19
|
* - memory usage
|
|
20
20
|
* - cache-misses when dealing with large volumes of data (fewer bits is better)
|
|
@@ -30,8 +30,7 @@ inline int popcount64(uint64_t x) {
|
|
|
30
30
|
// This table was moved from .cpp to .h file, because
|
|
31
31
|
// otherwise it was causing compilation errors while trying to
|
|
32
32
|
// compile swig modules on Windows.
|
|
33
|
-
|
|
34
|
-
static constexpr uint8_t hamdis_tab_ham_bytes[256] = {
|
|
33
|
+
inline constexpr uint8_t hamdis_tab_ham_bytes[256] = {
|
|
35
34
|
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
36
35
|
2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
37
36
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
@@ -140,7 +140,7 @@ typename C::T partition_fuzzy_median3(
|
|
|
140
140
|
using T = typename C::T;
|
|
141
141
|
|
|
142
142
|
// here we use bissection with a median of 3 to find the threshold and
|
|
143
|
-
// compress the arrays afterwards. So it's a n*log(n)
|
|
143
|
+
// compress the arrays afterwards. So it's a n*log(n) algorithm rather than
|
|
144
144
|
// qselect's O(n) but it avoids shuffling around the array.
|
|
145
145
|
|
|
146
146
|
FAISS_THROW_IF_NOT(n >= 3);
|
|
@@ -350,7 +350,7 @@ int simd_compress_array(
|
|
|
350
350
|
}
|
|
351
351
|
}
|
|
352
352
|
|
|
353
|
-
// handle remaining, only
|
|
353
|
+
// handle remaining, only strictly lt ones.
|
|
354
354
|
for (; i0 + 15 < n; i0 += 16) {
|
|
355
355
|
simd16uint16 v(vals + i0);
|
|
356
356
|
simd16uint16 max2 = max_func<C>(v, thr16);
|
|
@@ -506,7 +506,7 @@ uint16_t simd_partition_fuzzy_with_bounds(
|
|
|
506
506
|
|
|
507
507
|
uint64_t t2 = get_cy();
|
|
508
508
|
|
|
509
|
-
partition_stats.
|
|
509
|
+
partition_stats.bisect_cycles += t1 - t0;
|
|
510
510
|
partition_stats.compress_cycles += t2 - t1;
|
|
511
511
|
|
|
512
512
|
return thresh;
|
|
@@ -662,7 +662,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
|
|
|
662
662
|
}
|
|
663
663
|
}
|
|
664
664
|
|
|
665
|
-
IFV printf("end
|
|
665
|
+
IFV printf("end bisection: thresh=%d q=%ld n_eq=%ld\n", thresh, q, n_eq);
|
|
666
666
|
|
|
667
667
|
if (!C::is_max) {
|
|
668
668
|
if (n_eq == 0) {
|
|
@@ -762,7 +762,7 @@ typename C::T partition_fuzzy(
|
|
|
762
762
|
vals, ids, n, q_min, q_max, q_out);
|
|
763
763
|
}
|
|
764
764
|
|
|
765
|
-
// explicit template
|
|
765
|
+
// explicit template instantiations
|
|
766
766
|
|
|
767
767
|
template float partition_fuzzy<CMin<float, int64_t>>(
|
|
768
768
|
float* vals,
|
|
@@ -28,7 +28,7 @@ typename C::T partition_fuzzy(
|
|
|
28
28
|
size_t q_max,
|
|
29
29
|
size_t* q_out);
|
|
30
30
|
|
|
31
|
-
/** simplified interface for when the
|
|
31
|
+
/** simplified interface for when the partition is not fuzzy */
|
|
32
32
|
template <class C>
|
|
33
33
|
inline typename C::T partition(
|
|
34
34
|
typename C::T* vals,
|
|
@@ -59,7 +59,7 @@ void simd_histogram_16(
|
|
|
59
59
|
int* hist);
|
|
60
60
|
|
|
61
61
|
struct PartitionStats {
|
|
62
|
-
uint64_t
|
|
62
|
+
uint64_t bisect_cycles;
|
|
63
63
|
uint64_t compress_cycles;
|
|
64
64
|
|
|
65
65
|
PartitionStats() {
|