faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
|
|
8
8
|
#pragma once
|
|
9
9
|
|
|
10
|
-
|
|
11
10
|
#include <stdint.h>
|
|
12
11
|
#include <stdio.h>
|
|
13
12
|
|
|
@@ -15,23 +14,27 @@
|
|
|
15
14
|
|
|
16
15
|
namespace faiss {
|
|
17
16
|
|
|
18
|
-
|
|
19
17
|
/** partitions the table into 0:q and q:n where all elements above q are >= all
|
|
20
18
|
* elements below q (for C = CMax, for CMin comparisons are reversed)
|
|
21
19
|
*
|
|
22
20
|
* Returns the partition threshold. The elements q:n are destroyed on output.
|
|
23
21
|
*/
|
|
24
|
-
template<class C>
|
|
22
|
+
template <class C>
|
|
25
23
|
typename C::T partition_fuzzy(
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
typename C::T* vals,
|
|
25
|
+
typename C::TI* ids,
|
|
26
|
+
size_t n,
|
|
27
|
+
size_t q_min,
|
|
28
|
+
size_t q_max,
|
|
29
|
+
size_t* q_out);
|
|
28
30
|
|
|
29
31
|
/** simplified interface for when the parition is not fuzzy */
|
|
30
|
-
template<class C>
|
|
32
|
+
template <class C>
|
|
31
33
|
inline typename C::T partition(
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
typename C::T* vals,
|
|
35
|
+
typename C::TI* ids,
|
|
36
|
+
size_t n,
|
|
37
|
+
size_t q) {
|
|
35
38
|
return partition_fuzzy<C>(vals, ids, n, q, q, nullptr);
|
|
36
39
|
}
|
|
37
40
|
|
|
@@ -41,29 +44,31 @@ inline typename C::T partition(
|
|
|
41
44
|
* values outside the range are ignored.
|
|
42
45
|
* the data table should be aligned on 32 bytes */
|
|
43
46
|
void simd_histogram_8(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
+
const uint16_t* data,
|
|
48
|
+
int n,
|
|
49
|
+
uint16_t min,
|
|
50
|
+
int shift,
|
|
51
|
+
int* hist);
|
|
47
52
|
|
|
48
53
|
/** same for 16-bin histogram */
|
|
49
54
|
void simd_histogram_16(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
55
|
+
const uint16_t* data,
|
|
56
|
+
int n,
|
|
57
|
+
uint16_t min,
|
|
58
|
+
int shift,
|
|
59
|
+
int* hist);
|
|
54
60
|
|
|
55
61
|
struct PartitionStats {
|
|
56
62
|
uint64_t bissect_cycles;
|
|
57
63
|
uint64_t compress_cycles;
|
|
58
64
|
|
|
59
|
-
PartitionStats
|
|
60
|
-
|
|
65
|
+
PartitionStats() {
|
|
66
|
+
reset();
|
|
67
|
+
}
|
|
68
|
+
void reset();
|
|
61
69
|
};
|
|
62
70
|
|
|
63
71
|
// global var that collects them all
|
|
64
72
|
FAISS_API extern PartitionStats partition_stats;
|
|
65
73
|
|
|
66
|
-
|
|
67
|
-
|
|
68
74
|
} // namespace faiss
|
|
69
|
-
|
|
@@ -5,150 +5,157 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/utils/quantize_lut.h>
|
|
10
9
|
|
|
10
|
+
#include <algorithm>
|
|
11
11
|
#include <cmath>
|
|
12
12
|
#include <cstring>
|
|
13
13
|
#include <vector>
|
|
14
|
-
#include <algorithm>
|
|
15
14
|
|
|
16
15
|
#include <faiss/impl/FaissAssert.h>
|
|
17
16
|
|
|
18
|
-
|
|
19
17
|
namespace faiss {
|
|
20
18
|
|
|
21
|
-
|
|
22
19
|
namespace quantize_lut {
|
|
23
20
|
|
|
24
|
-
|
|
25
21
|
/******************************************************
|
|
26
22
|
* Quantize look-up tables
|
|
27
23
|
******************************************************/
|
|
28
24
|
|
|
29
25
|
namespace {
|
|
30
26
|
|
|
31
|
-
float round_uint8_and_mul(float
|
|
27
|
+
float round_uint8_and_mul(float* tab, size_t n) {
|
|
32
28
|
float max = 0;
|
|
33
|
-
for(int i = 0; i < n; i++) {
|
|
34
|
-
if(fabs(tab[i]) > max) {
|
|
29
|
+
for (int i = 0; i < n; i++) {
|
|
30
|
+
if (fabs(tab[i]) > max) {
|
|
35
31
|
max = fabs(tab[i]);
|
|
36
32
|
}
|
|
37
33
|
}
|
|
38
34
|
float multiplier = 127 / max;
|
|
39
|
-
for(int i = 0; i < n; i++) {
|
|
35
|
+
for (int i = 0; i < n; i++) {
|
|
40
36
|
tab[i] = floorf(tab[i] * multiplier + 128);
|
|
41
37
|
}
|
|
42
38
|
return multiplier;
|
|
43
39
|
}
|
|
44
40
|
|
|
45
41
|
// there can be NaNs in tables, they should be ignored
|
|
46
|
-
float tab_min(const float
|
|
42
|
+
float tab_min(const float* tab, size_t n) {
|
|
47
43
|
float min = HUGE_VAL;
|
|
48
|
-
for(int i = 0; i < n; i++) {
|
|
49
|
-
if (tab[i] < min)
|
|
44
|
+
for (int i = 0; i < n; i++) {
|
|
45
|
+
if (tab[i] < min)
|
|
46
|
+
min = tab[i];
|
|
50
47
|
}
|
|
51
48
|
return min;
|
|
52
49
|
}
|
|
53
50
|
|
|
54
|
-
float tab_max(const float
|
|
51
|
+
float tab_max(const float* tab, size_t n) {
|
|
55
52
|
float max = -HUGE_VAL;
|
|
56
|
-
for(int i = 0; i < n; i++) {
|
|
57
|
-
if (tab[i] > max)
|
|
53
|
+
for (int i = 0; i < n; i++) {
|
|
54
|
+
if (tab[i] > max)
|
|
55
|
+
max = tab[i];
|
|
58
56
|
}
|
|
59
57
|
return max;
|
|
60
58
|
}
|
|
61
59
|
|
|
62
|
-
void round_tab(float
|
|
63
|
-
for(int i = 0; i < n; i++) {
|
|
60
|
+
void round_tab(float* tab, size_t n, float a, float bi) {
|
|
61
|
+
for (int i = 0; i < n; i++) {
|
|
64
62
|
tab[i] = floorf((tab[i] - bi) * a + 0.5);
|
|
65
63
|
}
|
|
66
64
|
}
|
|
67
65
|
|
|
68
|
-
template<typename T>
|
|
69
|
-
void round_tab(const float
|
|
70
|
-
for(int i = 0; i < n; i++) {
|
|
66
|
+
template <typename T>
|
|
67
|
+
void round_tab(const float* tab, size_t n, float a, float bi, T* tab_out) {
|
|
68
|
+
for (int i = 0; i < n; i++) {
|
|
71
69
|
tab_out[i] = (T)floorf((tab[i] - bi) * a + 0.5);
|
|
72
70
|
}
|
|
73
71
|
}
|
|
74
72
|
|
|
75
|
-
|
|
76
|
-
|
|
77
73
|
} // anonymous namespace
|
|
78
74
|
|
|
79
75
|
void round_uint8_per_column(
|
|
80
|
-
float
|
|
81
|
-
|
|
82
|
-
|
|
76
|
+
float* tab,
|
|
77
|
+
size_t n,
|
|
78
|
+
size_t d,
|
|
79
|
+
float* a_out,
|
|
80
|
+
float* b_out) {
|
|
83
81
|
float max_span = 0;
|
|
84
82
|
std::vector<float> mins(n);
|
|
85
|
-
for(int i = 0; i < n; i++) {
|
|
83
|
+
for (int i = 0; i < n; i++) {
|
|
86
84
|
mins[i] = tab_min(tab + i * d, d);
|
|
87
85
|
float span = tab_max(tab + i * d, d) - mins[i];
|
|
88
|
-
if(span > max_span) {
|
|
86
|
+
if (span > max_span) {
|
|
89
87
|
max_span = span;
|
|
90
88
|
}
|
|
91
89
|
}
|
|
92
90
|
float a = 255 / max_span;
|
|
93
91
|
float b = 0;
|
|
94
|
-
for(int i = 0; i < n; i++) {
|
|
92
|
+
for (int i = 0; i < n; i++) {
|
|
95
93
|
b += mins[i];
|
|
96
94
|
round_tab(tab + i * d, d, a, mins[i]);
|
|
97
95
|
}
|
|
98
|
-
if (a_out)
|
|
99
|
-
|
|
96
|
+
if (a_out)
|
|
97
|
+
*a_out = a;
|
|
98
|
+
if (b_out)
|
|
99
|
+
*b_out = b;
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
void round_uint8_per_column_multi(
|
|
103
|
-
float
|
|
104
|
-
|
|
105
|
-
|
|
103
|
+
float* tab,
|
|
104
|
+
size_t m,
|
|
105
|
+
size_t n,
|
|
106
|
+
size_t d,
|
|
107
|
+
float* a_out,
|
|
108
|
+
float* b_out) {
|
|
106
109
|
float max_span = 0;
|
|
107
110
|
std::vector<float> mins(n);
|
|
108
|
-
for(int i = 0; i < n; i++) {
|
|
111
|
+
for (int i = 0; i < n; i++) {
|
|
109
112
|
float min_i = HUGE_VAL;
|
|
110
113
|
float max_i = -HUGE_VAL;
|
|
111
|
-
for(int j = 0; j < m; j++) {
|
|
114
|
+
for (int j = 0; j < m; j++) {
|
|
112
115
|
min_i = std::min(min_i, tab_min(tab + (j * n + i) * d, d));
|
|
113
116
|
max_i = std::max(max_i, tab_max(tab + (j * n + i) * d, d));
|
|
114
117
|
}
|
|
115
118
|
mins[i] = min_i;
|
|
116
119
|
float span = max_i - min_i;
|
|
117
|
-
if(span > max_span) {
|
|
120
|
+
if (span > max_span) {
|
|
118
121
|
max_span = span;
|
|
119
122
|
}
|
|
120
123
|
}
|
|
121
124
|
float a = 255 / max_span;
|
|
122
125
|
float b = 0;
|
|
123
|
-
for(int i = 0; i < n; i++) {
|
|
126
|
+
for (int i = 0; i < n; i++) {
|
|
124
127
|
b += mins[i];
|
|
125
|
-
for(int j = 0; j < m; j++) {
|
|
128
|
+
for (int j = 0; j < m; j++) {
|
|
126
129
|
round_tab(tab + (j * n + i) * d, d, a, mins[i]);
|
|
127
130
|
}
|
|
128
131
|
}
|
|
129
|
-
if (a_out)
|
|
130
|
-
|
|
132
|
+
if (a_out)
|
|
133
|
+
*a_out = a;
|
|
134
|
+
if (b_out)
|
|
135
|
+
*b_out = b;
|
|
131
136
|
}
|
|
132
137
|
|
|
133
|
-
|
|
134
138
|
// translation of
|
|
135
139
|
// https://github.com/fairinternal/faiss_improvements/blob/7122c3cc6ddb0a371d8aa6f1309cd8bcf2335e61/LUT_quantization.ipynb
|
|
136
140
|
void quantize_LUT_and_bias(
|
|
137
|
-
size_t nprobe,
|
|
141
|
+
size_t nprobe,
|
|
142
|
+
size_t M,
|
|
143
|
+
size_t ksub,
|
|
138
144
|
bool lut_is_3d,
|
|
139
|
-
const float
|
|
140
|
-
const float
|
|
141
|
-
uint8_t
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
+
const float* LUT,
|
|
146
|
+
const float* bias,
|
|
147
|
+
uint8_t* LUTq,
|
|
148
|
+
size_t M2,
|
|
149
|
+
uint16_t* biasq,
|
|
150
|
+
float* a_out,
|
|
151
|
+
float* b_out) {
|
|
145
152
|
float a, b;
|
|
146
153
|
if (!bias) {
|
|
147
154
|
FAISS_THROW_IF_NOT(!lut_is_3d);
|
|
148
155
|
std::vector<float> mins(M);
|
|
149
156
|
float max_span_LUT = -HUGE_VAL, max_span_dis = 0;
|
|
150
157
|
b = 0;
|
|
151
|
-
for(int i = 0; i < M; i++) {
|
|
158
|
+
for (int i = 0; i < M; i++) {
|
|
152
159
|
mins[i] = tab_min(LUT + i * ksub, ksub);
|
|
153
160
|
float span = tab_max(LUT + i * ksub, ksub) - mins[i];
|
|
154
161
|
max_span_LUT = std::max(max_span_LUT, span);
|
|
@@ -157,7 +164,7 @@ void quantize_LUT_and_bias(
|
|
|
157
164
|
}
|
|
158
165
|
a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
|
|
159
166
|
|
|
160
|
-
for(int i = 0; i < M; i++) {
|
|
167
|
+
for (int i = 0; i < M; i++) {
|
|
161
168
|
round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
|
|
162
169
|
}
|
|
163
170
|
memset(LUTq + M * ksub, 0, ksub * (M2 - M));
|
|
@@ -168,7 +175,7 @@ void quantize_LUT_and_bias(
|
|
|
168
175
|
float bias_max = tab_max(bias, nprobe);
|
|
169
176
|
max_span_dis = bias_max - bias_min;
|
|
170
177
|
b = 0;
|
|
171
|
-
for(int i = 0; i < M; i++) {
|
|
178
|
+
for (int i = 0; i < M; i++) {
|
|
172
179
|
mins[i] = tab_min(LUT + i * ksub, ksub);
|
|
173
180
|
float span = tab_max(LUT + i * ksub, ksub) - mins[i];
|
|
174
181
|
max_span_LUT = std::max(max_span_LUT, span);
|
|
@@ -178,7 +185,7 @@ void quantize_LUT_and_bias(
|
|
|
178
185
|
a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
|
|
179
186
|
b += bias_min;
|
|
180
187
|
|
|
181
|
-
for(int i = 0; i < M; i++) {
|
|
188
|
+
for (int i = 0; i < M; i++) {
|
|
182
189
|
round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
|
|
183
190
|
}
|
|
184
191
|
memset(LUTq + M * ksub, 0, ksub * (M2 - M));
|
|
@@ -196,7 +203,7 @@ void quantize_LUT_and_bias(
|
|
|
196
203
|
for (int j = 0; j < nprobe; j++) {
|
|
197
204
|
float max_span_dis_j = bias[j] - bias_min;
|
|
198
205
|
float b2j = bias[j];
|
|
199
|
-
for(int i = 0; i < M; i++) {
|
|
206
|
+
for (int i = 0; i < M; i++) {
|
|
200
207
|
mins[ij] = tab_min(LUT + ij * ksub, ksub);
|
|
201
208
|
float span = tab_max(LUT + ij * ksub, ksub) - mins[ij];
|
|
202
209
|
max_span_LUT = std::max(max_span_LUT, span);
|
|
@@ -214,9 +221,11 @@ void quantize_LUT_and_bias(
|
|
|
214
221
|
ij = 0;
|
|
215
222
|
size_t ij_2 = 0;
|
|
216
223
|
for (int j = 0; j < nprobe; j++) {
|
|
217
|
-
for(int i = 0; i < M; i++) {
|
|
218
|
-
round_tab(
|
|
219
|
-
|
|
224
|
+
for (int i = 0; i < M; i++) {
|
|
225
|
+
round_tab(
|
|
226
|
+
LUT + ij * ksub, ksub, a, mins[ij], LUTq + ij_2 * ksub);
|
|
227
|
+
ij++;
|
|
228
|
+
ij_2++;
|
|
220
229
|
}
|
|
221
230
|
memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
|
|
222
231
|
ij_2 += M2 - M;
|
|
@@ -227,11 +236,11 @@ void quantize_LUT_and_bias(
|
|
|
227
236
|
} else { // !biasq
|
|
228
237
|
// then we integrate the bias into the LUTs
|
|
229
238
|
std::vector<float> LUT2_storage(nprobe * M * ksub);
|
|
230
|
-
float
|
|
239
|
+
float* LUT2 = LUT2_storage.data();
|
|
231
240
|
size_t ijc = 0;
|
|
232
241
|
for (int j = 0; j < nprobe; j++) {
|
|
233
242
|
float bias_j = bias[j] / M;
|
|
234
|
-
for(int i = 0; i < M; i++) {
|
|
243
|
+
for (int i = 0; i < M; i++) {
|
|
235
244
|
for (int c = 0; c < ksub; c++) {
|
|
236
245
|
LUT2[ijc] = LUT[ijc] + bias_j;
|
|
237
246
|
ijc++;
|
|
@@ -241,7 +250,7 @@ void quantize_LUT_and_bias(
|
|
|
241
250
|
std::vector<float> mins(M, HUGE_VAL), maxs(M, -HUGE_VAL);
|
|
242
251
|
size_t ij = 0;
|
|
243
252
|
for (int j = 0; j < nprobe; j++) {
|
|
244
|
-
for(int i = 0; i < M; i++) {
|
|
253
|
+
for (int i = 0; i < M; i++) {
|
|
245
254
|
mins[i] = std::min(mins[i], tab_min(LUT2 + ij * ksub, ksub));
|
|
246
255
|
maxs[i] = std::max(maxs[i], tab_max(LUT2 + ij * ksub, ksub));
|
|
247
256
|
ij++;
|
|
@@ -250,7 +259,7 @@ void quantize_LUT_and_bias(
|
|
|
250
259
|
|
|
251
260
|
float max_span = -HUGE_VAL;
|
|
252
261
|
b = 0;
|
|
253
|
-
for(int i = 0; i < M; i++) {
|
|
262
|
+
for (int i = 0; i < M; i++) {
|
|
254
263
|
float span = maxs[i] - mins[i];
|
|
255
264
|
max_span = std::max(max_span, span);
|
|
256
265
|
b += mins[i];
|
|
@@ -259,19 +268,22 @@ void quantize_LUT_and_bias(
|
|
|
259
268
|
ij = 0;
|
|
260
269
|
size_t ij_2 = 0;
|
|
261
270
|
for (int j = 0; j < nprobe; j++) {
|
|
262
|
-
for(int i = 0; i < M; i++) {
|
|
263
|
-
round_tab(
|
|
264
|
-
|
|
271
|
+
for (int i = 0; i < M; i++) {
|
|
272
|
+
round_tab(
|
|
273
|
+
LUT2 + ij * ksub, ksub, a, mins[i], LUTq + ij_2 * ksub);
|
|
274
|
+
ij++;
|
|
275
|
+
ij_2++;
|
|
265
276
|
}
|
|
266
277
|
memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
|
|
267
278
|
ij_2 += M2 - M;
|
|
268
279
|
}
|
|
269
280
|
}
|
|
270
|
-
if (a_out)
|
|
271
|
-
|
|
281
|
+
if (a_out)
|
|
282
|
+
*a_out = a;
|
|
283
|
+
if (b_out)
|
|
284
|
+
*b_out = b;
|
|
272
285
|
}
|
|
273
286
|
|
|
274
|
-
|
|
275
287
|
} // namespace quantize_lut
|
|
276
288
|
|
|
277
289
|
} // namespace faiss
|
|
@@ -5,12 +5,10 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
#include <cstdio>
|
|
13
10
|
#include <cstdint>
|
|
11
|
+
#include <cstdio>
|
|
14
12
|
|
|
15
13
|
namespace faiss {
|
|
16
14
|
|
|
@@ -32,19 +30,23 @@ namespace quantize_lut {
|
|
|
32
30
|
* @param tab input/output, size (n, d)
|
|
33
31
|
*/
|
|
34
32
|
void round_uint8_per_column(
|
|
35
|
-
float
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
33
|
+
float* tab,
|
|
34
|
+
size_t n,
|
|
35
|
+
size_t d,
|
|
36
|
+
float* a_out = nullptr,
|
|
37
|
+
float* b_out = nullptr);
|
|
40
38
|
|
|
41
39
|
/* affine quantizer, a and b are the affine coefficients
|
|
42
40
|
*
|
|
43
41
|
* @param tab input/output, size (m, n, d)
|
|
44
42
|
*/
|
|
45
43
|
void round_uint8_per_column_multi(
|
|
46
|
-
float
|
|
47
|
-
|
|
44
|
+
float* tab,
|
|
45
|
+
size_t m,
|
|
46
|
+
size_t n,
|
|
47
|
+
size_t d,
|
|
48
|
+
float* a_out = nullptr,
|
|
49
|
+
float* b_out = nullptr);
|
|
48
50
|
|
|
49
51
|
/** LUT quantization to uint8 and bias to uint16.
|
|
50
52
|
*
|
|
@@ -63,18 +65,18 @@ void round_uint8_per_column_multi(
|
|
|
63
65
|
*/
|
|
64
66
|
|
|
65
67
|
void quantize_LUT_and_bias(
|
|
66
|
-
size_t nprobe,
|
|
68
|
+
size_t nprobe,
|
|
69
|
+
size_t M,
|
|
70
|
+
size_t ksub,
|
|
67
71
|
bool lut_is_3d,
|
|
68
|
-
const float
|
|
69
|
-
const float
|
|
70
|
-
uint8_t
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
const float* LUT,
|
|
73
|
+
const float* bias,
|
|
74
|
+
uint8_t* LUTq,
|
|
75
|
+
size_t M2,
|
|
76
|
+
uint16_t* biasq,
|
|
77
|
+
float* a_out = nullptr,
|
|
78
|
+
float* b_out = nullptr);
|
|
75
79
|
|
|
76
80
|
} // namespace quantize_lut
|
|
77
81
|
|
|
78
82
|
} // namespace faiss
|
|
79
|
-
|
|
80
|
-
|
|
@@ -15,79 +15,67 @@ namespace faiss {
|
|
|
15
15
|
* Random data generation functions
|
|
16
16
|
**************************************************/
|
|
17
17
|
|
|
18
|
-
RandomGenerator::RandomGenerator
|
|
19
|
-
: mt((unsigned int)seed) {}
|
|
18
|
+
RandomGenerator::RandomGenerator(int64_t seed) : mt((unsigned int)seed) {}
|
|
20
19
|
|
|
21
|
-
int RandomGenerator::rand_int
|
|
22
|
-
{
|
|
20
|
+
int RandomGenerator::rand_int() {
|
|
23
21
|
return mt() & 0x7fffffff;
|
|
24
22
|
}
|
|
25
23
|
|
|
26
|
-
int64_t RandomGenerator::rand_int64
|
|
27
|
-
{
|
|
24
|
+
int64_t RandomGenerator::rand_int64() {
|
|
28
25
|
return int64_t(rand_int()) | int64_t(rand_int()) << 31;
|
|
29
26
|
}
|
|
30
27
|
|
|
31
|
-
int RandomGenerator::rand_int
|
|
32
|
-
{
|
|
28
|
+
int RandomGenerator::rand_int(int max) {
|
|
33
29
|
return mt() % max;
|
|
34
30
|
}
|
|
35
31
|
|
|
36
|
-
float RandomGenerator::rand_float
|
|
37
|
-
{
|
|
32
|
+
float RandomGenerator::rand_float() {
|
|
38
33
|
return mt() / float(mt.max());
|
|
39
34
|
}
|
|
40
35
|
|
|
41
|
-
double RandomGenerator::rand_double
|
|
42
|
-
{
|
|
36
|
+
double RandomGenerator::rand_double() {
|
|
43
37
|
return mt() / double(mt.max());
|
|
44
38
|
}
|
|
45
39
|
|
|
46
|
-
|
|
47
40
|
/***********************************************************************
|
|
48
41
|
* Random functions in this C file only exist because Torch
|
|
49
42
|
* counterparts are slow and not multi-threaded. Typical use is for
|
|
50
43
|
* more than 1-100 billion values. */
|
|
51
44
|
|
|
52
|
-
|
|
53
45
|
/* Generate a set of random floating point values such that x[i] in [0,1]
|
|
54
46
|
multi-threading. For this reason, we rely on re-entreant functions. */
|
|
55
|
-
void float_rand
|
|
56
|
-
{
|
|
47
|
+
void float_rand(float* x, size_t n, int64_t seed) {
|
|
57
48
|
// only try to parallelize on large enough arrays
|
|
58
49
|
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
59
50
|
|
|
60
|
-
RandomGenerator rng0
|
|
61
|
-
int a0 = rng0.rand_int
|
|
51
|
+
RandomGenerator rng0(seed);
|
|
52
|
+
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
62
53
|
|
|
63
54
|
#pragma omp parallel for
|
|
64
55
|
for (int64_t j = 0; j < nblock; j++) {
|
|
65
|
-
|
|
66
|
-
RandomGenerator rng (a0 + j * b0);
|
|
56
|
+
RandomGenerator rng(a0 + j * b0);
|
|
67
57
|
|
|
68
58
|
const size_t istart = j * n / nblock;
|
|
69
59
|
const size_t iend = (j + 1) * n / nblock;
|
|
70
60
|
|
|
71
61
|
for (size_t i = istart; i < iend; i++)
|
|
72
|
-
x[i] = rng.rand_float
|
|
62
|
+
x[i] = rng.rand_float();
|
|
73
63
|
}
|
|
74
64
|
}
|
|
75
65
|
|
|
76
|
-
|
|
77
|
-
void float_randn (float * x, size_t n, int64_t seed)
|
|
78
|
-
{
|
|
66
|
+
void float_randn(float* x, size_t n, int64_t seed) {
|
|
79
67
|
// only try to parallelize on large enough arrays
|
|
80
68
|
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
81
69
|
|
|
82
|
-
RandomGenerator rng0
|
|
83
|
-
int a0 = rng0.rand_int
|
|
70
|
+
RandomGenerator rng0(seed);
|
|
71
|
+
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
84
72
|
|
|
85
73
|
#pragma omp parallel for
|
|
86
74
|
for (int64_t j = 0; j < nblock; j++) {
|
|
87
|
-
RandomGenerator rng
|
|
75
|
+
RandomGenerator rng(a0 + j * b0);
|
|
88
76
|
|
|
89
77
|
double a = 0, b = 0, s = 0;
|
|
90
|
-
int state = 0;
|
|
78
|
+
int state = 0; /* generate two number per "do-while" loop */
|
|
91
79
|
|
|
92
80
|
const size_t istart = j * n / nblock;
|
|
93
81
|
const size_t iend = (j + 1) * n / nblock;
|
|
@@ -96,96 +84,84 @@ void float_randn (float * x, size_t n, int64_t seed)
|
|
|
96
84
|
/* Marsaglia's method (see Knuth) */
|
|
97
85
|
if (state == 0) {
|
|
98
86
|
do {
|
|
99
|
-
a = 2.0 * rng.rand_double
|
|
100
|
-
b = 2.0 * rng.rand_double
|
|
87
|
+
a = 2.0 * rng.rand_double() - 1;
|
|
88
|
+
b = 2.0 * rng.rand_double() - 1;
|
|
101
89
|
s = a * a + b * b;
|
|
102
90
|
} while (s >= 1.0);
|
|
103
91
|
x[i] = a * sqrt(-2.0 * log(s) / s);
|
|
104
|
-
}
|
|
105
|
-
else
|
|
92
|
+
} else
|
|
106
93
|
x[i] = b * sqrt(-2.0 * log(s) / s);
|
|
107
94
|
state = 1 - state;
|
|
108
95
|
}
|
|
109
96
|
}
|
|
110
97
|
}
|
|
111
98
|
|
|
112
|
-
|
|
113
99
|
/* Integer versions */
|
|
114
|
-
void int64_rand
|
|
115
|
-
{
|
|
100
|
+
void int64_rand(int64_t* x, size_t n, int64_t seed) {
|
|
116
101
|
// only try to parallelize on large enough arrays
|
|
117
102
|
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
118
103
|
|
|
119
|
-
RandomGenerator rng0
|
|
120
|
-
int a0 = rng0.rand_int
|
|
104
|
+
RandomGenerator rng0(seed);
|
|
105
|
+
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
121
106
|
|
|
122
107
|
#pragma omp parallel for
|
|
123
108
|
for (int64_t j = 0; j < nblock; j++) {
|
|
124
|
-
|
|
125
|
-
RandomGenerator rng (a0 + j * b0);
|
|
109
|
+
RandomGenerator rng(a0 + j * b0);
|
|
126
110
|
|
|
127
111
|
const size_t istart = j * n / nblock;
|
|
128
112
|
const size_t iend = (j + 1) * n / nblock;
|
|
129
113
|
for (size_t i = istart; i < iend; i++)
|
|
130
|
-
x[i] = rng.rand_int64
|
|
114
|
+
x[i] = rng.rand_int64();
|
|
131
115
|
}
|
|
132
116
|
}
|
|
133
117
|
|
|
134
|
-
void int64_rand_max
|
|
135
|
-
{
|
|
118
|
+
void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed) {
|
|
136
119
|
// only try to parallelize on large enough arrays
|
|
137
120
|
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
138
121
|
|
|
139
|
-
RandomGenerator rng0
|
|
140
|
-
int a0 = rng0.rand_int
|
|
122
|
+
RandomGenerator rng0(seed);
|
|
123
|
+
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
141
124
|
|
|
142
125
|
#pragma omp parallel for
|
|
143
126
|
for (int64_t j = 0; j < nblock; j++) {
|
|
144
|
-
|
|
145
|
-
RandomGenerator rng (a0 + j * b0);
|
|
127
|
+
RandomGenerator rng(a0 + j * b0);
|
|
146
128
|
|
|
147
129
|
const size_t istart = j * n / nblock;
|
|
148
130
|
const size_t iend = (j + 1) * n / nblock;
|
|
149
131
|
for (size_t i = istart; i < iend; i++)
|
|
150
|
-
x[i] = rng.rand_int64
|
|
132
|
+
x[i] = rng.rand_int64() % max;
|
|
151
133
|
}
|
|
152
134
|
}
|
|
153
135
|
|
|
136
|
+
void rand_perm(int* perm, size_t n, int64_t seed) {
|
|
137
|
+
for (size_t i = 0; i < n; i++)
|
|
138
|
+
perm[i] = i;
|
|
154
139
|
|
|
155
|
-
|
|
156
|
-
{
|
|
157
|
-
for (size_t i = 0; i < n; i++) perm[i] = i;
|
|
158
|
-
|
|
159
|
-
RandomGenerator rng (seed);
|
|
140
|
+
RandomGenerator rng(seed);
|
|
160
141
|
|
|
161
142
|
for (size_t i = 0; i + 1 < n; i++) {
|
|
162
|
-
int i2 = i + rng.rand_int
|
|
143
|
+
int i2 = i + rng.rand_int(n - i);
|
|
163
144
|
std::swap(perm[i], perm[i2]);
|
|
164
145
|
}
|
|
165
146
|
}
|
|
166
147
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
void byte_rand (uint8_t * x, size_t n, int64_t seed)
|
|
171
|
-
{
|
|
148
|
+
void byte_rand(uint8_t* x, size_t n, int64_t seed) {
|
|
172
149
|
// only try to parallelize on large enough arrays
|
|
173
150
|
const size_t nblock = n < 1024 ? 1 : 1024;
|
|
174
151
|
|
|
175
|
-
RandomGenerator rng0
|
|
176
|
-
int a0 = rng0.rand_int
|
|
152
|
+
RandomGenerator rng0(seed);
|
|
153
|
+
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
177
154
|
|
|
178
155
|
#pragma omp parallel for
|
|
179
156
|
for (int64_t j = 0; j < nblock; j++) {
|
|
180
|
-
|
|
181
|
-
RandomGenerator rng (a0 + j * b0);
|
|
157
|
+
RandomGenerator rng(a0 + j * b0);
|
|
182
158
|
|
|
183
159
|
const size_t istart = j * n / nblock;
|
|
184
160
|
const size_t iend = (j + 1) * n / nblock;
|
|
185
161
|
|
|
186
162
|
size_t i;
|
|
187
163
|
for (i = istart; i < iend; i++)
|
|
188
|
-
x[i] = rng.rand_int64
|
|
164
|
+
x[i] = rng.rand_int64();
|
|
189
165
|
}
|
|
190
166
|
}
|
|
191
167
|
|