faiss 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.h +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +35 -4
  6. data/vendor/faiss/faiss/Clustering.h +10 -1
  7. data/vendor/faiss/faiss/IVFlib.cpp +4 -1
  8. data/vendor/faiss/faiss/Index.h +21 -6
  9. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  10. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -1
  11. data/vendor/faiss/faiss/IndexFastScan.cpp +22 -4
  12. data/vendor/faiss/faiss/IndexFlat.cpp +11 -7
  13. data/vendor/faiss/faiss/IndexFlatCodes.cpp +159 -5
  14. data/vendor/faiss/faiss/IndexFlatCodes.h +20 -3
  15. data/vendor/faiss/faiss/IndexHNSW.cpp +143 -90
  16. data/vendor/faiss/faiss/IndexHNSW.h +52 -3
  17. data/vendor/faiss/faiss/IndexIVF.cpp +3 -3
  18. data/vendor/faiss/faiss/IndexIVF.h +9 -1
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +15 -0
  20. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -0
  21. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +130 -57
  22. data/vendor/faiss/faiss/IndexIVFFastScan.h +14 -7
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +1 -3
  24. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +21 -2
  25. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  26. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -29
  28. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  29. data/vendor/faiss/faiss/IndexNSG.h +1 -1
  30. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  31. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  32. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  33. data/vendor/faiss/faiss/IndexRefine.cpp +5 -5
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +3 -1
  35. data/vendor/faiss/faiss/MetricType.h +7 -2
  36. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  37. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  38. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  39. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  40. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +36 -4
  41. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +6 -0
  42. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  43. data/vendor/faiss/faiss/gpu/GpuIndex.h +2 -8
  44. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +6 -0
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +2 -0
  47. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +25 -0
  48. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  49. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +6 -0
  50. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  51. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +65 -0
  52. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  53. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  54. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  55. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  56. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +25 -0
  57. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +9 -1
  58. data/vendor/faiss/faiss/impl/DistanceComputer.h +46 -0
  59. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  60. data/vendor/faiss/faiss/impl/HNSW.cpp +358 -190
  61. data/vendor/faiss/faiss/impl/HNSW.h +43 -22
  62. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +8 -8
  63. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  64. data/vendor/faiss/faiss/impl/NNDescent.cpp +13 -8
  65. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +1 -0
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +5 -1
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +151 -32
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +719 -102
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -0
  71. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +5 -0
  72. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  73. data/vendor/faiss/faiss/impl/index_read.cpp +29 -15
  74. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  75. data/vendor/faiss/faiss/impl/index_write.cpp +28 -10
  76. data/vendor/faiss/faiss/impl/io.cpp +13 -5
  77. data/vendor/faiss/faiss/impl/io.h +4 -4
  78. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  79. data/vendor/faiss/faiss/impl/platform_macros.h +22 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +11 -0
  81. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +1 -1
  82. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +448 -1
  83. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +5 -5
  84. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  85. data/vendor/faiss/faiss/impl/simd_result_handlers.h +143 -59
  86. data/vendor/faiss/faiss/index_factory.cpp +31 -13
  87. data/vendor/faiss/faiss/index_io.h +12 -5
  88. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  89. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  90. data/vendor/faiss/faiss/invlists/DirectMap.cpp +9 -1
  91. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +55 -17
  92. data/vendor/faiss/faiss/invlists/InvertedLists.h +18 -9
  93. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +21 -6
  94. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  95. data/vendor/faiss/faiss/python/python_callbacks.cpp +3 -3
  96. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  97. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  98. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  99. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  100. data/vendor/faiss/faiss/utils/distances.cpp +58 -88
  101. data/vendor/faiss/faiss/utils/distances.h +5 -5
  102. data/vendor/faiss/faiss/utils/distances_simd.cpp +997 -9
  103. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  104. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  105. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  106. data/vendor/faiss/faiss/utils/hamming.cpp +1 -1
  107. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +4 -1
  108. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +2 -1
  109. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  110. data/vendor/faiss/faiss/utils/random.h +25 -0
  111. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  112. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  113. data/vendor/faiss/faiss/utils/simdlib_neon.h +5 -2
  114. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  115. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  116. data/vendor/faiss/faiss/utils/utils.cpp +10 -3
  117. data/vendor/faiss/faiss/utils/utils.h +3 -0
  118. metadata +16 -4
  119. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  #include <faiss/gpu/impl/InterleavedCodes.h>
9
+ #include <faiss/gpu/utils/DeviceUtils.h>
9
10
  #include <faiss/gpu/utils/StaticUtils.h>
10
11
  #include <faiss/impl/FaissAssert.h>
11
12
 
@@ -166,15 +167,16 @@ void unpackInterleavedWord(
166
167
  int numVecs,
167
168
  int dims,
168
169
  int bitsPerCode) {
169
- int wordsPerDimBlock = 32 * bitsPerCode / (8 * sizeof(T));
170
+ int warpSize = getWarpSizeCurrentDevice();
171
+ int wordsPerDimBlock = (size_t)warpSize * bitsPerCode / (8 * sizeof(T));
170
172
  int wordsPerBlock = wordsPerDimBlock * dims;
171
- int numBlocks = utils::divUp(numVecs, 32);
173
+ int numBlocks = utils::divUp(numVecs, warpSize);
172
174
 
173
175
  #pragma omp parallel for
174
176
  for (int i = 0; i < numVecs; ++i) {
175
- int block = i / 32;
177
+ int block = i / warpSize;
176
178
  FAISS_ASSERT(block < numBlocks);
177
- int lane = i % 32;
179
+ int lane = i % warpSize;
178
180
 
179
181
  for (int j = 0; j < dims; ++j) {
180
182
  int srcOffset = block * wordsPerBlock + j * wordsPerDimBlock + lane;
@@ -188,9 +190,10 @@ std::vector<uint8_t> unpackInterleaved(
188
190
  int numVecs,
189
191
  int dims,
190
192
  int bitsPerCode) {
191
- int bytesPerDimBlock = 32 * bitsPerCode / 8;
193
+ int warpSize = getWarpSizeCurrentDevice();
194
+ int bytesPerDimBlock = warpSize * bitsPerCode / 8;
192
195
  int bytesPerBlock = bytesPerDimBlock * dims;
193
- int numBlocks = utils::divUp(numVecs, 32);
196
+ int numBlocks = utils::divUp(numVecs, warpSize);
194
197
  size_t totalSize = (size_t)bytesPerBlock * numBlocks;
195
198
  FAISS_ASSERT(data.size() == totalSize);
196
199
 
@@ -217,8 +220,8 @@ std::vector<uint8_t> unpackInterleaved(
217
220
  } else if (bitsPerCode == 4) {
218
221
  #pragma omp parallel for
219
222
  for (int i = 0; i < numVecs; ++i) {
220
- int block = i / 32;
221
- int lane = i % 32;
223
+ int block = i / warpSize;
224
+ int lane = i % warpSize;
222
225
 
223
226
  int word = lane / 2;
224
227
  int subWord = lane % 2;
@@ -235,8 +238,8 @@ std::vector<uint8_t> unpackInterleaved(
235
238
  } else if (bitsPerCode == 5) {
236
239
  #pragma omp parallel for
237
240
  for (int i = 0; i < numVecs; ++i) {
238
- int block = i / 32;
239
- int blockVector = i % 32;
241
+ int block = i / warpSize;
242
+ int blockVector = i % warpSize;
240
243
 
241
244
  for (int j = 0; j < dims; ++j) {
242
245
  uint8_t* dimBlock =
@@ -257,8 +260,8 @@ std::vector<uint8_t> unpackInterleaved(
257
260
  } else if (bitsPerCode == 6) {
258
261
  #pragma omp parallel for
259
262
  for (int i = 0; i < numVecs; ++i) {
260
- int block = i / 32;
261
- int blockVector = i % 32;
263
+ int block = i / warpSize;
264
+ int blockVector = i % warpSize;
262
265
 
263
266
  for (int j = 0; j < dims; ++j) {
264
267
  uint8_t* dimBlock =
@@ -442,17 +445,18 @@ void packInterleavedWord(
442
445
  int numVecs,
443
446
  int dims,
444
447
  int bitsPerCode) {
445
- int wordsPerDimBlock = 32 * bitsPerCode / (8 * sizeof(T));
448
+ int warpSize = getWarpSizeCurrentDevice();
449
+ int wordsPerDimBlock = (size_t)warpSize * bitsPerCode / (8 * sizeof(T));
446
450
  int wordsPerBlock = wordsPerDimBlock * dims;
447
- int numBlocks = utils::divUp(numVecs, 32);
451
+ int numBlocks = utils::divUp(numVecs, warpSize);
448
452
 
449
453
  // We're guaranteed that all other slots not filled by the vectors present
450
454
  // are initialized to zero (from the vector constructor in packInterleaved)
451
455
  #pragma omp parallel for
452
456
  for (int i = 0; i < numVecs; ++i) {
453
- int block = i / 32;
457
+ int block = i / warpSize;
454
458
  FAISS_ASSERT(block < numBlocks);
455
- int lane = i % 32;
459
+ int lane = i % warpSize;
456
460
 
457
461
  for (int j = 0; j < dims; ++j) {
458
462
  int dstOffset = block * wordsPerBlock + j * wordsPerDimBlock + lane;
@@ -466,9 +470,10 @@ std::vector<uint8_t> packInterleaved(
466
470
  int numVecs,
467
471
  int dims,
468
472
  int bitsPerCode) {
469
- int bytesPerDimBlock = 32 * bitsPerCode / 8;
473
+ int warpSize = getWarpSizeCurrentDevice();
474
+ int bytesPerDimBlock = warpSize * bitsPerCode / 8;
470
475
  int bytesPerBlock = bytesPerDimBlock * dims;
471
- int numBlocks = utils::divUp(numVecs, 32);
476
+ int numBlocks = utils::divUp(numVecs, warpSize);
472
477
  size_t totalSize = (size_t)bytesPerBlock * numBlocks;
473
478
 
474
479
  // bit codes padded to whole bytes
@@ -499,7 +504,7 @@ std::vector<uint8_t> packInterleaved(
499
504
  for (int i = 0; i < numBlocks; ++i) {
500
505
  for (int j = 0; j < dims; ++j) {
501
506
  for (int k = 0; k < bytesPerDimBlock; ++k) {
502
- int loVec = i * 32 + k * 2;
507
+ int loVec = i * warpSize + k * 2;
503
508
  int hiVec = loVec + 1;
504
509
 
505
510
  uint8_t lo = loVec < numVecs ? data[loVec * dims + j] : 0;
@@ -516,7 +521,7 @@ std::vector<uint8_t> packInterleaved(
516
521
  for (int j = 0; j < dims; ++j) {
517
522
  for (int k = 0; k < bytesPerDimBlock; ++k) {
518
523
  // What input vectors we are pulling from
519
- int loVec = i * 32 + (k * 8) / 5;
524
+ int loVec = i * warpSize + (k * 8) / 5;
520
525
  int hiVec = loVec + 1;
521
526
  int hiVec2 = hiVec + 1;
522
527
 
@@ -536,7 +541,7 @@ std::vector<uint8_t> packInterleaved(
536
541
  for (int j = 0; j < dims; ++j) {
537
542
  for (int k = 0; k < bytesPerDimBlock; ++k) {
538
543
  // What input vectors we are pulling from
539
- int loVec = i * 32 + (k * 8) / 6;
544
+ int loVec = i * warpSize + (k * 8) / 6;
540
545
  int hiVec = loVec + 1;
541
546
 
542
547
  uint8_t lo = loVec < numVecs ? data[loVec * dims + j] : 0;
@@ -17,6 +17,7 @@
17
17
  #include <vector>
18
18
 
19
19
  #include <cuda_profiler_api.h>
20
+ #include <faiss/impl/AuxIndexStructures.h>
20
21
 
21
22
  DEFINE_int32(num, 10000, "# of vecs");
22
23
  DEFINE_int32(k, 100, "# of clusters");
@@ -34,6 +35,7 @@ DEFINE_int64(
34
35
  "minimum size to use CPU -> GPU paged copies");
35
36
  DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");
36
37
  DEFINE_int32(max_points, -1, "max points per centroid");
38
+ DEFINE_double(timeout, 0, "timeout in seconds");
37
39
 
38
40
  using namespace faiss::gpu;
39
41
 
@@ -99,10 +101,14 @@ int main(int argc, char** argv) {
99
101
  cp.max_points_per_centroid = FLAGS_max_points;
100
102
  }
101
103
 
104
+ auto tc = new faiss::TimeoutCallback();
105
+ faiss::InterruptCallback::instance.reset(tc);
106
+
102
107
  faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
103
108
 
104
109
  // Time k-means
105
110
  {
111
+ tc->set_timeout(FLAGS_timeout);
106
112
  CpuTimer timer;
107
113
 
108
114
  kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
@@ -7,6 +7,7 @@
7
7
 
8
8
  #include <faiss/gpu/impl/InterleavedCodes.h>
9
9
  #include <faiss/gpu/test/TestUtils.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
10
11
  #include <faiss/gpu/utils/StaticUtils.h>
11
12
  #include <gtest/gtest.h>
12
13
  #include <cmath>
@@ -119,8 +120,9 @@ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
119
120
  std::cout << bitsPerCode << " " << dims << " " << numVecs
120
121
  << "\n";
121
122
 
122
- int blocks = utils::divUp(numVecs, 32);
123
- int bytesPerDimBlock = 32 * bitsPerCode / 8;
123
+ int warpSize = getWarpSizeCurrentDevice();
124
+ int blocks = utils::divUp(numVecs, warpSize);
125
+ int bytesPerDimBlock = warpSize * bitsPerCode / 8;
124
126
  int bytesPerBlock = bytesPerDimBlock * dims;
125
127
  int size = blocks * bytesPerBlock;
126
128
 
@@ -132,9 +134,9 @@ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
132
134
 
133
135
  for (int i = 0; i < blocks; ++i) {
134
136
  for (int j = 0; j < dims; ++j) {
135
- for (int k = 0; k < 32; ++k) {
137
+ for (int k = 0; k < warpSize; ++k) {
136
138
  for (int l = 0; l < bytesPerCode; ++l) {
137
- int vec = i * 32 + k;
139
+ int vec = i * warpSize + k;
138
140
  if (vec < numVecs) {
139
141
  data[i * bytesPerBlock +
140
142
  j * bytesPerDimBlock +
@@ -148,7 +150,8 @@ TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
148
150
  for (int i = 0; i < blocks; ++i) {
149
151
  for (int j = 0; j < dims; ++j) {
150
152
  for (int k = 0; k < bytesPerDimBlock; ++k) {
151
- int loVec = i * 32 + (k * 8) / bitsPerCode;
153
+ int loVec =
154
+ i * warpSize + (k * 8) / bitsPerCode;
152
155
  int hiVec = loVec + 1;
153
156
  int hiVec2 = hiVec + 1;
154
157
 
@@ -842,6 +842,71 @@ TEST(TestGpuIndexIVFFlat, LongIVFList) {
842
842
  #endif
843
843
  }
844
844
 
845
+ TEST(TestGpuIndexIVFFlat, Reconstruct_n) {
846
+ Options opt;
847
+
848
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
849
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
850
+
851
+ faiss::IndexFlatL2 cpuQuantizer(opt.dim);
852
+ faiss::IndexIVFFlat cpuIndex(
853
+ &cpuQuantizer, opt.dim, opt.numCentroids, faiss::METRIC_L2);
854
+ cpuIndex.nprobe = opt.nprobe;
855
+ cpuIndex.train(opt.numTrain, trainVecs.data());
856
+ cpuIndex.add(opt.numAdd, addVecs.data());
857
+
858
+ faiss::gpu::StandardGpuResources res;
859
+ res.noTempMemory();
860
+
861
+ faiss::gpu::GpuIndexIVFFlatConfig config;
862
+ config.device = opt.device;
863
+ config.indicesOptions = faiss::gpu::INDICES_64_BIT;
864
+ config.use_raft = false;
865
+
866
+ faiss::gpu::GpuIndexIVFFlat gpuIndex(
867
+ &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
868
+ gpuIndex.nprobe = opt.nprobe;
869
+
870
+ gpuIndex.train(opt.numTrain, trainVecs.data());
871
+ gpuIndex.add(opt.numAdd, addVecs.data());
872
+
873
+ std::vector<float> gpuVals(opt.numAdd * opt.dim);
874
+
875
+ gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
876
+
877
+ std::vector<float> cpuVals(opt.numAdd * opt.dim);
878
+
879
+ cpuIndex.reconstruct_n(0, cpuIndex.ntotal, cpuVals.data());
880
+
881
+ EXPECT_EQ(gpuVals, cpuVals);
882
+
883
+ config.indicesOptions = faiss::gpu::INDICES_32_BIT;
884
+
885
+ faiss::gpu::GpuIndexIVFFlat gpuIndex1(
886
+ &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
887
+ gpuIndex1.nprobe = opt.nprobe;
888
+
889
+ gpuIndex1.train(opt.numTrain, trainVecs.data());
890
+ gpuIndex1.add(opt.numAdd, addVecs.data());
891
+
892
+ gpuIndex1.reconstruct_n(0, gpuIndex1.ntotal, gpuVals.data());
893
+
894
+ EXPECT_EQ(gpuVals, cpuVals);
895
+
896
+ config.indicesOptions = faiss::gpu::INDICES_CPU;
897
+
898
+ faiss::gpu::GpuIndexIVFFlat gpuIndex2(
899
+ &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
900
+ gpuIndex2.nprobe = opt.nprobe;
901
+
902
+ gpuIndex2.train(opt.numTrain, trainVecs.data());
903
+ gpuIndex2.add(opt.numAdd, addVecs.data());
904
+
905
+ gpuIndex2.reconstruct_n(0, gpuIndex2.ntotal, gpuVals.data());
906
+
907
+ EXPECT_EQ(gpuVals, cpuVals);
908
+ }
909
+
845
910
  int main(int argc, char** argv) {
846
911
  testing::InitGoogleTest(&argc, argv);
847
912
 
@@ -20,7 +20,7 @@
20
20
 
21
21
  double elapsed() {
22
22
  struct timeval tv;
23
- gettimeofday(&tv, NULL);
23
+ gettimeofday(&tv, nullptr);
24
24
  return tv.tv_sec + tv.tv_usec * 1e-6;
25
25
  }
26
26
 
@@ -76,6 +76,12 @@ bool getTensorCoreSupport(int device);
76
76
  /// Equivalent to getTensorCoreSupport(getCurrentDevice())
77
77
  bool getTensorCoreSupportCurrentDevice();
78
78
 
79
+ /// Returns the warp size of the given GPU device
80
+ int getWarpSize(int device);
81
+
82
+ /// Equivalent to getWarpSize(getCurrentDevice())
83
+ int getWarpSizeCurrentDevice();
84
+
79
85
  /// Returns the amount of currently available memory on the given device
80
86
  size_t getFreeMemory(int device);
81
87
 
@@ -14,7 +14,10 @@ namespace faiss {
14
14
  namespace gpu {
15
15
 
16
16
  KernelTimer::KernelTimer(cudaStream_t stream)
17
- : startEvent_(0), stopEvent_(0), stream_(stream), valid_(true) {
17
+ : startEvent_(nullptr),
18
+ stopEvent_(nullptr),
19
+ stream_(stream),
20
+ valid_(true) {
18
21
  CUDA_VERIFY(cudaEventCreate(&startEvent_));
19
22
  CUDA_VERIFY(cudaEventCreate(&stopEvent_));
20
23
 
@@ -18,7 +18,7 @@ class KernelTimer {
18
18
  public:
19
19
  /// Constructor starts the timer and adds an event into the current
20
20
  /// device stream
21
- KernelTimer(cudaStream_t stream = 0);
21
+ KernelTimer(cudaStream_t stream = nullptr);
22
22
 
23
23
  /// Destructor releases event resources
24
24
  ~KernelTimer();
@@ -236,4 +236,29 @@ size_t InterruptCallback::get_period_hint(size_t flops) {
236
236
  return std::max((size_t)10 * 10 * 1000 * 1000 / (flops + 1), (size_t)1);
237
237
  }
238
238
 
239
+ void TimeoutCallback::set_timeout(double timeout_in_seconds) {
240
+ timeout = timeout_in_seconds;
241
+ start = std::chrono::steady_clock::now();
242
+ }
243
+
244
+ bool TimeoutCallback::want_interrupt() {
245
+ if (timeout == 0) {
246
+ return false;
247
+ }
248
+ auto end = std::chrono::steady_clock::now();
249
+ std::chrono::duration<float, std::milli> duration = end - start;
250
+ float elapsed_in_seconds = duration.count() / 1000.0;
251
+ if (elapsed_in_seconds > timeout) {
252
+ timeout = 0;
253
+ return true;
254
+ }
255
+ return false;
256
+ }
257
+
258
+ void TimeoutCallback::reset(double timeout_in_seconds) {
259
+ auto tc(new faiss::TimeoutCallback());
260
+ faiss::InterruptCallback::instance.reset(tc);
261
+ tc->set_timeout(timeout_in_seconds);
262
+ }
263
+
239
264
  } // namespace faiss
@@ -122,7 +122,7 @@ struct RangeSearchPartialResult : BufferList {
122
122
  void copy_result(bool incremental = false);
123
123
 
124
124
  /// merge a set of PartialResult's into one RangeSearchResult
125
- /// on ouptut the partialresults are empty!
125
+ /// on output the partialresults are empty!
126
126
  static void merge(
127
127
  std::vector<RangeSearchPartialResult*>& partial_results,
128
128
  bool do_delete = true);
@@ -161,6 +161,14 @@ struct FAISS_API InterruptCallback {
161
161
  static size_t get_period_hint(size_t flops);
162
162
  };
163
163
 
164
+ struct TimeoutCallback : InterruptCallback {
165
+ std::chrono::time_point<std::chrono::steady_clock> start;
166
+ double timeout;
167
+ bool want_interrupt() override;
168
+ void set_timeout(double timeout_in_seconds);
169
+ static void reset(double timeout_in_seconds);
170
+ };
171
+
164
172
  /// set implementation optimized for fast access.
165
173
  struct VisitedTable {
166
174
  std::vector<uint8_t> visited;
@@ -59,6 +59,52 @@ struct DistanceComputer {
59
59
  virtual ~DistanceComputer() {}
60
60
  };
61
61
 
62
+ /* Wrap the distance computer into one that negates the
63
+ distances. This makes supporting INNER_PRODUCE search easier */
64
+
65
+ struct NegativeDistanceComputer : DistanceComputer {
66
+ /// owned by this
67
+ DistanceComputer* basedis;
68
+
69
+ explicit NegativeDistanceComputer(DistanceComputer* basedis)
70
+ : basedis(basedis) {}
71
+
72
+ void set_query(const float* x) override {
73
+ basedis->set_query(x);
74
+ }
75
+
76
+ /// compute distance of vector i to current query
77
+ float operator()(idx_t i) override {
78
+ return -(*basedis)(i);
79
+ }
80
+
81
+ void distances_batch_4(
82
+ const idx_t idx0,
83
+ const idx_t idx1,
84
+ const idx_t idx2,
85
+ const idx_t idx3,
86
+ float& dis0,
87
+ float& dis1,
88
+ float& dis2,
89
+ float& dis3) override {
90
+ basedis->distances_batch_4(
91
+ idx0, idx1, idx2, idx3, dis0, dis1, dis2, dis3);
92
+ dis0 = -dis0;
93
+ dis1 = -dis1;
94
+ dis2 = -dis2;
95
+ dis3 = -dis3;
96
+ }
97
+
98
+ /// compute distance between two stored vectors
99
+ float symmetric_dis(idx_t i, idx_t j) override {
100
+ return -basedis->symmetric_dis(i, j);
101
+ }
102
+
103
+ virtual ~NegativeDistanceComputer() {
104
+ delete basedis;
105
+ }
106
+ };
107
+
62
108
  /*************************************************************
63
109
  * Specialized version of the DistanceComputer when we know that codes are
64
110
  * laid out in a flat index.
@@ -94,13 +94,15 @@
94
94
  } \
95
95
  } while (false)
96
96
 
97
- #define FAISS_THROW_IF_NOT_MSG(X, MSG) \
97
+ #define FAISS_THROW_IF_MSG(X, MSG) \
98
98
  do { \
99
- if (!(X)) { \
99
+ if (X) { \
100
100
  FAISS_THROW_FMT("Error: '%s' failed: " MSG, #X); \
101
101
  } \
102
102
  } while (false)
103
103
 
104
+ #define FAISS_THROW_IF_NOT_MSG(X, MSG) FAISS_THROW_IF_MSG(!(X), MSG)
105
+
104
106
  #define FAISS_THROW_IF_NOT_FMT(X, FMT, ...) \
105
107
  do { \
106
108
  if (!(X)) { \