faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 97fd4d583754d20a5771e941fd5272205977c34b77eb9252e58f6016b90be52e
4
- data.tar.gz: 26df73fc7891efd894e9ebf4ed37db2aaa3af8b62221cdd707cb287f920ad0a7
3
+ metadata.gz: 4541b0c40468723a8bed3db80d5778fb156afa999cbbd14b83653285b3ae6267
4
+ data.tar.gz: 2beeacbad82a578e2a410938bc4447d73699ef3998c146b4309e4b44816f5e33
5
5
  SHA512:
6
- metadata.gz: 197143a79061c299b8f5e5ae76466db1a673e46f6d842615bed46170afadb6145e14df936262ec45f7e0d1eea889037cb3123c5a9348e32774eeb97414f180fd
7
- data.tar.gz: 5a8e1ecebd43886d974e52fa216d86d9c03e3f8c04629eddc04a8be8df5c7ab4fc4c57398f2893d44718c788cd1853ae1bee06f64f1d49cdc5330cb03c5c0e6c
6
+ metadata.gz: 92d87492ff627e094ef29a48c8e4579dd976846ec9a95d94c45b816391dcea332d370a5fbde9c3f2bdc11964e061557d0e150b223ab6ac0209c092f1e1cd4a6e
7
+ data.tar.gz: 86fc2aaf3151545f24128429cf1369074809f8449f277a2676e9dacea76c156191b3d6df6df83f0d7ac0f061bfdf537b81ef57f6cbbb861e4ba4b592d06e8ca2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.4 (2021-02-04)
2
+
3
+ - Updated Faiss to 1.7.0
4
+
1
5
  ## 0.1.3 (2020-10-22)
2
6
 
3
7
  - Updated Faiss to 1.6.4
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020 Andrew Kane
4
+ Copyright (c) 2020-2021 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
6
6
 
7
- [![Build Status](https://travis-ci.org/ankane/faiss.svg?branch=master)](https://travis-ci.org/ankane/faiss)
7
+ [![Build Status](https://github.com/ankane/faiss/workflows/build/badge.svg?branch=master)](https://github.com/ankane/faiss/actions)
8
8
 
9
9
  ## Installation
10
10
 
data/ext/faiss/extconf.rb CHANGED
@@ -9,7 +9,7 @@ $CXXFLAGS << " -std=c++11 -march=native -DFINTEGER=int"
9
9
  ext = File.expand_path(".", __dir__)
10
10
  vendor = File.expand_path("../../vendor/faiss", __dir__)
11
11
 
12
- $srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/impl,#{vendor}/faiss/utils}/*.{cpp}"]
12
+ $srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/{impl,invlists,utils}}/*.{cpp}"]
13
13
  $objs = $srcs.map { |v| v.sub(/cpp\z/, "o") }
14
14
  $INCFLAGS << " -I#{vendor}"
15
15
  $VPATH << vendor
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
@@ -15,6 +15,7 @@
15
15
 
16
16
  #include <cinttypes>
17
17
  #include <cmath>
18
+ #include <typeinfo>
18
19
 
19
20
  #include <faiss/impl/FaissAssert.h>
20
21
  #include <faiss/utils/utils.h>
@@ -32,6 +33,7 @@
32
33
  #include <faiss/MetaIndexes.h>
33
34
  #include <faiss/IndexScalarQuantizer.h>
34
35
  #include <faiss/IndexHNSW.h>
36
+ #include <faiss/IndexRefine.h>
35
37
 
36
38
  #include <faiss/IndexBinaryFlat.h>
37
39
  #include <faiss/IndexBinaryHNSW.h>
@@ -234,7 +236,7 @@ void OperatingPoints::display (bool only_optimal) const
234
236
  {
235
237
  const std::vector<OperatingPoint> &pts =
236
238
  only_optimal ? optimal_pts : all_pts;
237
- printf("Tested %zd operating points, %zd ones are optimal:\n",
239
+ printf("Tested %zd operating points, %zd ones are Pareto-optimal:\n",
238
240
  all_pts.size(), optimal_pts.size());
239
241
 
240
242
  for (int i = 0; i < pts.size(); i++) {
@@ -333,7 +335,7 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,
333
335
  pr.values.push_back (pq.code_size * 8);
334
336
  }
335
337
 
336
- ParameterRange &ParameterSpace::add_range(const char * name)
338
+ ParameterRange &ParameterSpace::add_range(const std::string & name)
337
339
  {
338
340
  for (auto & pr : parameter_ranges) {
339
341
  if (pr.name == name) {
@@ -346,13 +348,13 @@ ParameterRange &ParameterSpace::add_range(const char * name)
346
348
  }
347
349
 
348
350
 
349
- /// initialize with reasonable parameters for the index
351
+ /// initialize with reasonable parameters for this type of index
350
352
  void ParameterSpace::initialize (const Index * index)
351
353
  {
352
354
  if (DC (IndexPreTransform)) {
353
355
  index = ix->index;
354
356
  }
355
- if (DC (IndexRefineFlat)) {
357
+ if (DC (IndexRefine)) {
356
358
  ParameterRange & pr = add_range("k_factor_rf");
357
359
  for (int i = 0; i <= 6; i++) {
358
360
  pr.values.push_back (1 << i);
@@ -372,12 +374,14 @@ void ParameterSpace::initialize (const Index * index)
372
374
  pr.values.push_back (nprobe);
373
375
  }
374
376
  }
375
- if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
376
- ParameterRange & pr = add_range("efSearch");
377
- for (int i = 2; i <= 9; i++) {
378
- pr.values.push_back (1 << i);
379
- }
377
+ ParameterSpace ivf_pspace;
378
+ ivf_pspace.initialize(ix->quantizer);
379
+
380
+ for (const ParameterRange & p: ivf_pspace.parameter_ranges) {
381
+ ParameterRange & pr = add_range("quantizer_" + p.name);
382
+ pr.values = p.values;
380
383
  }
384
+
381
385
  }
382
386
  if (DC (IndexPQ)) {
383
387
  ParameterRange & pr = add_range("ht");
@@ -446,7 +450,7 @@ void ParameterSpace::set_index_parameters (
446
450
  tok = strtok_r (nullptr, " ,", &ptr)) {
447
451
  char name[100];
448
452
  double val;
449
- int ret = sscanf (tok, "%100[^=]=%lf", name, &val);
453
+ int ret = sscanf (tok, "%99[^=]=%lf", name, &val);
450
454
  FAISS_THROW_IF_NOT_FMT (
451
455
  ret == 2, "could not interpret parameters %s", tok);
452
456
  set_index_parameter (index, name, val);
@@ -457,44 +461,38 @@ void ParameterSpace::set_index_parameters (
457
461
  void ParameterSpace::set_index_parameter (
458
462
  Index * index, const std::string & name, double val) const
459
463
  {
460
- if (verbose > 1)
461
- printf(" set %s=%g\n", name.c_str(), val);
464
+ if (verbose > 1) {
465
+ printf(" set_index_parameter %s=%g\n", name.c_str(), val);
466
+ }
462
467
 
463
468
  if (name == "verbose") {
464
469
  index->verbose = int(val);
465
470
  // and fall through to also enable it on sub-indexes
466
471
  }
467
- if (DC (IndexPreTransform)) {
472
+ if (DC (IndexIDMap)) {
468
473
  set_index_parameter (ix->index, name, val);
469
474
  return;
470
475
  }
471
- if (DC (IndexShards)) {
472
- // call on all sub-indexes
473
- auto fn =
474
- [this, name, val](int, Index* subIndex) {
475
- set_index_parameter(subIndex, name, val);
476
- };
477
-
478
- ix->runOnIndex(fn);
476
+ if (DC (IndexPreTransform)) {
477
+ set_index_parameter (ix->index, name, val);
479
478
  return;
480
479
  }
481
- if (DC (IndexReplicas)) {
480
+ if (DC (ThreadedIndex<Index>)) {
482
481
  // call on all sub-indexes
483
482
  auto fn =
484
- [this, name, val](int, Index* subIndex) {
485
- set_index_parameter(subIndex, name, val);
483
+ [this, name, val](int /* no */, Index* subIndex) {
484
+ set_index_parameter(subIndex, name, val);
486
485
  };
487
-
488
486
  ix->runOnIndex(fn);
489
487
  return;
490
488
  }
491
- if (DC (IndexRefineFlat)) {
489
+ if (DC (IndexRefine)) {
492
490
  if (name == "k_factor_rf") {
493
491
  ix->k_factor = int(val);
494
492
  return;
495
493
  }
496
494
  // otherwise it is for the sub-index
497
- set_index_parameter (&ix->refine_index, name, val);
495
+ set_index_parameter (ix->base_index, name, val);
498
496
  return;
499
497
  }
500
498
 
@@ -504,10 +502,7 @@ void ParameterSpace::set_index_parameter (
504
502
  }
505
503
 
506
504
  if (name == "nprobe") {
507
- if (DC (IndexIDMap)) {
508
- set_index_parameter (ix->index, name, val);
509
- return;
510
- } else if (DC (IndexIVF)) {
505
+ if (DC (IndexIVF)) {
511
506
  ix->nprobe = int(val);
512
507
  return;
513
508
  }
@@ -559,6 +554,14 @@ void ParameterSpace::set_index_parameter (
559
554
  }
560
555
  }
561
556
 
557
+ if (name.find("quantizer_") == 0) {
558
+ if (DC(IndexIVF)) {
559
+ std::string sub_name = name.substr(strlen("quantizer_"));
560
+ set_index_parameter(ix->quantizer, sub_name, val);
561
+ return;
562
+ }
563
+ }
564
+
562
565
  FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
563
566
  "could not set parameter %s",
564
567
  name.c_str());
@@ -707,8 +710,8 @@ void ParameterSpace::explore (Index *index,
707
710
  bool keep = ops->add (perf, t_search, combination_name (cno), cno);
708
711
 
709
712
  if (verbose)
710
- printf(" perf %.3f t %.3f (%d runs) %s\n",
711
- perf, t_search, nrun,
713
+ printf(" perf %.3f t %.3f (%d %s) %s\n",
714
+ perf, t_search, nrun, nrun >= 2 ? "runs" : "run",
712
715
  keep ? "*" : "");
713
716
  }
714
717
  }
@@ -81,7 +81,10 @@ struct IntersectionCriterion: AutoTuneCriterion {
81
81
  /**
82
82
  * Maintains a list of experimental results. Each operating point is a
83
83
  * (perf, t, key) triplet, where higher perf and lower t is
84
- * better. The key field is an arbitrary identifier for the operating point
84
+ * better. The key field is an arbitrary identifier for the operating point.
85
+ *
86
+ * Includes primitives to extract the Pareto-optimal operating points in the
87
+ * (perf, t) space.
85
88
  */
86
89
 
87
90
  struct OperatingPoint {
@@ -168,7 +171,7 @@ struct ParameterSpace {
168
171
  void display () const;
169
172
 
170
173
  /// add a new parameter (or return it if it exists)
171
- ParameterRange &add_range(const char * name);
174
+ ParameterRange &add_range(const std::string & name);
172
175
 
173
176
  /// initialize with reasonable parameters for the index
174
177
  virtual void initialize (const Index * index);
@@ -179,7 +182,7 @@ struct ParameterSpace {
179
182
  /// set a combination of parameters described by a string
180
183
  void set_index_parameters (Index *index, const char *param_string) const;
181
184
 
182
- /// set one of the parameters
185
+ /// set one of the parameters (throws if the parameter cannot be set)
183
186
  virtual void set_index_parameter (
184
187
  Index * index, const std::string & name, double val) const;
185
188
 
@@ -269,6 +269,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
269
269
  const Index * codec, Index & index,
270
270
  const float *weights) {
271
271
 
272
+
272
273
  FAISS_THROW_IF_NOT_FMT (nx >= k,
273
274
  "Number of training points (%" PRId64 ") should be at least "
274
275
  "as large as number of clusters (%zd)", nx, k);
@@ -350,8 +351,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
350
351
  std::unique_ptr<float []> dis(new float[nx]);
351
352
 
352
353
  // remember best iteration for redo
353
- float best_err = HUGE_VALF;
354
- std::vector<ClusteringIterationStats> best_obj;
354
+ bool lower_is_better = index.metric_type != METRIC_INNER_PRODUCT;
355
+ float best_obj = lower_is_better ? HUGE_VALF : -HUGE_VALF;
356
+ std::vector<ClusteringIterationStats> best_iteration_stats;
355
357
  std::vector<float> best_centroids;
356
358
 
357
359
  // support input centroids
@@ -417,7 +419,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
417
419
 
418
420
  // k-means iterations
419
421
 
420
- float err = 0;
422
+ float obj = 0;
421
423
  for (int i = 0; i < niter; i++) {
422
424
  double t0s = getmillisecs();
423
425
 
@@ -440,10 +442,10 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
440
442
  InterruptCallback::check();
441
443
  t_search_tot += getmillisecs() - t0s;
442
444
 
443
- // accumulate error
444
- err = 0;
445
+ // accumulate objective
446
+ obj = 0;
445
447
  for (int j = 0; j < nx; j++) {
446
- err += dis[j];
448
+ obj += dis[j];
447
449
  }
448
450
 
449
451
  // update the centroids
@@ -463,8 +465,9 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
463
465
 
464
466
  // collect statistics
465
467
  ClusteringIterationStats stats =
466
- { err, (getmillisecs() - t0) / 1000.0,
467
- t_search_tot / 1000, imbalance_factor (nx, k, assign.get()),
468
+ { obj, (getmillisecs() - t0) / 1000.0,
469
+ t_search_tot / 1000,
470
+ imbalance_factor (nx, k, assign.get()),
468
471
  nsplit };
469
472
  iteration_stats.push_back(stats);
470
473
 
@@ -491,20 +494,21 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
491
494
 
492
495
  if (verbose) printf("\n");
493
496
  if (nredo > 1) {
494
- if (err < best_err) {
497
+ if ((lower_is_better && obj < best_obj) ||
498
+ (!lower_is_better && obj > best_obj)) {
495
499
  if (verbose) {
496
500
  printf ("Objective improved: keep new clusters\n");
497
501
  }
498
502
  best_centroids = centroids;
499
- best_obj = iteration_stats;
500
- best_err = err;
503
+ best_iteration_stats = iteration_stats;
504
+ best_obj = obj;
501
505
  }
502
506
  index.reset ();
503
507
  }
504
508
  }
505
509
  if (nredo > 1) {
506
510
  centroids = best_centroids;
507
- iteration_stats = best_obj;
511
+ iteration_stats = best_iteration_stats;
508
512
  index.reset();
509
513
  index.add(k, best_centroids.data());
510
514
  }
@@ -34,11 +34,10 @@ void Index::range_search (idx_t , const float *, float,
34
34
  FAISS_THROW_MSG ("range search not implemented");
35
35
  }
36
36
 
37
- void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k)
37
+ void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k) const
38
38
  {
39
- float * distances = new float[n * k];
40
- ScopeDeleter<float> del(distances);
41
- search (n, x, k, distances, labels);
39
+ std::vector<float> distances(n * k);
40
+ search (n, x, k, distances.data(), labels);
42
41
  }
43
42
 
44
43
  void Index::add_with_ids(
@@ -17,8 +17,8 @@
17
17
  #include <sstream>
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
- #define FAISS_VERSION_MINOR 6
21
- #define FAISS_VERSION_PATCH 4
20
+ #define FAISS_VERSION_MINOR 7
21
+ #define FAISS_VERSION_PATCH 0
22
22
 
23
23
  /**
24
24
  * @namespace faiss
@@ -133,7 +133,7 @@ struct Index {
133
133
  * @param x input vectors to search, size n * d
134
134
  * @param labels output labels of the NNs, size n*k
135
135
  */
136
- void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
136
+ virtual void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1) const;
137
137
 
138
138
  /// removes all elements from the database.
139
139
  virtual void reset() = 0;
@@ -26,10 +26,9 @@ void IndexBinary::range_search(idx_t, const uint8_t *, int,
26
26
  FAISS_THROW_MSG("range search not implemented");
27
27
  }
28
28
 
29
- void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
30
- int *distances = new int[n * k];
31
- ScopeDeleter<int> del(distances);
32
- search(n, x, k, distances, labels);
29
+ void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) const {
30
+ std::vector<int> distances(n * k);
31
+ search(n, x, k, distances.data(), labels);
33
32
  }
34
33
 
35
34
  void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
@@ -120,7 +120,7 @@ struct IndexBinary {
120
120
  * @param x input vectors to search, size n * d / 8
121
121
  * @param labels output labels of the NNs, size n*k
122
122
  */
123
- void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
123
+ void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1) const;
124
124
 
125
125
  /// Removes all elements from the database.
126
126
  virtual void reset() = 0;
@@ -18,16 +18,7 @@
18
18
 
19
19
  #include <faiss/impl/AuxIndexStructures.h>
20
20
  #include <faiss/impl/FaissAssert.h>
21
-
22
- #ifdef _MSC_VER
23
- #include <intrin.h>
24
-
25
- static inline int __builtin_ctzll(uint64_t x) {
26
- unsigned long ret;
27
- _BitScanForward64(&ret, x);
28
- return (int)ret;
29
- }
30
- #endif // _MSC_VER
21
+ #include <faiss/impl/platform_macros.h>
31
22
 
32
23
  namespace faiss {
33
24
 
@@ -145,8 +136,7 @@ struct KnnSearchResults {
145
136
 
146
137
  inline void add (float dis, idx_t id) {
147
138
  if (dis < heap_sim[0]) {
148
- heap_pop<C> (k, heap_sim, heap_ids);
149
- heap_push<C> (k, heap_sim, heap_ids, dis, id);
139
+ heap_replace_top<C> (k, heap_sim, heap_ids, dis, id);
150
140
  }
151
141
  }
152
142
 
@@ -319,9 +319,8 @@ struct IVFBinaryScannerL2: BinaryInvertedListScanner {
319
319
  for (size_t j = 0; j < n; j++) {
320
320
  uint32_t dis = hc.hamming (codes);
321
321
  if (dis < simi[0]) {
322
- heap_pop<C> (k, simi, idxi);
323
322
  idx_t id = store_pairs ? lo_build(list_no, j) : ids[j];
324
- heap_push<C> (k, simi, idxi, dis, id);
323
+ heap_replace_top<C> (k, simi, idxi, dis, id);
325
324
  nup++;
326
325
  }
327
326
  codes += code_size;
@@ -226,155 +226,7 @@ void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
226
226
 
227
227
 
228
228
 
229
- /***************************************************
230
- * IndexFlatL2BaseShift
231
- ***************************************************/
232
-
233
- IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
234
- IndexFlatL2 (d), shift (nshift)
235
- {
236
- memcpy (this->shift.data(), shift, sizeof(float) * nshift);
237
- }
238
229
 
239
- void IndexFlatL2BaseShift::search (
240
- idx_t n,
241
- const float *x,
242
- idx_t k,
243
- float *distances,
244
- idx_t *labels) const
245
- {
246
- FAISS_THROW_IF_NOT (shift.size() == ntotal);
247
-
248
- float_maxheap_array_t res = {
249
- size_t(n), size_t(k), labels, distances};
250
- knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
251
- }
252
-
253
-
254
-
255
- /***************************************************
256
- * IndexRefineFlat
257
- ***************************************************/
258
-
259
- IndexRefineFlat::IndexRefineFlat (Index *base_index):
260
- Index (base_index->d, base_index->metric_type),
261
- refine_index (base_index->d, base_index->metric_type),
262
- base_index (base_index), own_fields (false),
263
- k_factor (1)
264
- {
265
- is_trained = base_index->is_trained;
266
- FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
267
- "base_index should be empty in the beginning");
268
- }
269
-
270
- IndexRefineFlat::IndexRefineFlat () {
271
- base_index = nullptr;
272
- own_fields = false;
273
- k_factor = 1;
274
- }
275
-
276
-
277
- void IndexRefineFlat::train (idx_t n, const float *x)
278
- {
279
- base_index->train (n, x);
280
- is_trained = true;
281
- }
282
-
283
- void IndexRefineFlat::add (idx_t n, const float *x) {
284
- FAISS_THROW_IF_NOT (is_trained);
285
- base_index->add (n, x);
286
- refine_index.add (n, x);
287
- ntotal = refine_index.ntotal;
288
- }
289
-
290
- void IndexRefineFlat::reset ()
291
- {
292
- base_index->reset ();
293
- refine_index.reset ();
294
- ntotal = 0;
295
- }
296
-
297
- namespace {
298
- typedef faiss::Index::idx_t idx_t;
299
-
300
- template<class C>
301
- static void reorder_2_heaps (
302
- idx_t n,
303
- idx_t k, idx_t *labels, float *distances,
304
- idx_t k_base, const idx_t *base_labels, const float *base_distances)
305
- {
306
- #pragma omp parallel for
307
- for (idx_t i = 0; i < n; i++) {
308
- idx_t *idxo = labels + i * k;
309
- float *diso = distances + i * k;
310
- const idx_t *idxi = base_labels + i * k_base;
311
- const float *disi = base_distances + i * k_base;
312
-
313
- heap_heapify<C> (k, diso, idxo, disi, idxi, k);
314
- if (k_base != k) { // add remaining elements
315
- heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
316
- }
317
- heap_reorder<C> (k, diso, idxo);
318
- }
319
- }
320
-
321
-
322
- }
323
-
324
-
325
- void IndexRefineFlat::search (
326
- idx_t n, const float *x, idx_t k,
327
- float *distances, idx_t *labels) const
328
- {
329
- FAISS_THROW_IF_NOT (is_trained);
330
- idx_t k_base = idx_t (k * k_factor);
331
- idx_t * base_labels = labels;
332
- float * base_distances = distances;
333
- ScopeDeleter<idx_t> del1;
334
- ScopeDeleter<float> del2;
335
-
336
-
337
- if (k != k_base) {
338
- base_labels = new idx_t [n * k_base];
339
- del1.set (base_labels);
340
- base_distances = new float [n * k_base];
341
- del2.set (base_distances);
342
- }
343
-
344
- base_index->search (n, x, k_base, base_distances, base_labels);
345
-
346
- for (int i = 0; i < n * k_base; i++)
347
- assert (base_labels[i] >= -1 &&
348
- base_labels[i] < ntotal);
349
-
350
- // compute refined distances
351
- refine_index.compute_distance_subset (
352
- n, x, k_base, base_distances, base_labels);
353
-
354
- // sort and store result
355
- if (metric_type == METRIC_L2) {
356
- typedef CMax <float, idx_t> C;
357
- reorder_2_heaps<C> (
358
- n, k, labels, distances,
359
- k_base, base_labels, base_distances);
360
-
361
- } else if (metric_type == METRIC_INNER_PRODUCT) {
362
- typedef CMin <float, idx_t> C;
363
- reorder_2_heaps<C> (
364
- n, k, labels, distances,
365
- k_base, base_labels, base_distances);
366
- } else {
367
- FAISS_THROW_MSG("Metric type not supported");
368
- }
369
-
370
- }
371
-
372
-
373
-
374
- IndexRefineFlat::~IndexRefineFlat ()
375
- {
376
- if (own_fields) delete base_index;
377
- }
378
230
 
379
231
  /***************************************************
380
232
  * IndexFlat1D