faiss 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +0 -1
  12. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  13. data/vendor/faiss/faiss/Clustering.h +31 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +20 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  27. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  28. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  29. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  30. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  31. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  33. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  35. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  36. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  37. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  42. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  43. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  46. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  48. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  49. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  50. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  52. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  53. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  56. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  57. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  58. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  59. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  60. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  61. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  62. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  64. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  65. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  66. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  67. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  69. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  70. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  71. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  72. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  73. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  74. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  75. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  76. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  77. data/vendor/faiss/faiss/clone_index.h +3 -0
  78. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  79. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  82. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  83. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  88. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  90. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  92. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  93. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  97. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  98. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  99. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  101. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  103. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  104. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  105. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  106. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  107. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  108. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  109. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  110. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  111. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  112. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  113. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  115. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  118. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  119. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  121. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  124. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  125. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  126. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  127. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  128. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  129. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  133. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  135. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  136. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  137. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  138. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  139. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  140. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  141. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  142. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  143. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  144. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  145. data/vendor/faiss/faiss/utils/distances.h +81 -4
  146. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  148. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  150. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  152. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  153. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  154. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  155. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  156. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  157. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  158. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  159. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  160. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  161. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  162. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  163. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  164. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  165. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  166. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  167. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  168. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  169. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  170. data/vendor/faiss/faiss/utils/utils.h +57 -20
  171. metadata +10 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e9bd037bbb04bb31eb1856073a7d01d9b1871f1005c2f81b2b3d48c72f737a2
4
- data.tar.gz: bfde828c3c7780e2cba3eac0db39ab805c1892e3fb07968870097051ebc7b713
3
+ metadata.gz: e41b15bbcda6c4d2a250df5b98d86e9baf51b34b90fc2fccb6f0a37f486ef417
4
+ data.tar.gz: 768074275062ed45f1752e3a5c9d55a9695a6aa453e925aa0a6e607ce3215bab
5
5
  SHA512:
6
- metadata.gz: c7dd39002fa4f463c552b75fcfeed505816784ca986e65a46826d7982f9e8f9761750931b7e472ef3406d851c63e2038ef65370cd1c0d54113eb556190fb8c65
7
- data.tar.gz: 2f1fc38577e089b9a817feabe7a794354a385ca4cf99887253db12a88cfeb39c0ba456a44191b16680158e2c30d1356efa909df57a2823376036f47e25aed0b1
6
+ metadata.gz: cecc466dd24e03206219b63e750e48b554355c1c5dfc8e911879988a6f31eb628617133f5b584b3de29efcbe65d087cf5b4e219371cee959e8248c989a4dbffc
7
+ data.tar.gz: 3e0c6be53825949f9c51a0195d85cbed87bc198dd06852c88c537b13e6bcc8e7fa65a3e3c88667eefef44e95278fe2c73ece89d5f92bd24f8c0d27b543488b56
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.3.1 (2024-03-13)
2
+
3
+ - Updated Faiss to 1.8.0
4
+ - Fixed memory leak with `load` and `index_binary_factory` methods
5
+
1
6
  ## 0.3.0 (2023-05-11)
2
7
 
3
8
  - Fixed error on Fedora
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2023 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
6
6
 
7
- [![Build Status](https://github.com/ankane/faiss-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/faiss-ruby/actions)
7
+ [![Build Status](https://github.com/ankane/faiss-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/faiss-ruby/actions)
8
8
 
9
9
  ## Installation
10
10
 
data/ext/faiss/extconf.rb CHANGED
@@ -19,9 +19,16 @@ abort "Numo not found" unless find_header("numo/narray.h", numo)
19
19
  # for https://bugs.ruby-lang.org/issues/19005
20
20
  $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
21
 
22
+ $CXXFLAGS += " -std=c++17 $(optflags) -DFINTEGER=int"
23
+ $CXXFLAGS += " -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-sign-compare"
24
+
22
25
  # -march=native not supported with ARM Mac
23
- default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
24
- $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
26
+ default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : " -march=native"
27
+ $CXXFLAGS += with_config("optflags", default_optflags)
28
+
29
+ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
30
+ $CXXFLAGS += " -Xclang" if apple_clang
31
+ $CXXFLAGS += " -fopenmp"
25
32
 
26
33
  ext = File.expand_path(".", __dir__)
27
34
  vendor = File.expand_path("../../vendor/faiss", __dir__)
data/ext/faiss/index.cpp CHANGED
@@ -157,7 +157,7 @@ void init_index(Rice::Module& m) {
157
157
  "load",
158
158
  [](Rice::String fname) {
159
159
  return faiss::read_index(fname.c_str());
160
- });
160
+ }, Rice::Return().takeOwnership());
161
161
 
162
162
  Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
163
163
  .define_constructor(Rice::Constructor<faiss::IndexFlatL2, int64_t>());
@@ -59,7 +59,7 @@ void init_index_binary(Rice::Module& m) {
59
59
  "load",
60
60
  [](Rice::String fname) {
61
61
  return faiss::read_index_binary(fname.c_str());
62
- });
62
+ }, Rice::Return().takeOwnership());
63
63
 
64
64
  Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
65
65
  .define_constructor(Rice::Constructor<faiss::IndexBinaryFlat, int64_t>());
@@ -71,5 +71,5 @@ void init_index_binary(Rice::Module& m) {
71
71
  "index_binary_factory",
72
72
  [](int d, Rice::String description) {
73
73
  return faiss::index_binary_factory(d, description.c_str());
74
- });
74
+ }, Rice::Return().takeOwnership());
75
75
  }
@@ -49,5 +49,5 @@ void init_product_quantizer(Rice::Module& m) {
49
49
  "load",
50
50
  [](Rice::String fname) {
51
51
  return faiss::read_ProductQuantizer(fname.c_str());
52
- });
52
+ }, Rice::Return().takeOwnership());
53
53
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -152,12 +152,10 @@ bool OperatingPoints::add(
152
152
  return false;
153
153
  }
154
154
  }
155
- { // remove non-optimal points from array
156
- int i = a.size() - 1;
157
- while (i > 0) {
158
- if (a[i].t < a[i - 1].t)
159
- a.erase(a.begin() + (i - 1));
160
- i--;
155
+ // remove non-optimal points from array
156
+ for (int i = a.size() - 1; i > 0; --i) {
157
+ if (a[i].t < a[i - 1].t) {
158
+ a.erase(a.begin() + (i - 1));
161
159
  }
162
160
  }
163
161
  return true;
@@ -286,6 +284,8 @@ std::string ParameterSpace::combination_name(size_t cno) const {
286
284
  char buf[1000], *wp = buf;
287
285
  *wp = 0;
288
286
  for (int i = 0; i < parameter_ranges.size(); i++) {
287
+ FAISS_THROW_IF_NOT_MSG(
288
+ buf + 1000 - wp >= 0, "Overflow detected in snprintf");
289
289
  const ParameterRange& pr = parameter_ranges[i];
290
290
  size_t j = cno % pr.values.size();
291
291
  cno /= pr.values.size();
@@ -334,7 +334,7 @@ ParameterRange& ParameterSpace::add_range(const std::string& name) {
334
334
  return pr;
335
335
  }
336
336
  }
337
- parameter_ranges.push_back(ParameterRange());
337
+ parameter_ranges.emplace_back();
338
338
  parameter_ranges.back().name = name;
339
339
  return parameter_ranges.back();
340
340
  }
@@ -11,7 +11,6 @@
11
11
  #define FAISS_AUTO_TUNE_H
12
12
 
13
13
  #include <stdint.h>
14
- #include <unordered_map>
15
14
  #include <vector>
16
15
 
17
16
  #include <faiss/Index.h>
@@ -27,20 +27,6 @@
27
27
 
28
28
  namespace faiss {
29
29
 
30
- ClusteringParameters::ClusteringParameters()
31
- : niter(25),
32
- nredo(1),
33
- verbose(false),
34
- spherical(false),
35
- int_centroids(false),
36
- update_index(false),
37
- frozen_centroids(false),
38
- min_points_per_centroid(39),
39
- max_points_per_centroid(256),
40
- seed(1234),
41
- decode_block_size(32768) {}
42
- // 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
43
-
44
30
  Clustering::Clustering(int d, int k) : d(d), k(k) {}
45
31
 
46
32
  Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
@@ -231,7 +217,7 @@ int split_clusters(
231
217
  for (size_t ci = 0; ci < k; ci++) {
232
218
  if (hassign[ci] == 0) { /* need to redefine a centroid */
233
219
  size_t cj;
234
- for (cj = 0; 1; cj = (cj + 1) % k) {
220
+ for (cj = 0; true; cj = (cj + 1) % k) {
235
221
  /* probability to pick this cluster for split */
236
222
  float p = (hassign[cj] - 1.0) / (float)(n - k);
237
223
  float r = rng.rand_float();
@@ -264,7 +250,7 @@ int split_clusters(
264
250
  return nsplit;
265
251
  }
266
252
 
267
- }; // namespace
253
+ } // namespace
268
254
 
269
255
  void Clustering::train_encoded(
270
256
  idx_t nx,
@@ -590,7 +576,7 @@ float kmeans_clustering(
590
576
  const float* x,
591
577
  float* centroids) {
592
578
  Clustering clus(d, k);
593
- clus.verbose = d * n * k > (1L << 30);
579
+ clus.verbose = d * n * k > (size_t(1) << 30);
594
580
  // display logs if > 1Gflop per iteration
595
581
  IndexFlatL2 index(d);
596
582
  clus.train(n, x, index);
@@ -631,7 +617,7 @@ void copy_columns(idx_t n, idx_t d1, const float* src, idx_t d2, float* dest) {
631
617
  }
632
618
  }
633
619
 
634
- }; // namespace
620
+ } // namespace
635
621
 
636
622
  void ProgressiveDimClustering::train(
637
623
  idx_t n,
@@ -5,7 +5,7 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
8
+ /** Implementation of k-means clustering with many variants. */
9
9
 
10
10
  #ifndef FAISS_CLUSTERING_H
11
11
  #define FAISS_CLUSTERING_H
@@ -19,25 +19,35 @@ namespace faiss {
19
19
  * constructor of the Clustering object.
20
20
  */
21
21
  struct ClusteringParameters {
22
- int niter; ///< clustering iterations
23
- int nredo; ///< redo clustering this many times and keep best
24
-
25
- bool verbose;
26
- bool spherical; ///< do we want normalized centroids?
27
- bool int_centroids; ///< round centroids coordinates to integer
28
- bool update_index; ///< re-train index after each iteration?
29
- bool frozen_centroids; ///< use the centroids provided as input and do not
30
- ///< change them during iterations
31
-
32
- int min_points_per_centroid; ///< otherwise you get a warning
33
- int max_points_per_centroid; ///< to limit size of dataset
34
-
35
- int seed; ///< seed for the random number generator
36
-
37
- size_t decode_block_size; ///< how many vectors at a time to decode
38
-
39
- /// sets reasonable defaults
40
- ClusteringParameters();
22
+ /// number of clustering iterations
23
+ int niter = 25;
24
+ /// redo clustering this many times and keep the clusters with the best
25
+ /// objective
26
+ int nredo = 1;
27
+
28
+ bool verbose = false;
29
+ /// whether to normalize centroids after each iteration (useful for inner
30
+ /// product clustering)
31
+ bool spherical = false;
32
+ /// round centroids coordinates to integer after each iteration?
33
+ bool int_centroids = false;
34
+ /// re-train index after each iteration?
35
+ bool update_index = false;
36
+
37
+ /// Use the subset of centroids provided as input and do not change them
38
+ /// during iterations
39
+ bool frozen_centroids = false;
40
+ /// If fewer than this number of training vectors per centroid are provided,
41
+ /// writes a warning. Note that fewer than 1 point per centroid raises an
42
+ /// exception.
43
+ int min_points_per_centroid = 39;
44
+ /// to limit size of dataset, otherwise the training set is subsampled
45
+ int max_points_per_centroid = 256;
46
+ /// seed for the random number generator
47
+ int seed = 1234;
48
+
49
+ /// when the training set is encoded, batch size of the codec decoder
50
+ size_t decode_block_size = 32768;
41
51
  };
42
52
 
43
53
  struct ClusteringIterationStats {
@@ -94,7 +104,7 @@ struct Clustering : ClusteringParameters {
94
104
  * to decode the input vectors.
95
105
  *
96
106
  * @param codec codec used to decode the vectors (nullptr =
97
- * vectors are in fact floats) *
107
+ * vectors are in fact floats)
98
108
  */
99
109
  void train_encoded(
100
110
  idx_t nx,
@@ -12,7 +12,9 @@
12
12
 
13
13
  #include <faiss/IndexAdditiveQuantizer.h>
14
14
  #include <faiss/IndexIVFAdditiveQuantizer.h>
15
+ #include <faiss/IndexIVFIndependentQuantizer.h>
15
16
  #include <faiss/IndexPreTransform.h>
17
+ #include <faiss/IndexRefine.h>
16
18
  #include <faiss/MetaIndexes.h>
17
19
  #include <faiss/impl/FaissAssert.h>
18
20
  #include <faiss/utils/distances.h>
@@ -57,20 +59,29 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
57
59
  }
58
60
 
59
61
  const IndexIVF* try_extract_index_ivf(const Index* index) {
60
- if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
61
- index = pt->index;
62
+ auto* ivf = dynamic_cast<const IndexIVF*>(index);
63
+ if (ivf != nullptr) {
64
+ return ivf;
62
65
  }
63
66
 
67
+ if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
68
+ return try_extract_index_ivf(pt->index);
69
+ }
64
70
  if (auto* idmap = dynamic_cast<const IndexIDMap*>(index)) {
65
- index = idmap->index;
71
+ return try_extract_index_ivf(idmap->index);
66
72
  }
67
73
  if (auto* idmap = dynamic_cast<const IndexIDMap2*>(index)) {
68
- index = idmap->index;
74
+ return try_extract_index_ivf(idmap->index);
75
+ }
76
+ if (auto* indep =
77
+ dynamic_cast<const IndexIVFIndependentQuantizer*>(index)) {
78
+ return try_extract_index_ivf(indep->index_ivf);
79
+ }
80
+ if (auto* refine = dynamic_cast<const IndexRefine*>(index)) {
81
+ return try_extract_index_ivf(refine->base_index);
69
82
  }
70
83
 
71
- auto* ivf = dynamic_cast<const IndexIVF*>(index);
72
-
73
- return ivf;
84
+ return nullptr;
74
85
  }
75
86
 
76
87
  IndexIVF* try_extract_index_ivf(Index* index) {
@@ -321,14 +332,14 @@ void search_with_parameters(
321
332
  double* ms_per_stage) {
322
333
  FAISS_THROW_IF_NOT(params);
323
334
  const float* prev_x = x;
324
- ScopeDeleter<float> del;
335
+ std::unique_ptr<const float[]> del;
325
336
 
326
337
  double t0 = getmillisecs();
327
338
 
328
339
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
329
340
  x = ip->apply_chain(n, x);
330
341
  if (x != prev_x) {
331
- del.set(x);
342
+ del.reset(x);
332
343
  }
333
344
  index = ip->index;
334
345
  }
@@ -371,14 +382,14 @@ void range_search_with_parameters(
371
382
  double* ms_per_stage) {
372
383
  FAISS_THROW_IF_NOT(params);
373
384
  const float* prev_x = x;
374
- ScopeDeleter<float> del;
385
+ std::unique_ptr<const float[]> del;
375
386
 
376
387
  double t0 = getmillisecs();
377
388
 
378
389
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
379
390
  x = ip->apply_chain(n, x);
380
391
  if (x != prev_x) {
381
- del.set(x);
392
+ del.reset(x);
382
393
  }
383
394
  index = ip->index;
384
395
  }
@@ -18,7 +18,7 @@
18
18
 
19
19
  namespace faiss {
20
20
 
21
- Index::~Index() {}
21
+ Index::~Index() = default;
22
22
 
23
23
  void Index::train(idx_t /*n*/, const float* /*x*/) {
24
24
  // does nothing by default
@@ -17,8 +17,8 @@
17
17
  #include <typeinfo>
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
- #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 4
20
+ #define FAISS_VERSION_MINOR 8
21
+ #define FAISS_VERSION_PATCH 0
22
22
 
23
23
  /**
24
24
  * @namespace faiss
@@ -99,6 +99,7 @@ struct Index {
99
99
  * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
100
100
  * This function slices the input vectors in chunks smaller than
101
101
  * blocksize_add and calls add_core.
102
+ * @param n number of vectors
102
103
  * @param x input matrix, size n * d
103
104
  */
104
105
  virtual void add(idx_t n, const float* x) = 0;
@@ -108,7 +109,9 @@ struct Index {
108
109
  * The default implementation fails with an assertion, as it is
109
110
  * not supported by all indexes.
110
111
  *
111
- * @param xids if non-null, ids to store for the vectors (size n)
112
+ * @param n number of vectors
113
+ * @param x input vectors, size n * d
114
+ * @param xids if non-null, ids to store for the vectors (size n)
112
115
  */
113
116
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
114
117
 
@@ -117,9 +120,11 @@ struct Index {
117
120
  * return at most k vectors. If there are not enough results for a
118
121
  * query, the result array is padded with -1s.
119
122
  *
123
+ * @param n number of vectors
120
124
  * @param x input vectors to search, size n * d
121
- * @param labels output labels of the NNs, size n*k
125
+ * @param k number of extracted vectors
122
126
  * @param distances output pairwise distances, size n*k
127
+ * @param labels output labels of the NNs, size n*k
123
128
  */
124
129
  virtual void search(
125
130
  idx_t n,
@@ -135,6 +140,7 @@ struct Index {
135
140
  * indexes do not implement the range_search (only the k-NN search
136
141
  * is mandatory).
137
142
  *
143
+ * @param n number of vectors
138
144
  * @param x input vectors to search, size n * d
139
145
  * @param radius search radius
140
146
  * @param result result table
@@ -149,8 +155,10 @@ struct Index {
149
155
  /** return the indexes of the k vectors closest to the query x.
150
156
  *
151
157
  * This function is identical as search but only return labels of neighbors.
158
+ * @param n number of vectors
152
159
  * @param x input vectors to search, size n * d
153
160
  * @param labels output labels of the NNs, size n*k
161
+ * @param k number of nearest neighbours
154
162
  */
155
163
  virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
156
164
  const;
@@ -174,7 +182,7 @@ struct Index {
174
182
  /** Reconstruct several stored vectors (or an approximation if lossy coding)
175
183
  *
176
184
  * this function may not be defined for some indexes
177
- * @param n number of vectors to reconstruct
185
+ * @param n number of vectors to reconstruct
178
186
  * @param keys ids of the vectors to reconstruct (size n)
179
187
  * @param recons reconstucted vector (size n * d)
180
188
  */
@@ -184,6 +192,8 @@ struct Index {
184
192
  /** Reconstruct vectors i0 to i0 + ni - 1
185
193
  *
186
194
  * this function may not be defined for some indexes
195
+ * @param i0 index of the first vector in the sequence
196
+ * @param ni number of vectors in the sequence
187
197
  * @param recons reconstucted vector (size ni * d)
188
198
  */
189
199
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
@@ -194,6 +204,11 @@ struct Index {
194
204
  * If there are not enough results for a query, the resulting arrays
195
205
  * is padded with -1s.
196
206
  *
207
+ * @param n number of vectors
208
+ * @param x input vectors to search, size n * d
209
+ * @param k number of extracted vectors
210
+ * @param distances output pairwise distances, size n*k
211
+ * @param labels output labels of the NNs, size n*k
197
212
  * @param recons reconstructed vectors size (n, k, d)
198
213
  **/
199
214
  virtual void search_and_reconstruct(
@@ -10,10 +10,10 @@
10
10
  #include <faiss/Index2Layer.h>
11
11
 
12
12
  #include <faiss/impl/platform_macros.h>
13
- #include <stdint.h>
14
13
  #include <cassert>
15
14
  #include <cinttypes>
16
15
  #include <cmath>
16
+ #include <cstdint>
17
17
  #include <cstdio>
18
18
 
19
19
  #ifdef __SSE3__
@@ -47,7 +47,7 @@ Index2Layer::Index2Layer(
47
47
  pq(quantizer->d, M, nbit) {
48
48
  is_trained = false;
49
49
  for (int nbyte = 0; nbyte < 7; nbyte++) {
50
- if ((1L << (8 * nbyte)) >= nlist) {
50
+ if (((size_t)1 << (8 * nbyte)) >= nlist) {
51
51
  code_size_1 = nbyte;
52
52
  break;
53
53
  }
@@ -60,7 +60,7 @@ Index2Layer::Index2Layer() {
60
60
  code_size = code_size_1 = code_size_2 = 0;
61
61
  }
62
62
 
63
- Index2Layer::~Index2Layer() {}
63
+ Index2Layer::~Index2Layer() = default;
64
64
 
65
65
  void Index2Layer::train(idx_t n, const float* x) {
66
66
  if (verbose) {
@@ -83,7 +83,7 @@ void Index2Layer::train(idx_t n, const float* x) {
83
83
  verbose,
84
84
  pq.cp.seed);
85
85
 
86
- ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
86
+ std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);
87
87
 
88
88
  std::vector<idx_t> assign(n); // assignement to coarse centroids
89
89
  q1.quantizer->assign(n, x, assign.data());
@@ -179,7 +179,7 @@ struct DistanceXPQ4 : Distance2Level {
179
179
  float operator()(idx_t i) override {
180
180
  #ifdef __SSE3__
181
181
  const uint8_t* code = storage.codes.data() + i * storage.code_size;
182
- long key = 0;
182
+ idx_t key = 0;
183
183
  memcpy(&key, code, storage.code_size_1);
184
184
  code += storage.code_size_1;
185
185
 
@@ -225,7 +225,7 @@ struct Distance2xXPQ4 : Distance2Level {
225
225
 
226
226
  float operator()(idx_t i) override {
227
227
  const uint8_t* code = storage.codes.data() + i * storage.code_size;
228
- long key01 = 0;
228
+ int64_t key01 = 0;
229
229
  memcpy(&key01, code, storage.code_size_1);
230
230
  code += storage.code_size_1;
231
231
  #ifdef __SSE3__
@@ -237,7 +237,7 @@ struct Distance2xXPQ4 : Distance2Level {
237
237
  __m128 accu = _mm_setzero_ps();
238
238
 
239
239
  for (int mi_m = 0; mi_m < 2; mi_m++) {
240
- long l1_idx = key01 & ((1L << mi_nbits) - 1);
240
+ int64_t l1_idx = key01 & (((int64_t)1 << mi_nbits) - 1);
241
241
  const __m128* pq_l1 = pq_l1_t + M_2 * l1_idx;
242
242
 
243
243
  for (int m = 0; m < M_2; m++) {