faiss 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +0 -1
  12. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  13. data/vendor/faiss/faiss/Clustering.h +31 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +20 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  27. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  28. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  29. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  30. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  31. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  33. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  35. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  36. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  37. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  42. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  43. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  46. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  48. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  49. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  50. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  52. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  53. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  56. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  57. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  58. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  59. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  60. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  61. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  62. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  64. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  65. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  66. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  67. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  69. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  70. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  71. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  72. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  73. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  74. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  75. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  76. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  77. data/vendor/faiss/faiss/clone_index.h +3 -0
  78. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  79. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  82. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  83. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  88. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  90. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  92. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  93. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  97. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  98. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  99. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  101. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  103. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  104. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  105. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  106. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  107. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  108. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  109. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  110. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  111. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  112. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  113. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  115. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  118. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  119. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  121. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  124. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  125. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  126. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  127. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  128. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  129. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  133. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  135. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  136. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  137. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  138. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  139. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  140. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  141. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  142. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  143. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  144. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  145. data/vendor/faiss/faiss/utils/distances.h +81 -4
  146. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  148. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  150. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  152. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  153. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  154. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  155. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  156. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  157. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  158. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  159. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  160. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  161. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  162. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  163. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  164. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  165. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  166. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  167. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  168. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  169. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  170. data/vendor/faiss/faiss/utils/utils.h +57 -20
  171. metadata +10 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e9bd037bbb04bb31eb1856073a7d01d9b1871f1005c2f81b2b3d48c72f737a2
4
- data.tar.gz: bfde828c3c7780e2cba3eac0db39ab805c1892e3fb07968870097051ebc7b713
3
+ metadata.gz: e41b15bbcda6c4d2a250df5b98d86e9baf51b34b90fc2fccb6f0a37f486ef417
4
+ data.tar.gz: 768074275062ed45f1752e3a5c9d55a9695a6aa453e925aa0a6e607ce3215bab
5
5
  SHA512:
6
- metadata.gz: c7dd39002fa4f463c552b75fcfeed505816784ca986e65a46826d7982f9e8f9761750931b7e472ef3406d851c63e2038ef65370cd1c0d54113eb556190fb8c65
7
- data.tar.gz: 2f1fc38577e089b9a817feabe7a794354a385ca4cf99887253db12a88cfeb39c0ba456a44191b16680158e2c30d1356efa909df57a2823376036f47e25aed0b1
6
+ metadata.gz: cecc466dd24e03206219b63e750e48b554355c1c5dfc8e911879988a6f31eb628617133f5b584b3de29efcbe65d087cf5b4e219371cee959e8248c989a4dbffc
7
+ data.tar.gz: 3e0c6be53825949f9c51a0195d85cbed87bc198dd06852c88c537b13e6bcc8e7fa65a3e3c88667eefef44e95278fe2c73ece89d5f92bd24f8c0d27b543488b56
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.3.1 (2024-03-13)
2
+
3
+ - Updated Faiss to 1.8.0
4
+ - Fixed memory leak with `load` and `index_binary_factory` methods
5
+
1
6
  ## 0.3.0 (2023-05-11)
2
7
 
3
8
  - Fixed error on Fedora
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2023 Andrew Kane
4
+ Copyright (c) 2020-2024 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
6
6
 
7
- [![Build Status](https://github.com/ankane/faiss-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/faiss-ruby/actions)
7
+ [![Build Status](https://github.com/ankane/faiss-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/faiss-ruby/actions)
8
8
 
9
9
  ## Installation
10
10
 
data/ext/faiss/extconf.rb CHANGED
@@ -19,9 +19,16 @@ abort "Numo not found" unless find_header("numo/narray.h", numo)
19
19
  # for https://bugs.ruby-lang.org/issues/19005
20
20
  $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
21
 
22
+ $CXXFLAGS += " -std=c++17 $(optflags) -DFINTEGER=int"
23
+ $CXXFLAGS += " -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-sign-compare"
24
+
22
25
  # -march=native not supported with ARM Mac
23
- default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
24
- $CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
26
+ default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : " -march=native"
27
+ $CXXFLAGS += with_config("optflags", default_optflags)
28
+
29
+ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
30
+ $CXXFLAGS += " -Xclang" if apple_clang
31
+ $CXXFLAGS += " -fopenmp"
25
32
 
26
33
  ext = File.expand_path(".", __dir__)
27
34
  vendor = File.expand_path("../../vendor/faiss", __dir__)
data/ext/faiss/index.cpp CHANGED
@@ -157,7 +157,7 @@ void init_index(Rice::Module& m) {
157
157
  "load",
158
158
  [](Rice::String fname) {
159
159
  return faiss::read_index(fname.c_str());
160
- });
160
+ }, Rice::Return().takeOwnership());
161
161
 
162
162
  Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
163
163
  .define_constructor(Rice::Constructor<faiss::IndexFlatL2, int64_t>());
@@ -59,7 +59,7 @@ void init_index_binary(Rice::Module& m) {
59
59
  "load",
60
60
  [](Rice::String fname) {
61
61
  return faiss::read_index_binary(fname.c_str());
62
- });
62
+ }, Rice::Return().takeOwnership());
63
63
 
64
64
  Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
65
65
  .define_constructor(Rice::Constructor<faiss::IndexBinaryFlat, int64_t>());
@@ -71,5 +71,5 @@ void init_index_binary(Rice::Module& m) {
71
71
  "index_binary_factory",
72
72
  [](int d, Rice::String description) {
73
73
  return faiss::index_binary_factory(d, description.c_str());
74
- });
74
+ }, Rice::Return().takeOwnership());
75
75
  }
@@ -49,5 +49,5 @@ void init_product_quantizer(Rice::Module& m) {
49
49
  "load",
50
50
  [](Rice::String fname) {
51
51
  return faiss::read_ProductQuantizer(fname.c_str());
52
- });
52
+ }, Rice::Return().takeOwnership());
53
53
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -152,12 +152,10 @@ bool OperatingPoints::add(
152
152
  return false;
153
153
  }
154
154
  }
155
- { // remove non-optimal points from array
156
- int i = a.size() - 1;
157
- while (i > 0) {
158
- if (a[i].t < a[i - 1].t)
159
- a.erase(a.begin() + (i - 1));
160
- i--;
155
+ // remove non-optimal points from array
156
+ for (int i = a.size() - 1; i > 0; --i) {
157
+ if (a[i].t < a[i - 1].t) {
158
+ a.erase(a.begin() + (i - 1));
161
159
  }
162
160
  }
163
161
  return true;
@@ -286,6 +284,8 @@ std::string ParameterSpace::combination_name(size_t cno) const {
286
284
  char buf[1000], *wp = buf;
287
285
  *wp = 0;
288
286
  for (int i = 0; i < parameter_ranges.size(); i++) {
287
+ FAISS_THROW_IF_NOT_MSG(
288
+ buf + 1000 - wp >= 0, "Overflow detected in snprintf");
289
289
  const ParameterRange& pr = parameter_ranges[i];
290
290
  size_t j = cno % pr.values.size();
291
291
  cno /= pr.values.size();
@@ -334,7 +334,7 @@ ParameterRange& ParameterSpace::add_range(const std::string& name) {
334
334
  return pr;
335
335
  }
336
336
  }
337
- parameter_ranges.push_back(ParameterRange());
337
+ parameter_ranges.emplace_back();
338
338
  parameter_ranges.back().name = name;
339
339
  return parameter_ranges.back();
340
340
  }
@@ -11,7 +11,6 @@
11
11
  #define FAISS_AUTO_TUNE_H
12
12
 
13
13
  #include <stdint.h>
14
- #include <unordered_map>
15
14
  #include <vector>
16
15
 
17
16
  #include <faiss/Index.h>
@@ -27,20 +27,6 @@
27
27
 
28
28
  namespace faiss {
29
29
 
30
- ClusteringParameters::ClusteringParameters()
31
- : niter(25),
32
- nredo(1),
33
- verbose(false),
34
- spherical(false),
35
- int_centroids(false),
36
- update_index(false),
37
- frozen_centroids(false),
38
- min_points_per_centroid(39),
39
- max_points_per_centroid(256),
40
- seed(1234),
41
- decode_block_size(32768) {}
42
- // 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
43
-
44
30
  Clustering::Clustering(int d, int k) : d(d), k(k) {}
45
31
 
46
32
  Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
@@ -231,7 +217,7 @@ int split_clusters(
231
217
  for (size_t ci = 0; ci < k; ci++) {
232
218
  if (hassign[ci] == 0) { /* need to redefine a centroid */
233
219
  size_t cj;
234
- for (cj = 0; 1; cj = (cj + 1) % k) {
220
+ for (cj = 0; true; cj = (cj + 1) % k) {
235
221
  /* probability to pick this cluster for split */
236
222
  float p = (hassign[cj] - 1.0) / (float)(n - k);
237
223
  float r = rng.rand_float();
@@ -264,7 +250,7 @@ int split_clusters(
264
250
  return nsplit;
265
251
  }
266
252
 
267
- }; // namespace
253
+ } // namespace
268
254
 
269
255
  void Clustering::train_encoded(
270
256
  idx_t nx,
@@ -590,7 +576,7 @@ float kmeans_clustering(
590
576
  const float* x,
591
577
  float* centroids) {
592
578
  Clustering clus(d, k);
593
- clus.verbose = d * n * k > (1L << 30);
579
+ clus.verbose = d * n * k > (size_t(1) << 30);
594
580
  // display logs if > 1Gflop per iteration
595
581
  IndexFlatL2 index(d);
596
582
  clus.train(n, x, index);
@@ -631,7 +617,7 @@ void copy_columns(idx_t n, idx_t d1, const float* src, idx_t d2, float* dest) {
631
617
  }
632
618
  }
633
619
 
634
- }; // namespace
620
+ } // namespace
635
621
 
636
622
  void ProgressiveDimClustering::train(
637
623
  idx_t n,
@@ -5,7 +5,7 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
8
+ /** Implementation of k-means clustering with many variants. */
9
9
 
10
10
  #ifndef FAISS_CLUSTERING_H
11
11
  #define FAISS_CLUSTERING_H
@@ -19,25 +19,35 @@ namespace faiss {
19
19
  * constructor of the Clustering object.
20
20
  */
21
21
  struct ClusteringParameters {
22
- int niter; ///< clustering iterations
23
- int nredo; ///< redo clustering this many times and keep best
24
-
25
- bool verbose;
26
- bool spherical; ///< do we want normalized centroids?
27
- bool int_centroids; ///< round centroids coordinates to integer
28
- bool update_index; ///< re-train index after each iteration?
29
- bool frozen_centroids; ///< use the centroids provided as input and do not
30
- ///< change them during iterations
31
-
32
- int min_points_per_centroid; ///< otherwise you get a warning
33
- int max_points_per_centroid; ///< to limit size of dataset
34
-
35
- int seed; ///< seed for the random number generator
36
-
37
- size_t decode_block_size; ///< how many vectors at a time to decode
38
-
39
- /// sets reasonable defaults
40
- ClusteringParameters();
22
+ /// number of clustering iterations
23
+ int niter = 25;
24
+ /// redo clustering this many times and keep the clusters with the best
25
+ /// objective
26
+ int nredo = 1;
27
+
28
+ bool verbose = false;
29
+ /// whether to normalize centroids after each iteration (useful for inner
30
+ /// product clustering)
31
+ bool spherical = false;
32
+ /// round centroids coordinates to integer after each iteration?
33
+ bool int_centroids = false;
34
+ /// re-train index after each iteration?
35
+ bool update_index = false;
36
+
37
+ /// Use the subset of centroids provided as input and do not change them
38
+ /// during iterations
39
+ bool frozen_centroids = false;
40
+ /// If fewer than this number of training vectors per centroid are provided,
41
+ /// writes a warning. Note that fewer than 1 point per centroid raises an
42
+ /// exception.
43
+ int min_points_per_centroid = 39;
44
+ /// to limit size of dataset, otherwise the training set is subsampled
45
+ int max_points_per_centroid = 256;
46
+ /// seed for the random number generator
47
+ int seed = 1234;
48
+
49
+ /// when the training set is encoded, batch size of the codec decoder
50
+ size_t decode_block_size = 32768;
41
51
  };
42
52
 
43
53
  struct ClusteringIterationStats {
@@ -94,7 +104,7 @@ struct Clustering : ClusteringParameters {
94
104
  * to decode the input vectors.
95
105
  *
96
106
  * @param codec codec used to decode the vectors (nullptr =
97
- * vectors are in fact floats) *
107
+ * vectors are in fact floats)
98
108
  */
99
109
  void train_encoded(
100
110
  idx_t nx,
@@ -12,7 +12,9 @@
12
12
 
13
13
  #include <faiss/IndexAdditiveQuantizer.h>
14
14
  #include <faiss/IndexIVFAdditiveQuantizer.h>
15
+ #include <faiss/IndexIVFIndependentQuantizer.h>
15
16
  #include <faiss/IndexPreTransform.h>
17
+ #include <faiss/IndexRefine.h>
16
18
  #include <faiss/MetaIndexes.h>
17
19
  #include <faiss/impl/FaissAssert.h>
18
20
  #include <faiss/utils/distances.h>
@@ -57,20 +59,29 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
57
59
  }
58
60
 
59
61
  const IndexIVF* try_extract_index_ivf(const Index* index) {
60
- if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
61
- index = pt->index;
62
+ auto* ivf = dynamic_cast<const IndexIVF*>(index);
63
+ if (ivf != nullptr) {
64
+ return ivf;
62
65
  }
63
66
 
67
+ if (auto* pt = dynamic_cast<const IndexPreTransform*>(index)) {
68
+ return try_extract_index_ivf(pt->index);
69
+ }
64
70
  if (auto* idmap = dynamic_cast<const IndexIDMap*>(index)) {
65
- index = idmap->index;
71
+ return try_extract_index_ivf(idmap->index);
66
72
  }
67
73
  if (auto* idmap = dynamic_cast<const IndexIDMap2*>(index)) {
68
- index = idmap->index;
74
+ return try_extract_index_ivf(idmap->index);
75
+ }
76
+ if (auto* indep =
77
+ dynamic_cast<const IndexIVFIndependentQuantizer*>(index)) {
78
+ return try_extract_index_ivf(indep->index_ivf);
79
+ }
80
+ if (auto* refine = dynamic_cast<const IndexRefine*>(index)) {
81
+ return try_extract_index_ivf(refine->base_index);
69
82
  }
70
83
 
71
- auto* ivf = dynamic_cast<const IndexIVF*>(index);
72
-
73
- return ivf;
84
+ return nullptr;
74
85
  }
75
86
 
76
87
  IndexIVF* try_extract_index_ivf(Index* index) {
@@ -321,14 +332,14 @@ void search_with_parameters(
321
332
  double* ms_per_stage) {
322
333
  FAISS_THROW_IF_NOT(params);
323
334
  const float* prev_x = x;
324
- ScopeDeleter<float> del;
335
+ std::unique_ptr<const float[]> del;
325
336
 
326
337
  double t0 = getmillisecs();
327
338
 
328
339
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
329
340
  x = ip->apply_chain(n, x);
330
341
  if (x != prev_x) {
331
- del.set(x);
342
+ del.reset(x);
332
343
  }
333
344
  index = ip->index;
334
345
  }
@@ -371,14 +382,14 @@ void range_search_with_parameters(
371
382
  double* ms_per_stage) {
372
383
  FAISS_THROW_IF_NOT(params);
373
384
  const float* prev_x = x;
374
- ScopeDeleter<float> del;
385
+ std::unique_ptr<const float[]> del;
375
386
 
376
387
  double t0 = getmillisecs();
377
388
 
378
389
  if (auto ip = dynamic_cast<const IndexPreTransform*>(index)) {
379
390
  x = ip->apply_chain(n, x);
380
391
  if (x != prev_x) {
381
- del.set(x);
392
+ del.reset(x);
382
393
  }
383
394
  index = ip->index;
384
395
  }
@@ -18,7 +18,7 @@
18
18
 
19
19
  namespace faiss {
20
20
 
21
- Index::~Index() {}
21
+ Index::~Index() = default;
22
22
 
23
23
  void Index::train(idx_t /*n*/, const float* /*x*/) {
24
24
  // does nothing by default
@@ -17,8 +17,8 @@
17
17
  #include <typeinfo>
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
- #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 4
20
+ #define FAISS_VERSION_MINOR 8
21
+ #define FAISS_VERSION_PATCH 0
22
22
 
23
23
  /**
24
24
  * @namespace faiss
@@ -99,6 +99,7 @@ struct Index {
99
99
  * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
100
100
  * This function slices the input vectors in chunks smaller than
101
101
  * blocksize_add and calls add_core.
102
+ * @param n number of vectors
102
103
  * @param x input matrix, size n * d
103
104
  */
104
105
  virtual void add(idx_t n, const float* x) = 0;
@@ -108,7 +109,9 @@ struct Index {
108
109
  * The default implementation fails with an assertion, as it is
109
110
  * not supported by all indexes.
110
111
  *
111
- * @param xids if non-null, ids to store for the vectors (size n)
112
+ * @param n number of vectors
113
+ * @param x input vectors, size n * d
114
+ * @param xids if non-null, ids to store for the vectors (size n)
112
115
  */
113
116
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
114
117
 
@@ -117,9 +120,11 @@ struct Index {
117
120
  * return at most k vectors. If there are not enough results for a
118
121
  * query, the result array is padded with -1s.
119
122
  *
123
+ * @param n number of vectors
120
124
  * @param x input vectors to search, size n * d
121
- * @param labels output labels of the NNs, size n*k
125
+ * @param k number of extracted vectors
122
126
  * @param distances output pairwise distances, size n*k
127
+ * @param labels output labels of the NNs, size n*k
123
128
  */
124
129
  virtual void search(
125
130
  idx_t n,
@@ -135,6 +140,7 @@ struct Index {
135
140
  * indexes do not implement the range_search (only the k-NN search
136
141
  * is mandatory).
137
142
  *
143
+ * @param n number of vectors
138
144
  * @param x input vectors to search, size n * d
139
145
  * @param radius search radius
140
146
  * @param result result table
@@ -149,8 +155,10 @@ struct Index {
149
155
  /** return the indexes of the k vectors closest to the query x.
150
156
  *
151
157
  * This function is identical as search but only return labels of neighbors.
158
+ * @param n number of vectors
152
159
  * @param x input vectors to search, size n * d
153
160
  * @param labels output labels of the NNs, size n*k
161
+ * @param k number of nearest neighbours
154
162
  */
155
163
  virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
156
164
  const;
@@ -174,7 +182,7 @@ struct Index {
174
182
  /** Reconstruct several stored vectors (or an approximation if lossy coding)
175
183
  *
176
184
  * this function may not be defined for some indexes
177
- * @param n number of vectors to reconstruct
185
+ * @param n number of vectors to reconstruct
178
186
  * @param keys ids of the vectors to reconstruct (size n)
179
187
  * @param recons reconstucted vector (size n * d)
180
188
  */
@@ -184,6 +192,8 @@ struct Index {
184
192
  /** Reconstruct vectors i0 to i0 + ni - 1
185
193
  *
186
194
  * this function may not be defined for some indexes
195
+ * @param i0 index of the first vector in the sequence
196
+ * @param ni number of vectors in the sequence
187
197
  * @param recons reconstucted vector (size ni * d)
188
198
  */
189
199
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
@@ -194,6 +204,11 @@ struct Index {
194
204
  * If there are not enough results for a query, the resulting arrays
195
205
  * is padded with -1s.
196
206
  *
207
+ * @param n number of vectors
208
+ * @param x input vectors to search, size n * d
209
+ * @param k number of extracted vectors
210
+ * @param distances output pairwise distances, size n*k
211
+ * @param labels output labels of the NNs, size n*k
197
212
  * @param recons reconstructed vectors size (n, k, d)
198
213
  **/
199
214
  virtual void search_and_reconstruct(
@@ -10,10 +10,10 @@
10
10
  #include <faiss/Index2Layer.h>
11
11
 
12
12
  #include <faiss/impl/platform_macros.h>
13
- #include <stdint.h>
14
13
  #include <cassert>
15
14
  #include <cinttypes>
16
15
  #include <cmath>
16
+ #include <cstdint>
17
17
  #include <cstdio>
18
18
 
19
19
  #ifdef __SSE3__
@@ -47,7 +47,7 @@ Index2Layer::Index2Layer(
47
47
  pq(quantizer->d, M, nbit) {
48
48
  is_trained = false;
49
49
  for (int nbyte = 0; nbyte < 7; nbyte++) {
50
- if ((1L << (8 * nbyte)) >= nlist) {
50
+ if (((size_t)1 << (8 * nbyte)) >= nlist) {
51
51
  code_size_1 = nbyte;
52
52
  break;
53
53
  }
@@ -60,7 +60,7 @@ Index2Layer::Index2Layer() {
60
60
  code_size = code_size_1 = code_size_2 = 0;
61
61
  }
62
62
 
63
- Index2Layer::~Index2Layer() {}
63
+ Index2Layer::~Index2Layer() = default;
64
64
 
65
65
  void Index2Layer::train(idx_t n, const float* x) {
66
66
  if (verbose) {
@@ -83,7 +83,7 @@ void Index2Layer::train(idx_t n, const float* x) {
83
83
  verbose,
84
84
  pq.cp.seed);
85
85
 
86
- ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
86
+ std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);
87
87
 
88
88
  std::vector<idx_t> assign(n); // assignement to coarse centroids
89
89
  q1.quantizer->assign(n, x, assign.data());
@@ -179,7 +179,7 @@ struct DistanceXPQ4 : Distance2Level {
179
179
  float operator()(idx_t i) override {
180
180
  #ifdef __SSE3__
181
181
  const uint8_t* code = storage.codes.data() + i * storage.code_size;
182
- long key = 0;
182
+ idx_t key = 0;
183
183
  memcpy(&key, code, storage.code_size_1);
184
184
  code += storage.code_size_1;
185
185
 
@@ -225,7 +225,7 @@ struct Distance2xXPQ4 : Distance2Level {
225
225
 
226
226
  float operator()(idx_t i) override {
227
227
  const uint8_t* code = storage.codes.data() + i * storage.code_size;
228
- long key01 = 0;
228
+ int64_t key01 = 0;
229
229
  memcpy(&key01, code, storage.code_size_1);
230
230
  code += storage.code_size_1;
231
231
  #ifdef __SSE3__
@@ -237,7 +237,7 @@ struct Distance2xXPQ4 : Distance2Level {
237
237
  __m128 accu = _mm_setzero_ps();
238
238
 
239
239
  for (int mi_m = 0; mi_m < 2; mi_m++) {
240
- long l1_idx = key01 & ((1L << mi_nbits) - 1);
240
+ int64_t l1_idx = key01 & (((int64_t)1 << mi_nbits) - 1);
241
241
  const __m128* pq_l1 = pq_l1_t + M_2 * l1_idx;
242
242
 
243
243
  for (int m = 0; m < M_2; m++) {