faiss 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 106966f6d5e7f6a3f5237a2ebb59912304bbe319ba2891c45166c753fc0e9df1
4
- data.tar.gz: 3d72777c777d75beb15c09cb223c4e63f188b7e89fab06d6fa9268bc2be4ff59
3
+ metadata.gz: 15428eb8dd7d27f8a94e3a7797dd765827e5454def33fa785055adb7ef0d20c5
4
+ data.tar.gz: 3e8eafebc49163c928bcab8d0ebd0f7b69e6659e49f36d7aaeb44e8651853ac9
5
5
  SHA512:
6
- metadata.gz: b1d822fd4e0850dd667aba09a02f81c43e2afde90a6d88fff1e631539a8d47f42b4dfe0705980a6ff8be88b021af805998d1e70b09cd1010302702a1c1363cac
7
- data.tar.gz: 24e4febd81142150541199523f0b1e19a0be7c658e504472778173e38006234ea47f7e362c1396c3135b4449b7ab5229dc027c4bcea59c880902e1fa23e3c956
6
+ metadata.gz: 598f6e626d5c970e408cff68ec479bf1aa2d6ee18adeeeb2489d1c4fbf627dacbc6e398ce149f0483720080a439df4d2887e5b6c9dc9f465e8ffa1bbeede84a8
7
+ data.tar.gz: fefebfbbbbceb58ac6c6b02636630943d30db7af4ee92521586b4fb40e73bf1aa2b401e28e4c27872bf9344f60e3c9fb288ec6420abc1ffccd05ffd3ec7379fd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.4 (2022-01-10)
2
+
3
+ - Updated Faiss to 1.7.2
4
+
1
5
  ## 0.2.3 (2021-12-17)
2
6
 
3
7
  - Fixed installation error with ARM Mac
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2022 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
@@ -20,6 +20,7 @@
20
20
 
21
21
  #include <faiss/IndexFlat.h>
22
22
  #include <faiss/impl/FaissAssert.h>
23
+ #include <faiss/impl/kmeans1d.h>
23
24
  #include <faiss/utils/distances.h>
24
25
  #include <faiss/utils/random.h>
25
26
  #include <faiss/utils/utils.h>
@@ -553,6 +554,37 @@ void Clustering::train_encoded(
553
554
  }
554
555
  }
555
556
 
557
+ Clustering1D::Clustering1D(int k) : Clustering(1, k) {}
558
+
559
+ Clustering1D::Clustering1D(int k, const ClusteringParameters& cp)
560
+ : Clustering(1, k, cp) {}
561
+
562
+ void Clustering1D::train_exact(idx_t n, const float* x) {
563
+ const float* xt = x;
564
+
565
+ std::unique_ptr<uint8_t[]> del;
566
+ if (n > k * max_points_per_centroid) {
567
+ uint8_t* x_new;
568
+ float* weights_new;
569
+ n = subsample_training_set(
570
+ *this,
571
+ n,
572
+ (uint8_t*)x,
573
+ sizeof(float) * d,
574
+ nullptr,
575
+ &x_new,
576
+ &weights_new);
577
+ del.reset(x_new);
578
+ xt = (float*)x_new;
579
+ }
580
+
581
+ centroids.resize(k);
582
+ double uf = kmeans1d(xt, n, k, centroids.data());
583
+
584
+ ClusteringIterationStats stats = {0.0, 0.0, 0.0, uf, 0};
585
+ iteration_stats.push_back(stats);
586
+ }
587
+
556
588
  float kmeans_clustering(
557
589
  size_t d,
558
590
  size_t n,
@@ -111,6 +111,20 @@ struct Clustering : ClusteringParameters {
111
111
  virtual ~Clustering() {}
112
112
  };
113
113
 
114
+ /** Exact 1D clustering algorithm
115
+ *
116
+ * Since it does not use an index, it does not overload the train() function
117
+ */
118
+ struct Clustering1D : Clustering {
119
+ explicit Clustering1D(int k);
120
+
121
+ Clustering1D(int k, const ClusteringParameters& cp);
122
+
123
+ void train_exact(idx_t n, const float* x);
124
+
125
+ virtual ~Clustering1D() {}
126
+ };
127
+
114
128
  struct ProgressiveDimClusteringParameters : ClusteringParameters {
115
129
  int progressive_dim_steps; ///< number of incremental steps
116
130
  bool apply_pca; ///< apply PCA on input
@@ -18,7 +18,7 @@
18
18
 
19
19
  #define FAISS_VERSION_MAJOR 1
20
20
  #define FAISS_VERSION_MINOR 7
21
- #define FAISS_VERSION_PATCH 1
21
+ #define FAISS_VERSION_PATCH 2
22
22
 
23
23
  /**
24
24
  * @namespace faiss
@@ -30,16 +30,6 @@
30
30
  #include <faiss/utils/distances.h>
31
31
  #include <faiss/utils/utils.h>
32
32
 
33
- /*
34
- #include <faiss/utils/Heap.h>
35
-
36
- #include <faiss/Clustering.h>
37
-
38
- #include <faiss/utils/hamming.h>
39
-
40
-
41
- */
42
-
43
33
  namespace faiss {
44
34
 
45
35
  /*************************************
@@ -52,7 +42,7 @@ Index2Layer::Index2Layer(
52
42
  int M,
53
43
  int nbit,
54
44
  MetricType metric)
55
- : Index(quantizer->d, metric),
45
+ : IndexFlatCodes(0, quantizer->d, metric),
56
46
  q1(quantizer, nlist),
57
47
  pq(quantizer->d, M, nbit) {
58
48
  is_trained = false;
@@ -116,55 +106,6 @@ void Index2Layer::train(idx_t n, const float* x) {
116
106
  is_trained = true;
117
107
  }
118
108
 
119
- void Index2Layer::add(idx_t n, const float* x) {
120
- idx_t bs = 32768;
121
- if (n > bs) {
122
- for (idx_t i0 = 0; i0 < n; i0 += bs) {
123
- idx_t i1 = std::min(i0 + bs, n);
124
- if (verbose) {
125
- printf("Index2Layer::add: adding %" PRId64 ":%" PRId64
126
- " / %" PRId64 "\n",
127
- i0,
128
- i1,
129
- n);
130
- }
131
- add(i1 - i0, x + i0 * d);
132
- }
133
- return;
134
- }
135
-
136
- std::vector<idx_t> codes1(n);
137
- q1.quantizer->assign(n, x, codes1.data());
138
- std::vector<float> residuals(n * d);
139
- for (idx_t i = 0; i < n; i++) {
140
- q1.quantizer->compute_residual(
141
- x + i * d, residuals.data() + i * d, codes1[i]);
142
- }
143
- std::vector<uint8_t> codes2(n * code_size_2);
144
-
145
- pq.compute_codes(residuals.data(), codes2.data(), n);
146
-
147
- codes.resize((ntotal + n) * code_size);
148
- uint8_t* wp = &codes[ntotal * code_size];
149
-
150
- {
151
- int i = 0x11223344;
152
- const char* ip = (char*)&i;
153
- FAISS_THROW_IF_NOT_MSG(
154
- ip[0] == 0x44, "works only on a little-endian CPU");
155
- }
156
-
157
- // copy to output table
158
- for (idx_t i = 0; i < n; i++) {
159
- memcpy(wp, &codes1[i], code_size_1);
160
- wp += code_size_1;
161
- memcpy(wp, &codes2[i * code_size_2], code_size_2);
162
- wp += code_size_2;
163
- }
164
-
165
- ntotal += n;
166
- }
167
-
168
109
  void Index2Layer::search(
169
110
  idx_t /*n*/,
170
111
  const float* /*x*/,
@@ -174,25 +115,6 @@ void Index2Layer::search(
174
115
  FAISS_THROW_MSG("not implemented");
175
116
  }
176
117
 
177
- void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
178
- std::vector<float> recons1(d);
179
- FAISS_THROW_IF_NOT(i0 >= 0 && i0 + ni <= ntotal);
180
- const uint8_t* rp = &codes[i0 * code_size];
181
-
182
- for (idx_t i = 0; i < ni; i++) {
183
- idx_t key = 0;
184
- memcpy(&key, rp, code_size_1);
185
- q1.quantizer->reconstruct(key, recons1.data());
186
- rp += code_size_1;
187
- pq.decode(rp, recons);
188
- for (idx_t j = 0; j < d; j++) {
189
- recons[j] += recons1[j];
190
- }
191
- rp += code_size_2;
192
- recons += d;
193
- }
194
- }
195
-
196
118
  void Index2Layer::transfer_to_IVFPQ(IndexIVFPQ& other) const {
197
119
  FAISS_THROW_IF_NOT(other.nlist == q1.nlist);
198
120
  FAISS_THROW_IF_NOT(other.code_size == code_size_2);
@@ -211,15 +133,6 @@ void Index2Layer::transfer_to_IVFPQ(IndexIVFPQ& other) const {
211
133
  other.ntotal = ntotal;
212
134
  }
213
135
 
214
- void Index2Layer::reconstruct(idx_t key, float* recons) const {
215
- reconstruct_n(key, 1, recons);
216
- }
217
-
218
- void Index2Layer::reset() {
219
- ntotal = 0;
220
- codes.clear();
221
- }
222
-
223
136
  namespace {
224
137
 
225
138
  struct Distance2Level : DistanceComputer {
@@ -259,7 +172,7 @@ struct DistanceXPQ4 : Distance2Level {
259
172
 
260
173
  FAISS_ASSERT(quantizer);
261
174
  M = storage.pq.M;
262
- pq_l1_tab = quantizer->xb.data();
175
+ pq_l1_tab = quantizer->get_xb();
263
176
  }
264
177
 
265
178
  float operator()(idx_t i) override {
@@ -368,12 +281,26 @@ DistanceComputer* Index2Layer::get_distance_computer() const {
368
281
  }
369
282
 
370
283
  /* The standalone codec interface */
371
- size_t Index2Layer::sa_code_size() const {
372
- return code_size;
373
- }
374
284
 
375
285
  void Index2Layer::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
376
286
  FAISS_THROW_IF_NOT(is_trained);
287
+
288
+ idx_t bs = 32768;
289
+ if (n > bs) {
290
+ for (idx_t i0 = 0; i0 < n; i0 += bs) {
291
+ idx_t i1 = std::min(i0 + bs, n);
292
+ if (verbose) {
293
+ printf("Index2Layer::add: adding %" PRId64 ":%" PRId64
294
+ " / %" PRId64 "\n",
295
+ i0,
296
+ i1,
297
+ n);
298
+ }
299
+ sa_encode(i1 - i0, x + i0 * d, bytes + i0 * code_size);
300
+ }
301
+ return;
302
+ }
303
+
377
304
  std::unique_ptr<int64_t[]> list_nos(new int64_t[n]);
378
305
  q1.quantizer->assign(n, x, list_nos.get());
379
306
  std::vector<float> residuals(n * d);
@@ -11,6 +11,7 @@
11
11
 
12
12
  #include <vector>
13
13
 
14
+ #include <faiss/IndexFlatCodes.h>
14
15
  #include <faiss/IndexIVF.h>
15
16
  #include <faiss/IndexPQ.h>
16
17
 
@@ -24,25 +25,19 @@ struct IndexIVFPQ;
24
25
  * The class is mainly intended to store encoded vectors that can be
25
26
  * accessed randomly, the search function is not implemented.
26
27
  */
27
- struct Index2Layer : Index {
28
+ struct Index2Layer : IndexFlatCodes {
28
29
  /// first level quantizer
29
30
  Level1Quantizer q1;
30
31
 
31
32
  /// second level quantizer is always a PQ
32
33
  ProductQuantizer pq;
33
34
 
34
- /// Codes. Size ntotal * code_size.
35
- std::vector<uint8_t> codes;
36
-
37
35
  /// size of the code for the first level (ceil(log8(q1.nlist)))
38
36
  size_t code_size_1;
39
37
 
40
38
  /// size of the code for the second level
41
39
  size_t code_size_2;
42
40
 
43
- /// code_size_1 + code_size_2
44
- size_t code_size;
45
-
46
41
  Index2Layer(
47
42
  Index* quantizer,
48
43
  size_t nlist,
@@ -55,8 +50,6 @@ struct Index2Layer : Index {
55
50
 
56
51
  void train(idx_t n, const float* x) override;
57
52
 
58
- void add(idx_t n, const float* x) override;
59
-
60
53
  /// not implemented
61
54
  void search(
62
55
  idx_t n,
@@ -65,19 +58,12 @@ struct Index2Layer : Index {
65
58
  float* distances,
66
59
  idx_t* labels) const override;
67
60
 
68
- void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
69
-
70
- void reconstruct(idx_t key, float* recons) const override;
71
-
72
- void reset() override;
73
-
74
61
  DistanceComputer* get_distance_computer() const override;
75
62
 
76
63
  /// transfer the flat codes to an IVFPQ index
77
64
  void transfer_to_IVFPQ(IndexIVFPQ& other) const;
78
65
 
79
66
  /* The standalone codec interface */
80
- size_t sa_code_size() const override;
81
67
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
82
68
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
83
69
  };