faiss 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +39 -29
- data/vendor/faiss/faiss/Clustering.cpp +4 -2
- data/vendor/faiss/faiss/IVFlib.cpp +14 -7
- data/vendor/faiss/faiss/Index.h +72 -3
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/IndexBinary.h +46 -3
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
- data/vendor/faiss/faiss/IndexHNSW.h +10 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
- data/vendor/faiss/faiss/IndexIDMap.h +20 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
- data/vendor/faiss/faiss/IndexIVF.h +16 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
- data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
- data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
- data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
- data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
- data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -0
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
- data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
- data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
- data/vendor/faiss/faiss/IndexShards.cpp +7 -6
- data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
- data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
- data/vendor/faiss/faiss/MetricType.h +5 -3
- data/vendor/faiss/faiss/clone_index.cpp +2 -4
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
- data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
- data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
- data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
- data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
- data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
- data/vendor/faiss/faiss/impl/io.cpp +9 -5
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
- data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +49 -33
- data/vendor/faiss/faiss/index_factory.h +8 -2
- data/vendor/faiss/faiss/index_io.h +0 -3
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
- data/vendor/faiss/faiss/utils/Heap.h +23 -12
- data/vendor/faiss/faiss/utils/distances.cpp +42 -21
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
- data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
- data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
- data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
- data/vendor/faiss/faiss/utils/random.cpp +14 -7
- data/vendor/faiss/faiss/utils/utils.cpp +0 -3
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67b63e8dcaa5dcd8c8d08f823c3e1e063906b4a20ad70b28c2fd396e8cccdf48
|
4
|
+
data.tar.gz: 7e9fddc693db40a58c71b5a5ca6b89638b5cc1db708b30ea32a897519d48b970
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 10b9b0efa35ab53fefd3cea964339c856b67af1f7c773f421952f247d16fbd50d76b29cf59eec9e3b77b1a091e96fccb58c1f24afde7d233f298f06ecc90b3b3
|
7
|
+
data.tar.gz: 70037e0611a6f6adddf30f95710b0e1f1c17ffadfe561c3010db8e66265692da17b49a07756ee946d70eed6602ddfbd2107248c01be353d58d3544b6bbc2b15e
|
data/CHANGELOG.md
CHANGED
data/lib/faiss/version.rb
CHANGED
@@ -14,30 +14,21 @@
|
|
14
14
|
#include <faiss/AutoTune.h>
|
15
15
|
|
16
16
|
#include <cinttypes>
|
17
|
-
#include <cmath>
|
18
17
|
|
19
18
|
#include <faiss/impl/FaissAssert.h>
|
20
19
|
#include <faiss/utils/random.h>
|
21
20
|
#include <faiss/utils/utils.h>
|
22
21
|
|
23
|
-
#include <faiss/IndexFlat.h>
|
24
22
|
#include <faiss/IndexHNSW.h>
|
25
23
|
#include <faiss/IndexIVF.h>
|
26
24
|
#include <faiss/IndexIVFFlat.h>
|
27
25
|
#include <faiss/IndexIVFPQ.h>
|
28
26
|
#include <faiss/IndexIVFPQR.h>
|
29
|
-
#include <faiss/IndexLSH.h>
|
30
27
|
#include <faiss/IndexPQ.h>
|
31
28
|
#include <faiss/IndexPreTransform.h>
|
32
29
|
#include <faiss/IndexRefine.h>
|
33
|
-
#include <faiss/IndexScalarQuantizer.h>
|
34
30
|
#include <faiss/IndexShardsIVF.h>
|
35
31
|
#include <faiss/MetaIndexes.h>
|
36
|
-
#include <faiss/VectorTransform.h>
|
37
|
-
|
38
|
-
#include <faiss/IndexBinaryFlat.h>
|
39
|
-
#include <faiss/IndexBinaryHNSW.h>
|
40
|
-
#include <faiss/IndexBinaryIVF.h>
|
41
32
|
|
42
33
|
namespace faiss {
|
43
34
|
|
@@ -137,8 +128,9 @@ bool OperatingPoints::add(
|
|
137
128
|
int i;
|
138
129
|
// stricto sensu this should be a bissection
|
139
130
|
for (i = 0; i < a.size(); i++) {
|
140
|
-
if (a[i].perf >= perf)
|
131
|
+
if (a[i].perf >= perf) {
|
141
132
|
break;
|
133
|
+
}
|
142
134
|
}
|
143
135
|
assert(i < a.size());
|
144
136
|
if (t < a[i].t) {
|
@@ -166,8 +158,9 @@ int OperatingPoints::merge_with(
|
|
166
158
|
int n_add = 0;
|
167
159
|
for (int i = 0; i < other.all_pts.size(); i++) {
|
168
160
|
const OperatingPoint& op = other.all_pts[i];
|
169
|
-
if (add(op.perf, op.t, prefix + op.key, op.cno))
|
161
|
+
if (add(op.perf, op.t, prefix + op.key, op.cno)) {
|
170
162
|
n_add++;
|
163
|
+
}
|
171
164
|
}
|
172
165
|
return n_add;
|
173
166
|
}
|
@@ -175,15 +168,17 @@ int OperatingPoints::merge_with(
|
|
175
168
|
/// get time required to obtain a given performance measure
|
176
169
|
double OperatingPoints::t_for_perf(double perf) const {
|
177
170
|
const std::vector<OperatingPoint>& a = optimal_pts;
|
178
|
-
if (perf > a.back().perf)
|
171
|
+
if (perf > a.back().perf) {
|
179
172
|
return 1e50;
|
173
|
+
}
|
180
174
|
int i0 = -1, i1 = a.size() - 1;
|
181
175
|
while (i0 + 1 < i1) {
|
182
176
|
int imed = (i0 + i1 + 1) / 2;
|
183
|
-
if (a[imed].perf < perf)
|
177
|
+
if (a[imed].perf < perf) {
|
184
178
|
i0 = imed;
|
185
|
-
else
|
179
|
+
} else {
|
186
180
|
i1 = imed;
|
181
|
+
}
|
187
182
|
}
|
188
183
|
return a[i1].t;
|
189
184
|
}
|
@@ -273,8 +268,9 @@ ParameterSpace::ParameterSpace (Index *index):
|
|
273
268
|
|
274
269
|
size_t ParameterSpace::n_combinations() const {
|
275
270
|
size_t n = 1;
|
276
|
-
for (int i = 0; i < parameter_ranges.size(); i++)
|
271
|
+
for (int i = 0; i < parameter_ranges.size(); i++) {
|
277
272
|
n *= parameter_ranges[i].values.size();
|
273
|
+
}
|
278
274
|
return n;
|
279
275
|
}
|
280
276
|
|
@@ -304,8 +300,9 @@ bool ParameterSpace::combination_ge(size_t c1, size_t c2) const {
|
|
304
300
|
int nval = parameter_ranges[i].values.size();
|
305
301
|
size_t j1 = c1 % nval;
|
306
302
|
size_t j2 = c2 % nval;
|
307
|
-
if (!(j1 >= j2))
|
303
|
+
if (!(j1 >= j2)) {
|
308
304
|
return false;
|
305
|
+
}
|
309
306
|
c1 /= nval;
|
310
307
|
c2 /= nval;
|
311
308
|
}
|
@@ -318,8 +315,9 @@ static void init_pq_ParameterRange(
|
|
318
315
|
if (pq.code_size % 4 == 0) {
|
319
316
|
// Polysemous not supported for code sizes that are not a
|
320
317
|
// multiple of 4
|
321
|
-
for (int i = 2; i <= pq.code_size * 8 / 2; i += 2)
|
318
|
+
for (int i = 2; i <= pq.code_size * 8 / 2; i += 2) {
|
322
319
|
pr.values.push_back(i);
|
320
|
+
}
|
323
321
|
}
|
324
322
|
pr.values.push_back(pq.code_size * 8);
|
325
323
|
}
|
@@ -360,8 +358,9 @@ void ParameterSpace::initialize(const Index* index) {
|
|
360
358
|
ParameterRange& pr = add_range("nprobe");
|
361
359
|
for (int i = 0; i < 13; i++) {
|
362
360
|
size_t nprobe = 1 << i;
|
363
|
-
if (nprobe >= ix->nlist)
|
361
|
+
if (nprobe >= ix->nlist) {
|
364
362
|
break;
|
363
|
+
}
|
365
364
|
pr.values.push_back(nprobe);
|
366
365
|
}
|
367
366
|
}
|
@@ -599,12 +598,14 @@ void ParameterSpace::update_bounds(
|
|
599
598
|
double* upper_bound_perf,
|
600
599
|
double* lower_bound_t) const {
|
601
600
|
if (combination_ge(cno, op.cno)) {
|
602
|
-
if (op.t > *lower_bound_t)
|
601
|
+
if (op.t > *lower_bound_t) {
|
603
602
|
*lower_bound_t = op.t;
|
603
|
+
}
|
604
604
|
}
|
605
605
|
if (combination_ge(op.cno, cno)) {
|
606
|
-
if (op.perf < *upper_bound_perf)
|
606
|
+
if (op.perf < *upper_bound_perf) {
|
607
607
|
*upper_bound_perf = op.perf;
|
608
|
+
}
|
608
609
|
}
|
609
610
|
}
|
610
611
|
|
@@ -633,7 +634,7 @@ void ParameterSpace::explore(
|
|
633
634
|
|
634
635
|
bool keep = ops->add(perf, t_search, combination_name(cno), cno);
|
635
636
|
|
636
|
-
if (verbose)
|
637
|
+
if (verbose) {
|
637
638
|
printf(" %zd/%zd: %s perf=%.3f t=%.3f s %s\n",
|
638
639
|
cno,
|
639
640
|
n_comb,
|
@@ -641,14 +642,16 @@ void ParameterSpace::explore(
|
|
641
642
|
perf,
|
642
643
|
t_search,
|
643
644
|
keep ? "*" : "");
|
645
|
+
}
|
644
646
|
}
|
645
647
|
return;
|
646
648
|
}
|
647
649
|
|
648
650
|
int n_exp = n_experiments;
|
649
651
|
|
650
|
-
if (n_exp > n_comb)
|
652
|
+
if (n_exp > n_comb) {
|
651
653
|
n_exp = n_comb;
|
654
|
+
}
|
652
655
|
FAISS_THROW_IF_NOT(n_comb == 1 || n_exp > 2);
|
653
656
|
std::vector<int> perm(n_comb);
|
654
657
|
// make sure the slowest and fastest experiment are run
|
@@ -656,19 +659,21 @@ void ParameterSpace::explore(
|
|
656
659
|
if (n_comb > 1) {
|
657
660
|
perm[1] = n_comb - 1;
|
658
661
|
rand_perm(&perm[2], n_comb - 2, 1234);
|
659
|
-
for (int i = 2; i < perm.size(); i++)
|
662
|
+
for (int i = 2; i < perm.size(); i++) {
|
660
663
|
perm[i]++;
|
664
|
+
}
|
661
665
|
}
|
662
666
|
|
663
667
|
for (size_t xp = 0; xp < n_exp; xp++) {
|
664
668
|
size_t cno = perm[xp];
|
665
669
|
|
666
|
-
if (verbose)
|
670
|
+
if (verbose) {
|
667
671
|
printf(" %zd/%d: cno=%zd %s ",
|
668
672
|
xp,
|
669
673
|
n_exp,
|
670
674
|
cno,
|
671
675
|
combination_name(cno).c_str());
|
676
|
+
}
|
672
677
|
|
673
678
|
{
|
674
679
|
double lower_bound_t = 0.0;
|
@@ -681,13 +686,15 @@ void ParameterSpace::explore(
|
|
681
686
|
&lower_bound_t);
|
682
687
|
}
|
683
688
|
double best_t = ops->t_for_perf(upper_bound_perf);
|
684
|
-
if (verbose)
|
689
|
+
if (verbose) {
|
685
690
|
printf("bounds [perf<=%.3f t>=%.3f] %s",
|
686
691
|
upper_bound_perf,
|
687
692
|
lower_bound_t,
|
688
693
|
best_t <= lower_bound_t ? "skip\n" : "");
|
689
|
-
|
694
|
+
}
|
695
|
+
if (best_t <= lower_bound_t) {
|
690
696
|
continue;
|
697
|
+
}
|
691
698
|
}
|
692
699
|
|
693
700
|
set_index_parameters(index, cno);
|
@@ -704,8 +711,9 @@ void ParameterSpace::explore(
|
|
704
711
|
#pragma omp parallel for
|
705
712
|
for (idx_t q0 = 0; q0 < nq; q0 += batchsize) {
|
706
713
|
size_t q1 = q0 + batchsize;
|
707
|
-
if (q1 > nq)
|
714
|
+
if (q1 > nq) {
|
708
715
|
q1 = nq;
|
716
|
+
}
|
709
717
|
index->search(
|
710
718
|
q1 - q0,
|
711
719
|
xq + q0 * index->d,
|
@@ -716,8 +724,9 @@ void ParameterSpace::explore(
|
|
716
724
|
} else {
|
717
725
|
for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
|
718
726
|
size_t q1 = q0 + batchsize;
|
719
|
-
if (q1 > nq)
|
727
|
+
if (q1 > nq) {
|
720
728
|
q1 = nq;
|
729
|
+
}
|
721
730
|
index->search(
|
722
731
|
q1 - q0,
|
723
732
|
xq + q0 * index->d,
|
@@ -737,13 +746,14 @@ void ParameterSpace::explore(
|
|
737
746
|
|
738
747
|
bool keep = ops->add(perf, t_search, combination_name(cno), cno);
|
739
748
|
|
740
|
-
if (verbose)
|
749
|
+
if (verbose) {
|
741
750
|
printf(" perf %.3f t %.3f (%d %s) %s\n",
|
742
751
|
perf,
|
743
752
|
t_search,
|
744
753
|
nrun,
|
745
754
|
nrun >= 2 ? "runs" : "run",
|
746
755
|
keep ? "*" : "");
|
756
|
+
}
|
747
757
|
}
|
748
758
|
}
|
749
759
|
|
@@ -39,8 +39,9 @@ void Clustering::post_process_centroids() {
|
|
39
39
|
}
|
40
40
|
|
41
41
|
if (int_centroids) {
|
42
|
-
for (size_t i = 0; i < centroids.size(); i++)
|
42
|
+
for (size_t i = 0; i < centroids.size(); i++) {
|
43
43
|
centroids[i] = roundf(centroids[i]);
|
44
|
+
}
|
44
45
|
}
|
45
46
|
}
|
46
47
|
|
@@ -530,8 +531,9 @@ void Clustering::train_encoded(
|
|
530
531
|
InterruptCallback::check();
|
531
532
|
}
|
532
533
|
|
533
|
-
if (verbose)
|
534
|
+
if (verbose) {
|
534
535
|
printf("\n");
|
536
|
+
}
|
535
537
|
if (nredo > 1) {
|
536
538
|
if ((lower_is_better && obj < best_obj) ||
|
537
539
|
(!lower_is_better && obj > best_obj)) {
|
@@ -155,8 +155,9 @@ void search_and_return_centroids(
|
|
155
155
|
n, x, nprobe, cent_dis.data(), cent_nos.data());
|
156
156
|
|
157
157
|
if (query_centroid_ids) {
|
158
|
-
for (size_t i = 0; i < n; i++)
|
158
|
+
for (size_t i = 0; i < n; i++) {
|
159
159
|
query_centroid_ids[i] = cent_nos[i * nprobe];
|
160
|
+
}
|
160
161
|
}
|
161
162
|
|
162
163
|
index_ivf->search_preassigned(
|
@@ -165,13 +166,15 @@ void search_and_return_centroids(
|
|
165
166
|
for (size_t i = 0; i < n * k; i++) {
|
166
167
|
idx_t label = labels[i];
|
167
168
|
if (label < 0) {
|
168
|
-
if (result_centroid_ids)
|
169
|
+
if (result_centroid_ids) {
|
169
170
|
result_centroid_ids[i] = -1;
|
171
|
+
}
|
170
172
|
} else {
|
171
173
|
long list_no = lo_listno(label);
|
172
174
|
long list_index = lo_offset(label);
|
173
|
-
if (result_centroid_ids)
|
175
|
+
if (result_centroid_ids) {
|
174
176
|
result_centroid_ids[i] = list_no;
|
177
|
+
}
|
175
178
|
labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
|
176
179
|
}
|
177
180
|
}
|
@@ -192,10 +195,11 @@ static void shift_and_add(
|
|
192
195
|
std::vector<T>& dst,
|
193
196
|
size_t remove,
|
194
197
|
const std::vector<T>& src) {
|
195
|
-
if (remove > 0)
|
198
|
+
if (remove > 0) {
|
196
199
|
memmove(dst.data(),
|
197
200
|
dst.data() + remove,
|
198
201
|
(dst.size() - remove) * sizeof(T));
|
202
|
+
}
|
199
203
|
size_t insert_point = dst.size() - remove;
|
200
204
|
dst.resize(insert_point + src.size());
|
201
205
|
memcpy(dst.data() + insert_point, src.data(), src.size() * sizeof(T));
|
@@ -206,10 +210,11 @@ static void shift_and_add(
|
|
206
210
|
MaybeOwnedVector<T>& dst,
|
207
211
|
size_t remove,
|
208
212
|
const MaybeOwnedVector<T>& src) {
|
209
|
-
if (remove > 0)
|
213
|
+
if (remove > 0) {
|
210
214
|
memmove(dst.data(),
|
211
215
|
dst.data() + remove,
|
212
216
|
(dst.size() - remove) * sizeof(T));
|
217
|
+
}
|
213
218
|
size_t insert_point = dst.size() - remove;
|
214
219
|
dst.resize(insert_point + src.size());
|
215
220
|
memcpy(dst.data() + insert_point, src.data(), src.size() * sizeof(T));
|
@@ -217,14 +222,16 @@ static void shift_and_add(
|
|
217
222
|
|
218
223
|
template <class T>
|
219
224
|
static void remove_from_begin(std::vector<T>& v, size_t remove) {
|
220
|
-
if (remove > 0)
|
225
|
+
if (remove > 0) {
|
221
226
|
v.erase(v.begin(), v.begin() + remove);
|
227
|
+
}
|
222
228
|
}
|
223
229
|
|
224
230
|
template <class T>
|
225
231
|
static void remove_from_begin(MaybeOwnedVector<T>& v, size_t remove) {
|
226
|
-
if (remove > 0)
|
232
|
+
if (remove > 0) {
|
227
233
|
v.erase(v.begin(), v.begin() + remove);
|
234
|
+
}
|
228
235
|
}
|
229
236
|
|
230
237
|
void SlidingIndexWindow::step(const Index* sub_index, bool remove_oldest) {
|
data/vendor/faiss/faiss/Index.h
CHANGED
@@ -11,13 +11,13 @@
|
|
11
11
|
#define FAISS_INDEX_H
|
12
12
|
|
13
13
|
#include <faiss/MetricType.h>
|
14
|
+
#include <faiss/impl/FaissAssert.h>
|
15
|
+
|
14
16
|
#include <cstdio>
|
15
17
|
#include <sstream>
|
16
|
-
#include <string>
|
17
|
-
#include <typeinfo>
|
18
18
|
|
19
19
|
#define FAISS_VERSION_MAJOR 1
|
20
|
-
#define FAISS_VERSION_MINOR
|
20
|
+
#define FAISS_VERSION_MINOR 12
|
21
21
|
#define FAISS_VERSION_PATCH 0
|
22
22
|
|
23
23
|
// Macro to combine the version components into a single string
|
@@ -56,6 +56,28 @@ struct IDSelector;
|
|
56
56
|
struct RangeSearchResult;
|
57
57
|
struct DistanceComputer;
|
58
58
|
|
59
|
+
enum NumericType {
|
60
|
+
Float32,
|
61
|
+
Float16,
|
62
|
+
UInt8,
|
63
|
+
Int8,
|
64
|
+
};
|
65
|
+
|
66
|
+
inline size_t get_numeric_type_size(NumericType numeric_type) {
|
67
|
+
switch (numeric_type) {
|
68
|
+
case NumericType::Float32:
|
69
|
+
return 4;
|
70
|
+
case NumericType::Float16:
|
71
|
+
return 2;
|
72
|
+
case NumericType::UInt8:
|
73
|
+
case NumericType::Int8:
|
74
|
+
return 1;
|
75
|
+
default:
|
76
|
+
FAISS_THROW_MSG(
|
77
|
+
"Unknown Numeric Type. Only supports Float32, Float16");
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
59
81
|
/** Parent class for the optional search paramenters.
|
60
82
|
*
|
61
83
|
* Sub-classes with additional search parameters should inherit this class.
|
@@ -107,6 +129,14 @@ struct Index {
|
|
107
129
|
*/
|
108
130
|
virtual void train(idx_t n, const float* x);
|
109
131
|
|
132
|
+
virtual void trainEx(idx_t n, const void* x, NumericType numeric_type) {
|
133
|
+
if (numeric_type == NumericType::Float32) {
|
134
|
+
train(n, static_cast<const float*>(x));
|
135
|
+
} else {
|
136
|
+
FAISS_THROW_MSG("Index::train: unsupported numeric type");
|
137
|
+
}
|
138
|
+
}
|
139
|
+
|
110
140
|
/** Add n vectors of dimension d to the index.
|
111
141
|
*
|
112
142
|
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
|
@@ -117,6 +147,14 @@ struct Index {
|
|
117
147
|
*/
|
118
148
|
virtual void add(idx_t n, const float* x) = 0;
|
119
149
|
|
150
|
+
virtual void addEx(idx_t n, const void* x, NumericType numeric_type) {
|
151
|
+
if (numeric_type == NumericType::Float32) {
|
152
|
+
add(n, static_cast<const float*>(x));
|
153
|
+
} else {
|
154
|
+
FAISS_THROW_MSG("Index::add: unsupported numeric type");
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
120
158
|
/** Same as add, but stores xids instead of sequential ids.
|
121
159
|
*
|
122
160
|
* The default implementation fails with an assertion, as it is
|
@@ -127,6 +165,17 @@ struct Index {
|
|
127
165
|
* @param xids if non-null, ids to store for the vectors (size n)
|
128
166
|
*/
|
129
167
|
virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
|
168
|
+
virtual void add_with_idsEx(
|
169
|
+
idx_t n,
|
170
|
+
const void* x,
|
171
|
+
NumericType numeric_type,
|
172
|
+
const idx_t* xids) {
|
173
|
+
if (numeric_type == NumericType::Float32) {
|
174
|
+
add_with_ids(n, static_cast<const float*>(x), xids);
|
175
|
+
} else {
|
176
|
+
FAISS_THROW_MSG("Index::add_with_ids: unsupported numeric type");
|
177
|
+
}
|
178
|
+
}
|
130
179
|
|
131
180
|
/** query n vectors of dimension d to the index.
|
132
181
|
*
|
@@ -147,6 +196,26 @@ struct Index {
|
|
147
196
|
idx_t* labels,
|
148
197
|
const SearchParameters* params = nullptr) const = 0;
|
149
198
|
|
199
|
+
virtual void searchEx(
|
200
|
+
idx_t n,
|
201
|
+
const void* x,
|
202
|
+
NumericType numeric_type,
|
203
|
+
idx_t k,
|
204
|
+
float* distances,
|
205
|
+
idx_t* labels,
|
206
|
+
const SearchParameters* params = nullptr) const {
|
207
|
+
if (numeric_type == NumericType::Float32) {
|
208
|
+
search(n,
|
209
|
+
static_cast<const float*>(x),
|
210
|
+
k,
|
211
|
+
distances,
|
212
|
+
labels,
|
213
|
+
params);
|
214
|
+
} else {
|
215
|
+
FAISS_THROW_MSG("Index::search: unsupported numeric type");
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
150
219
|
/** query n vectors of dimension d to the index.
|
151
220
|
*
|
152
221
|
* return all vectors with distance < radius. Note that many
|
@@ -9,10 +9,7 @@
|
|
9
9
|
|
10
10
|
#include <faiss/Index2Layer.h>
|
11
11
|
|
12
|
-
#include <faiss/impl/platform_macros.h>
|
13
|
-
#include <cassert>
|
14
12
|
#include <cinttypes>
|
15
|
-
#include <cmath>
|
16
13
|
#include <cstdint>
|
17
14
|
#include <cstdio>
|
18
15
|
|
@@ -93,13 +90,14 @@ void Index2Layer::train(idx_t n, const float* x) {
|
|
93
90
|
x + i * d, residuals.data() + i * d, assign[i]);
|
94
91
|
}
|
95
92
|
|
96
|
-
if (verbose)
|
93
|
+
if (verbose) {
|
97
94
|
printf("training %zdx%zd product quantizer on %" PRId64
|
98
95
|
" vectors in %dD\n",
|
99
96
|
pq.M,
|
100
97
|
pq.ksub,
|
101
98
|
n,
|
102
99
|
d);
|
100
|
+
}
|
103
101
|
pq.verbose = verbose;
|
104
102
|
pq.train(n, residuals.data());
|
105
103
|
|
@@ -8,10 +8,8 @@
|
|
8
8
|
#ifndef FAISS_INDEX_BINARY_H
|
9
9
|
#define FAISS_INDEX_BINARY_H
|
10
10
|
|
11
|
+
#include <cstdint>
|
11
12
|
#include <cstdio>
|
12
|
-
#include <sstream>
|
13
|
-
#include <string>
|
14
|
-
#include <typeinfo>
|
15
13
|
|
16
14
|
#include <faiss/Index.h>
|
17
15
|
|
@@ -54,6 +52,13 @@ struct IndexBinary {
|
|
54
52
|
* @param x training vecors, size n * d / 8
|
55
53
|
*/
|
56
54
|
virtual void train(idx_t n, const uint8_t* x);
|
55
|
+
virtual void trainEx(idx_t n, const void* x, NumericType numeric_type) {
|
56
|
+
if (numeric_type == NumericType::UInt8) {
|
57
|
+
train(n, static_cast<const uint8_t*>(x));
|
58
|
+
} else {
|
59
|
+
FAISS_THROW_MSG("IndexBinary::train: unsupported numeric type");
|
60
|
+
}
|
61
|
+
};
|
57
62
|
|
58
63
|
/** Add n vectors of dimension d to the index.
|
59
64
|
*
|
@@ -61,6 +66,13 @@ struct IndexBinary {
|
|
61
66
|
* @param x input matrix, size n * d / 8
|
62
67
|
*/
|
63
68
|
virtual void add(idx_t n, const uint8_t* x) = 0;
|
69
|
+
virtual void addEx(idx_t n, const void* x, NumericType numeric_type) {
|
70
|
+
if (numeric_type == NumericType::UInt8) {
|
71
|
+
add(n, static_cast<const uint8_t*>(x));
|
72
|
+
} else {
|
73
|
+
FAISS_THROW_MSG("IndexBinary::add: unsupported numeric type");
|
74
|
+
}
|
75
|
+
};
|
64
76
|
|
65
77
|
/** Same as add, but stores xids instead of sequential ids.
|
66
78
|
*
|
@@ -70,6 +82,18 @@ struct IndexBinary {
|
|
70
82
|
* @param xids if non-null, ids to store for the vectors (size n)
|
71
83
|
*/
|
72
84
|
virtual void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids);
|
85
|
+
virtual void add_with_idsEx(
|
86
|
+
idx_t n,
|
87
|
+
const void* x,
|
88
|
+
NumericType numeric_type,
|
89
|
+
const idx_t* xids) {
|
90
|
+
if (numeric_type == NumericType::UInt8) {
|
91
|
+
add_with_ids(n, static_cast<const uint8_t*>(x), xids);
|
92
|
+
} else {
|
93
|
+
FAISS_THROW_MSG(
|
94
|
+
"IndexBinary::add_with_ids: unsupported numeric type");
|
95
|
+
}
|
96
|
+
};
|
73
97
|
|
74
98
|
/** Query n vectors of dimension d to the index.
|
75
99
|
*
|
@@ -87,6 +111,25 @@ struct IndexBinary {
|
|
87
111
|
int32_t* distances,
|
88
112
|
idx_t* labels,
|
89
113
|
const SearchParameters* params = nullptr) const = 0;
|
114
|
+
virtual void searchEx(
|
115
|
+
idx_t n,
|
116
|
+
const void* x,
|
117
|
+
NumericType numeric_type,
|
118
|
+
idx_t k,
|
119
|
+
int32_t* distances,
|
120
|
+
idx_t* labels,
|
121
|
+
const SearchParameters* params = nullptr) const {
|
122
|
+
if (numeric_type == NumericType::UInt8) {
|
123
|
+
search(n,
|
124
|
+
static_cast<const uint8_t*>(x),
|
125
|
+
k,
|
126
|
+
distances,
|
127
|
+
labels,
|
128
|
+
params);
|
129
|
+
} else {
|
130
|
+
FAISS_THROW_MSG("IndexBinary::search: unsupported numeric type");
|
131
|
+
}
|
132
|
+
};
|
90
133
|
|
91
134
|
/** Query n vectors of dimension d to the index.
|
92
135
|
*
|