faiss 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
@@ -19,6 +19,7 @@
|
|
19
19
|
|
20
20
|
#include <faiss/impl/FaissAssert.h>
|
21
21
|
#include <faiss/impl/io.h>
|
22
|
+
#include <faiss/utils/hamming.h>
|
22
23
|
|
23
24
|
#include <faiss/IndexFlat.h>
|
24
25
|
#include <faiss/VectorTransform.h>
|
@@ -41,6 +42,7 @@
|
|
41
42
|
#include <faiss/IndexBinaryFromFloat.h>
|
42
43
|
#include <faiss/IndexBinaryHNSW.h>
|
43
44
|
#include <faiss/IndexBinaryIVF.h>
|
45
|
+
#include <faiss/IndexBinaryHash.h>
|
44
46
|
|
45
47
|
|
46
48
|
|
@@ -364,6 +366,25 @@ ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
|
|
364
366
|
return pq;
|
365
367
|
}
|
366
368
|
|
369
|
+
/**
 * Deserialize a DirectMap from f.
 *
 * Stream layout consumed here: one char holding the map type, the flat
 * `array` vector and, for Hashtable maps only, a vector of (key, value)
 * pairs that is loaded into dm->hashtable.
 *
 * @param dm  map to fill in
 * @param f   reader to consume from
 */
static void read_direct_map (DirectMap *dm, IOReader *f) {
    // the type is stored as a single char
    char maintain_direct_map;
    READ1 (maintain_direct_map);
    dm->type = (DirectMap::Type)maintain_direct_map;

    READVECTOR (dm->array);

    // only the Hashtable flavour carries an extra payload
    if (dm->type != DirectMap::Hashtable) {
        return;
    }

    using idx_t = Index::idx_t;
    std::vector<std::pair<idx_t, idx_t>> v;
    READVECTOR (v);

    std::unordered_map<idx_t, idx_t> &table = dm->hashtable;
    table.reserve (v.size());
    for (const auto &entry : v) {
        // assignment (not insert): a later duplicate key overrides an earlier one
        table [entry.first] = entry.second;
    }
}
|
386
|
+
|
387
|
+
|
367
388
|
static void read_ivf_header (
|
368
389
|
IndexIVF *ivf, IOReader *f,
|
369
390
|
std::vector<std::vector<Index::idx_t> > *ids = nullptr)
|
@@ -378,8 +399,7 @@ static void read_ivf_header (
|
|
378
399
|
for (size_t i = 0; i < ivf->nlist; i++)
|
379
400
|
READVECTOR ((*ids)[i]);
|
380
401
|
}
|
381
|
-
|
382
|
-
READVECTOR (ivf->direct_map);
|
402
|
+
read_direct_map (&ivf->direct_map, f);
|
383
403
|
}
|
384
404
|
|
385
405
|
// used for legacy formats
|
@@ -437,10 +457,15 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
437
457
|
Index * idx = nullptr;
|
438
458
|
uint32_t h;
|
439
459
|
READ1 (h);
|
440
|
-
if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
|
460
|
+
if (h == fourcc ("IxFI") || h == fourcc ("IxF2") || h == fourcc("IxFl")) {
|
441
461
|
IndexFlat *idxf;
|
442
|
-
if (h == fourcc ("IxFI"))
|
443
|
-
|
462
|
+
if (h == fourcc ("IxFI")) {
|
463
|
+
idxf = new IndexFlatIP ();
|
464
|
+
} else if (h == fourcc("IxF2")) {
|
465
|
+
idxf = new IndexFlatL2 ();
|
466
|
+
} else {
|
467
|
+
idxf = new IndexFlat ();
|
468
|
+
}
|
444
469
|
read_index_header (idxf, f);
|
445
470
|
READVECTOR (idxf->xb);
|
446
471
|
FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
|
@@ -726,10 +751,59 @@ static void read_binary_ivf_header (
|
|
726
751
|
for (size_t i = 0; i < ivf->nlist; i++)
|
727
752
|
READVECTOR ((*ids)[i]);
|
728
753
|
}
|
729
|
-
|
730
|
-
|
754
|
+
read_direct_map (&ivf->direct_map, f);
|
755
|
+
}
|
756
|
+
|
757
|
+
/**
 * Deserialize the inverted lists of an IndexBinaryHash from f.
 *
 * Stream layout consumed here: the number of lists (sz), the bit width
 * used to store list sizes (il_nbit), a packed bitstring with one
 * (hash: b bits, list size: il_nbit bits) record per list, and then the
 * ids and vecs vectors of every list, in the same order as the records.
 *
 * @param invlists  map that receives the lists, keyed by hash value
 * @param b         number of bits of each stored hash value
 * @param f         reader to consume from
 */
static void read_binary_hash_invlists (
        IndexBinaryHash::InvertedListMap &invlists,
        int b, IOReader *f)
{
    size_t sz;
    READ1 (sz);
    int il_nbit = 0;
    READ1 (il_nbit);

    // buffer for bitstrings: declared empty and filled by READVECTOR, as
    // read_binary_multi_hash_map does (no speculative pre-allocation
    // driven by values taken from the stream)
    std::vector<uint8_t> buf;
    READVECTOR (buf);

    // the writer packs sz records of (b + il_nbit) bits into whole bytes;
    // reject a buffer of any other size before decoding records from it
    size_t nbit = (b + il_nbit) * sz;
    FAISS_THROW_IF_NOT (buf.size() == (nbit + 7) / 8);

    BitstringReader rd (buf.data(), buf.size());
    invlists.reserve (sz);
    for (size_t i = 0; i < sz; i++) {
        uint64_t hash = rd.read(b);
        uint64_t ilsz = rd.read(il_nbit);
        auto & il = invlists[hash];
        READVECTOR (il.ids);
        // the size announced in the bitstring must match the stored ids
        FAISS_THROW_IF_NOT (il.ids.size() == ilsz);
        READVECTOR (il.vecs);
    }
}
|
779
|
+
|
780
|
+
/**
 * Deserialize one hash map of an IndexBinaryMultiHash from f.
 *
 * Stream layout consumed here: the bit width of an id (id_bits), the
 * number of buckets (sz) and one packed bitstring. For every bucket the
 * bitstring holds the hash (b bits), the bucket length (id_bits bits)
 * and then that many ids (id_bits bits each).
 *
 * @param map     map that receives the buckets, keyed by hash value
 * @param b       number of bits of each stored hash value
 * @param ntotal  total number of ids expected over all buckets; only
 *                used to validate the size of the bitstring
 * @param f       reader to consume from
 */
static void read_binary_multi_hash_map(
        IndexBinaryMultiHash::Map &map,
        int b, size_t ntotal,
        IOReader *f)
{
    int id_bits;
    size_t sz;
    READ1 (id_bits);
    READ1 (sz);

    std::vector<uint8_t> buf;
    READVECTOR (buf);

    // sz bucket headers plus ntotal ids, rounded up to whole bytes
    size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
    FAISS_THROW_IF_NOT (buf.size() == (nbit + 7) / 8);

    BitstringReader rd (buf.data(), buf.size());
    map.reserve (sz);
    for (size_t buckets_left = sz; buckets_left > 0; --buckets_left) {
        const uint64_t hash = rd.read(b);
        uint64_t ids_left = rd.read(id_bits);
        auto &bucket = map[hash];
        while (ids_left > 0) {
            bucket.push_back (rd.read (id_bits));
            --ids_left;
        }
    }
}
|
732
804
|
|
805
|
+
|
806
|
+
|
733
807
|
IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
734
808
|
IndexBinary * idx = nullptr;
|
735
809
|
uint32_t h;
|
@@ -771,6 +845,28 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
771
845
|
static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map ();
|
772
846
|
}
|
773
847
|
idx = idxmap;
|
848
|
+
} else if(h == fourcc("IBHh")) {
|
849
|
+
IndexBinaryHash *idxh = new IndexBinaryHash ();
|
850
|
+
read_index_binary_header (idxh, f);
|
851
|
+
READ1 (idxh->b);
|
852
|
+
READ1 (idxh->nflip);
|
853
|
+
read_binary_hash_invlists(idxh->invlists, idxh->b, f);
|
854
|
+
idx = idxh;
|
855
|
+
} else if(h == fourcc("IBHm")) {
|
856
|
+
IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash ();
|
857
|
+
read_index_binary_header (idxmh, f);
|
858
|
+
idxmh->storage = dynamic_cast<IndexBinaryFlat*> (read_index_binary (f));
|
859
|
+
FAISS_THROW_IF_NOT(idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
|
860
|
+
idxmh->own_fields = true;
|
861
|
+
READ1 (idxmh->b);
|
862
|
+
READ1 (idxmh->nhash);
|
863
|
+
READ1 (idxmh->nflip);
|
864
|
+
idxmh->maps.resize (idxmh->nhash);
|
865
|
+
for (int i = 0; i < idxmh->nhash; i++) {
|
866
|
+
read_binary_multi_hash_map(
|
867
|
+
idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
|
868
|
+
}
|
869
|
+
idx = idxmh;
|
774
870
|
} else {
|
775
871
|
FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
|
776
872
|
idx = nullptr;
|
@@ -19,6 +19,7 @@
|
|
19
19
|
|
20
20
|
#include <faiss/impl/FaissAssert.h>
|
21
21
|
#include <faiss/impl/io.h>
|
22
|
+
#include <faiss/utils/hamming.h>
|
22
23
|
|
23
24
|
#include <faiss/IndexFlat.h>
|
24
25
|
#include <faiss/VectorTransform.h>
|
@@ -41,6 +42,7 @@
|
|
41
42
|
#include <faiss/IndexBinaryFromFloat.h>
|
42
43
|
#include <faiss/IndexBinaryHNSW.h>
|
43
44
|
#include <faiss/IndexBinaryIVF.h>
|
45
|
+
#include <faiss/IndexBinaryHash.h>
|
44
46
|
|
45
47
|
|
46
48
|
|
@@ -286,20 +288,33 @@ static void write_HNSW (const HNSW *hnsw, IOWriter *f) {
|
|
286
288
|
WRITE1 (hnsw->upper_beam);
|
287
289
|
}
|
288
290
|
|
291
|
+
static void write_direct_map (const DirectMap *dm, IOWriter *f) {
|
292
|
+
char maintain_direct_map = (char)dm->type; // for backwards compatibility with bool
|
293
|
+
WRITE1 (maintain_direct_map);
|
294
|
+
WRITEVECTOR (dm->array);
|
295
|
+
if (dm->type == DirectMap::Hashtable) {
|
296
|
+
using idx_t = Index::idx_t;
|
297
|
+
std::vector<std::pair<idx_t, idx_t>> v;
|
298
|
+
const std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
|
299
|
+
v.resize (map.size());
|
300
|
+
std::copy(map.begin(), map.end(), v.begin());
|
301
|
+
WRITEVECTOR (v);
|
302
|
+
}
|
303
|
+
}
|
304
|
+
|
289
305
|
static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) {
|
290
306
|
write_index_header (ivf, f);
|
291
307
|
WRITE1 (ivf->nlist);
|
292
308
|
WRITE1 (ivf->nprobe);
|
293
309
|
write_index (ivf->quantizer, f);
|
294
|
-
|
295
|
-
WRITEVECTOR (ivf->direct_map);
|
310
|
+
write_direct_map (&ivf->direct_map, f);
|
296
311
|
}
|
297
312
|
|
298
313
|
void write_index (const Index *idx, IOWriter *f) {
|
299
314
|
if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
|
300
315
|
uint32_t h = fourcc (
|
301
316
|
idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
|
302
|
-
idxf->metric_type == METRIC_L2 ? "IxF2" :
|
317
|
+
idxf->metric_type == METRIC_L2 ? "IxF2" : "IxFl");
|
303
318
|
WRITE1 (h);
|
304
319
|
write_index_header (idx, f);
|
305
320
|
WRITEVECTOR (idxf->xb);
|
@@ -499,8 +514,68 @@ static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) {
|
|
499
514
|
WRITE1 (ivf->nlist);
|
500
515
|
WRITE1 (ivf->nprobe);
|
501
516
|
write_index_binary (ivf->quantizer, f);
|
502
|
-
|
503
|
-
|
517
|
+
write_direct_map (&ivf->direct_map, f);
|
518
|
+
}
|
519
|
+
|
520
|
+
static void write_binary_hash_invlists (
|
521
|
+
const IndexBinaryHash::InvertedListMap &invlists,
|
522
|
+
int b, IOWriter *f)
|
523
|
+
{
|
524
|
+
size_t sz = invlists.size();
|
525
|
+
WRITE1 (sz);
|
526
|
+
size_t maxil = 0;
|
527
|
+
for (auto it = invlists.begin(); it != invlists.end(); ++it) {
|
528
|
+
if(it->second.ids.size() > maxil) {
|
529
|
+
maxil = it->second.ids.size();
|
530
|
+
}
|
531
|
+
}
|
532
|
+
int il_nbit = 0;
|
533
|
+
while(maxil >= ((uint64_t)1 << il_nbit)) {
|
534
|
+
il_nbit++;
|
535
|
+
}
|
536
|
+
WRITE1(il_nbit);
|
537
|
+
|
538
|
+
// first write sizes then data, may be useful if we want to
|
539
|
+
// memmap it at some point
|
540
|
+
|
541
|
+
// buffer for bitstrings
|
542
|
+
std::vector<uint8_t> buf (((b + il_nbit) * sz + 7) / 8);
|
543
|
+
BitstringWriter wr (buf.data(), buf.size());
|
544
|
+
for (auto it = invlists.begin(); it != invlists.end(); ++it) {
|
545
|
+
wr.write (it->first, b);
|
546
|
+
wr.write (it->second.ids.size(), il_nbit);
|
547
|
+
}
|
548
|
+
WRITEVECTOR (buf);
|
549
|
+
|
550
|
+
for (auto it = invlists.begin(); it != invlists.end(); ++it) {
|
551
|
+
WRITEVECTOR (it->second.ids);
|
552
|
+
WRITEVECTOR (it->second.vecs);
|
553
|
+
}
|
554
|
+
}
|
555
|
+
|
556
|
+
static void write_binary_multi_hash_map(
|
557
|
+
const IndexBinaryMultiHash::Map &map,
|
558
|
+
int b, size_t ntotal,
|
559
|
+
IOWriter *f)
|
560
|
+
{
|
561
|
+
int id_bits = 0;
|
562
|
+
while ((ntotal > ((Index::idx_t)1 << id_bits))) {
|
563
|
+
id_bits++;
|
564
|
+
}
|
565
|
+
WRITE1(id_bits);
|
566
|
+
size_t sz = map.size();
|
567
|
+
WRITE1(sz);
|
568
|
+
size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
|
569
|
+
std::vector<uint8_t> buf((nbit + 7) / 8);
|
570
|
+
BitstringWriter wr (buf.data(), buf.size());
|
571
|
+
for (auto it = map.begin(); it != map.end(); ++it) {
|
572
|
+
wr.write(it->first, b);
|
573
|
+
wr.write(it->second.size(), id_bits);
|
574
|
+
for (auto id : it->second) {
|
575
|
+
wr.write(id, id_bits);
|
576
|
+
}
|
577
|
+
}
|
578
|
+
WRITEVECTOR (buf);
|
504
579
|
}
|
505
580
|
|
506
581
|
void write_index_binary (const IndexBinary *idx, IOWriter *f) {
|
@@ -539,6 +614,27 @@ void write_index_binary (const IndexBinary *idx, IOWriter *f) {
|
|
539
614
|
write_index_binary_header (idxmap, f);
|
540
615
|
write_index_binary (idxmap->index, f);
|
541
616
|
WRITEVECTOR (idxmap->id_map);
|
617
|
+
} else if (const IndexBinaryHash *idxh =
|
618
|
+
dynamic_cast<const IndexBinaryHash *> (idx)) {
|
619
|
+
uint32_t h = fourcc ("IBHh");
|
620
|
+
WRITE1 (h);
|
621
|
+
write_index_binary_header (idxh, f);
|
622
|
+
WRITE1 (idxh->b);
|
623
|
+
WRITE1 (idxh->nflip);
|
624
|
+
write_binary_hash_invlists(idxh->invlists, idxh->b, f);
|
625
|
+
} else if (const IndexBinaryMultiHash *idxmh =
|
626
|
+
dynamic_cast<const IndexBinaryMultiHash *> (idx)) {
|
627
|
+
uint32_t h = fourcc ("IBHm");
|
628
|
+
WRITE1 (h);
|
629
|
+
write_index_binary_header (idxmh, f);
|
630
|
+
write_index_binary (idxmh->storage, f);
|
631
|
+
WRITE1 (idxmh->b);
|
632
|
+
WRITE1 (idxmh->nhash);
|
633
|
+
WRITE1 (idxmh->nflip);
|
634
|
+
for (int i = 0; i < idxmh->nhash; i++) {
|
635
|
+
write_binary_multi_hash_map(
|
636
|
+
idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
|
637
|
+
}
|
542
638
|
} else {
|
543
639
|
FAISS_THROW_MSG ("don't know how to serialize this type of index");
|
544
640
|
}
|
data/vendor/faiss/impl/io.cpp
CHANGED
@@ -37,7 +37,6 @@ int IOWriter::fileno ()
|
|
37
37
|
***********************************************************************/
|
38
38
|
|
39
39
|
|
40
|
-
|
41
40
|
size_t VectorIOWriter::operator()(
|
42
41
|
const void *ptr, size_t size, size_t nitems)
|
43
42
|
{
|
@@ -132,6 +131,117 @@ int FileIOWriter::fileno() {
|
|
132
131
|
return ::fileno (f);
|
133
132
|
}
|
134
133
|
|
134
|
+
/***********************************************************************
|
135
|
+
* IO buffer
|
136
|
+
***********************************************************************/
|
137
|
+
|
138
|
+
/**
 * Set up a buffered view on `reader`.
 *
 * @param reader  wrapped reader that supplies the bytes
 * @param bsz     size of the intermediate buffer, in bytes
 * @param totsz   upper bound on the number of bytes fetched from `reader`
 */
BufferedIOReader::BufferedIOReader(IOReader *reader, size_t bsz, size_t totsz):
    reader(reader),
    bsz(bsz),
    totsz(totsz),
    ofs(0),     // nothing fetched from the wrapped reader yet
    b0(0),      // [b0, b1) is empty: the buffer holds no pending bytes
    b1(0),
    buffer(bsz)
{
}
|
142
|
+
|
143
|
+
|
144
|
+
size_t BufferedIOReader::operator()(void *ptr, size_t unitsize, size_t nitems)
|
145
|
+
{
|
146
|
+
size_t size = unitsize * nitems;
|
147
|
+
if (size == 0) return 0;
|
148
|
+
char * dst = (char*)ptr;
|
149
|
+
size_t nb;
|
150
|
+
|
151
|
+
{ // first copy available bytes
|
152
|
+
nb = std::min(b1 - b0, size);
|
153
|
+
memcpy (dst, buffer.data() + b0, nb);
|
154
|
+
b0 += nb;
|
155
|
+
dst += nb;
|
156
|
+
size -= nb;
|
157
|
+
}
|
158
|
+
|
159
|
+
if (size > totsz - ofs) {
|
160
|
+
size = totsz - ofs;
|
161
|
+
}
|
162
|
+
// while we would like to have more data
|
163
|
+
while (size > 0) {
|
164
|
+
assert (b0 == b1); // buffer empty on input
|
165
|
+
// try to read from main reader
|
166
|
+
b0 = 0;
|
167
|
+
b1 = (*reader)(buffer.data(), 1, std::min(bsz, size));
|
168
|
+
|
169
|
+
if (b1 == 0) {
|
170
|
+
// no more bytes available
|
171
|
+
break;
|
172
|
+
}
|
173
|
+
ofs += b1;
|
174
|
+
|
175
|
+
// copy remaining bytes
|
176
|
+
size_t nb2 = std::min(b1, size);
|
177
|
+
memcpy (dst, buffer.data(), nb2);
|
178
|
+
b0 = nb2;
|
179
|
+
nb += nb2;
|
180
|
+
dst += nb2;
|
181
|
+
size -= nb2;
|
182
|
+
}
|
183
|
+
return nb / unitsize;
|
184
|
+
}
|
185
|
+
|
186
|
+
|
187
|
+
/**
 * Set up a buffered view on `writer`.
 *
 * @param writer  wrapped writer that receives the flushed bytes
 * @param bsz     size of the intermediate buffer, in bytes
 */
BufferedIOWriter::BufferedIOWriter(IOWriter *writer, size_t bsz):
    writer(writer),
    bsz(bsz),
    b0(0),      // the buffer starts out empty
    buffer(bsz)
{
}
|
191
|
+
|
192
|
+
size_t BufferedIOWriter::operator()(const void *ptr, size_t unitsize, size_t nitems)
|
193
|
+
{
|
194
|
+
size_t size = unitsize * nitems;
|
195
|
+
if (size == 0) return 0;
|
196
|
+
const char * src = (const char*)ptr;
|
197
|
+
size_t nb;
|
198
|
+
|
199
|
+
{ // copy as many bytes as possible to buffer
|
200
|
+
nb = std::min(bsz - b0, size);
|
201
|
+
memcpy (buffer.data() + b0, src, nb);
|
202
|
+
b0 += nb;
|
203
|
+
src += nb;
|
204
|
+
size -= nb;
|
205
|
+
}
|
206
|
+
while (size > 0) {
|
207
|
+
assert(b0 == bsz);
|
208
|
+
// now we need to flush to add more bytes
|
209
|
+
size_t ofs = 0;
|
210
|
+
do {
|
211
|
+
assert (ofs < 10000000);
|
212
|
+
size_t written = (*writer)(buffer.data() + ofs, 1, bsz - ofs);
|
213
|
+
FAISS_THROW_IF_NOT(written > 0);
|
214
|
+
ofs += written;
|
215
|
+
} while(ofs != bsz);
|
216
|
+
|
217
|
+
// copy src to buffer
|
218
|
+
size_t nb1 = std::min(bsz, size);
|
219
|
+
memcpy (buffer.data(), src, nb1);
|
220
|
+
b0 = nb1;
|
221
|
+
nb += nb1;
|
222
|
+
src += nb1;
|
223
|
+
size -= nb1;
|
224
|
+
}
|
225
|
+
|
226
|
+
return nb / unitsize;
|
227
|
+
}
|
228
|
+
|
229
|
+
/**
 * Flush the bytes still held in the buffer to the wrapped writer.
 *
 * Nothing is printed here: output on stdout from a library destructor
 * would get mixed into whatever the program writes there.
 */
BufferedIOWriter::~BufferedIOWriter()
{
    size_t flushed = 0;
    // the wrapped writer may accept fewer bytes than offered, so loop
    // until all b0 pending bytes are out
    while(flushed != b0) {
        size_t written = (*writer)(buffer.data() + flushed, 1, b0 - flushed);
        // NOTE(review): presumably this macro throws; an exception leaving
        // a destructor that is not declared noexcept(false) terminates the
        // program -- confirm this is the intended failure mode
        FAISS_THROW_IF_NOT(written > 0);
        flushed += written;
    }
}
|
240
|
+
|
241
|
+
|
242
|
+
|
243
|
+
|
244
|
+
|
135
245
|
uint32_t fourcc (const char sx[4]) {
|
136
246
|
assert(4 == strlen(sx));
|
137
247
|
const unsigned char *x = (unsigned char*)sx;
|
data/vendor/faiss/impl/io.h
CHANGED
@@ -9,6 +9,9 @@
|
|
9
9
|
|
10
10
|
/***********************************************************
|
11
11
|
* Abstract I/O objects
|
12
|
+
*
|
13
|
+
* I/O is always sequential, seek does not need to be supported
|
14
|
+
* (indexes could be read or written to a pipe).
|
12
15
|
***********************************************************/
|
13
16
|
|
14
17
|
#pragma once
|
@@ -92,6 +95,41 @@ struct FileIOWriter: IOWriter {
|
|
92
95
|
int fileno() override;
|
93
96
|
};
|
94
97
|
|
98
|
+
/*******************************************************
|
99
|
+
* Buffered reader + writer
|
100
|
+
*******************************************************/
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
/** wraps an ioreader to make buffered reads to avoid too small reads */
|
105
|
+
struct BufferedIOReader: IOReader {
|
106
|
+
|
107
|
+
IOReader *reader;
|
108
|
+
size_t bsz, totsz, ofs;
|
109
|
+
size_t b0, b1; ///< range of available bytes in the buffer
|
110
|
+
std::vector<char> buffer;
|
111
|
+
|
112
|
+
BufferedIOReader(IOReader *reader, size_t bsz,
|
113
|
+
size_t totsz=(size_t)(-1));
|
114
|
+
|
115
|
+
size_t operator()(void *ptr, size_t size, size_t nitems) override;
|
116
|
+
};
|
117
|
+
|
118
|
+
struct BufferedIOWriter: IOWriter {
|
119
|
+
|
120
|
+
IOWriter *writer;
|
121
|
+
size_t bsz, ofs;
|
122
|
+
size_t b0; ///< amount of data in buffer
|
123
|
+
std::vector<char> buffer;
|
124
|
+
|
125
|
+
BufferedIOWriter(IOWriter *writer, size_t bsz);
|
126
|
+
|
127
|
+
size_t operator()(const void *ptr, size_t size, size_t nitems) override;
|
128
|
+
|
129
|
+
// flushes
|
130
|
+
~BufferedIOWriter();
|
131
|
+
};
|
132
|
+
|
95
133
|
/// cast a 4-character string to a uint32_t that can be written and read easily
|
96
134
|
uint32_t fourcc (const char sx[4]);
|
97
135
|
|