faiss 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +11 -8
- data/vendor/faiss/faiss/Clustering.cpp +0 -16
- data/vendor/faiss/faiss/IVFlib.cpp +213 -0
- data/vendor/faiss/faiss/IVFlib.h +42 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -7
- data/vendor/faiss/faiss/IndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +4 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +13 -20
- data/vendor/faiss/faiss/IndexHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexIVF.cpp +20 -3
- data/vendor/faiss/faiss/IndexIVF.h +5 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +2 -1
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +277 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +70 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +148 -0
- data/vendor/faiss/faiss/IndexRaBitQ.h +65 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -1
- data/vendor/faiss/faiss/clone_index.cpp +38 -3
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +19 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +4 -11
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +13 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +112 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +35 -13
- data/vendor/faiss/faiss/impl/HNSW.h +5 -4
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +519 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +78 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +2 -2
- data/vendor/faiss/faiss/impl/code_distance/code_distance-sve.h +3 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +220 -25
- data/vendor/faiss/faiss/impl/index_write.cpp +29 -0
- data/vendor/faiss/faiss/impl/io.h +2 -2
- data/vendor/faiss/faiss/impl/io_macros.h +2 -0
- data/vendor/faiss/faiss/impl/mapped_io.cpp +313 -0
- data/vendor/faiss/faiss/impl/mapped_io.h +51 -0
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +316 -0
- data/vendor/faiss/faiss/impl/platform_macros.h +7 -3
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +67 -0
- data/vendor/faiss/faiss/impl/zerocopy_io.h +32 -0
- data/vendor/faiss/faiss/index_factory.cpp +16 -5
- data/vendor/faiss/faiss/index_io.h +4 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.h +5 -3
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +24 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +22 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +30 -12
- data/vendor/faiss/faiss/utils/hamming.cpp +45 -21
- data/vendor/faiss/faiss/utils/hamming.h +7 -3
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +4 -4
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +16 -4
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 57303540dbb2c3de9e0d34e8a3ffcbb7c31ed44bb71ece9cafeb8de80594660d
         | 
| 4 | 
            +
              data.tar.gz: 31c77390c331a0622c230bb245751ab3799196b0918f542fb4ba49b7582f28c9
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 2879d2d866bf10d1dc745841b5047933f43d9e84003b8b3c49a9ac5862be48c9fae31d042d2b39f2aaf188e1f110aa1ffd05b050c0f6510a35670724f890652c
         | 
| 7 | 
            +
              data.tar.gz: 92dcbf603c5fab0b5f3fadfe377b91ec92e40840d91169cabb86f7c32d87f3b611937f4ad2423fe76abdc787af418de02bd23467a78a9b15633dd06a3c9d8187
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/lib/faiss/version.rb
    CHANGED
    
    
| @@ -15,7 +15,6 @@ | |
| 15 15 |  | 
| 16 16 | 
             
            #include <cinttypes>
         | 
| 17 17 | 
             
            #include <cmath>
         | 
| 18 | 
            -
            #include <typeinfo>
         | 
| 19 18 |  | 
| 20 19 | 
             
            #include <faiss/impl/FaissAssert.h>
         | 
| 21 20 | 
             
            #include <faiss/utils/random.h>
         | 
| @@ -313,9 +312,6 @@ bool ParameterSpace::combination_ge(size_t c1, size_t c2) const { | |
| 313 312 | 
             
                return true;
         | 
| 314 313 | 
             
            }
         | 
| 315 314 |  | 
| 316 | 
            -
            #define DC(classname) \
         | 
| 317 | 
            -
                const classname* ix = dynamic_cast<const classname*>(index)
         | 
| 318 | 
            -
             | 
| 319 315 | 
             
            static void init_pq_ParameterRange(
         | 
| 320 316 | 
             
                    const ProductQuantizer& pq,
         | 
| 321 317 | 
             
                    ParameterRange& pr) {
         | 
| @@ -339,6 +335,10 @@ ParameterRange& ParameterSpace::add_range(const std::string& name) { | |
| 339 335 | 
             
                return parameter_ranges.back();
         | 
| 340 336 | 
             
            }
         | 
| 341 337 |  | 
| 338 | 
            +
            // Do not use this macro if ix will be unused
         | 
| 339 | 
            +
            #define DC(classname) \
         | 
| 340 | 
            +
                const classname* ix = dynamic_cast<const classname*>(index)
         | 
| 341 | 
            +
             | 
| 342 342 | 
             
            /// initialize with reasonable parameters for this type of index
         | 
| 343 343 | 
             
            void ParameterSpace::initialize(const Index* index) {
         | 
| 344 344 | 
             
                if (DC(IndexPreTransform)) {
         | 
| @@ -394,7 +394,7 @@ void ParameterSpace::initialize(const Index* index) { | |
| 394 394 | 
             
                                std::numeric_limits<double>::infinity());
         | 
| 395 395 | 
             
                    }
         | 
| 396 396 | 
             
                }
         | 
| 397 | 
            -
                if ( | 
| 397 | 
            +
                if (dynamic_cast<const IndexIVFPQR*>(index)) {
         | 
| 398 398 | 
             
                    ParameterRange& pr = add_range("k_factor");
         | 
| 399 399 | 
             
                    for (int i = 0; i <= 6; i++) {
         | 
| 400 400 | 
             
                        pr.values.push_back(1 << i);
         | 
| @@ -410,9 +410,6 @@ void ParameterSpace::initialize(const Index* index) { | |
| 410 410 |  | 
| 411 411 | 
             
            #undef DC
         | 
| 412 412 |  | 
| 413 | 
            -
            // non-const version
         | 
| 414 | 
            -
            #define DC(classname) classname* ix = dynamic_cast<classname*>(index)
         | 
| 415 | 
            -
             | 
| 416 413 | 
             
            /// set a combination of parameters on an index
         | 
| 417 414 | 
             
            void ParameterSpace::set_index_parameters(Index* index, size_t cno) const {
         | 
| 418 415 | 
             
                for (int i = 0; i < parameter_ranges.size(); i++) {
         | 
| @@ -442,6 +439,10 @@ void ParameterSpace::set_index_parameters( | |
| 442 439 | 
             
                }
         | 
| 443 440 | 
             
            }
         | 
| 444 441 |  | 
| 442 | 
            +
            // non-const version
         | 
| 443 | 
            +
            // Do not use this macro if ix will be unused
         | 
| 444 | 
            +
            #define DC(classname) classname* ix = dynamic_cast<classname*>(index)
         | 
| 445 | 
            +
             | 
| 445 446 | 
             
            void ParameterSpace::set_index_parameter(
         | 
| 446 447 | 
             
                    Index* index,
         | 
| 447 448 | 
             
                    const std::string& name,
         | 
| @@ -574,6 +575,8 @@ void ParameterSpace::set_index_parameter( | |
| 574 575 | 
             
                        name.c_str());
         | 
| 575 576 | 
             
            }
         | 
| 576 577 |  | 
| 578 | 
            +
            #undef DC
         | 
| 579 | 
            +
             | 
| 577 580 | 
             
            void ParameterSpace::display() const {
         | 
| 578 581 | 
             
                printf("ParameterSpace, %zd parameters, %zd combinations:\n",
         | 
| 579 582 | 
             
                       parameter_ranges.size(),
         | 
| @@ -33,22 +33,6 @@ Clustering::Clustering(int d, int k) : d(d), k(k) {} | |
| 33 33 | 
             
            Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
         | 
| 34 34 | 
             
                    : ClusteringParameters(cp), d(d), k(k) {}
         | 
| 35 35 |  | 
| 36 | 
            -
            static double imbalance_factor(int n, int k, int64_t* assign) {
         | 
| 37 | 
            -
                std::vector<int> hist(k, 0);
         | 
| 38 | 
            -
                for (int i = 0; i < n; i++)
         | 
| 39 | 
            -
                    hist[assign[i]]++;
         | 
| 40 | 
            -
             | 
| 41 | 
            -
                double tot = 0, uf = 0;
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                for (int i = 0; i < k; i++) {
         | 
| 44 | 
            -
                    tot += hist[i];
         | 
| 45 | 
            -
                    uf += hist[i] * (double)hist[i];
         | 
| 46 | 
            -
                }
         | 
| 47 | 
            -
                uf = uf * k / (tot * tot);
         | 
| 48 | 
            -
             | 
| 49 | 
            -
                return uf;
         | 
| 50 | 
            -
            }
         | 
| 51 | 
            -
             | 
| 52 36 | 
             
            void Clustering::post_process_centroids() {
         | 
| 53 37 | 
             
                if (spherical) {
         | 
| 54 38 | 
             
                    fvec_renorm_L2(d, k, centroids.data());
         | 
| @@ -9,6 +9,7 @@ | |
| 9 9 | 
             
            #include <omp.h>
         | 
| 10 10 |  | 
| 11 11 | 
             
            #include <memory>
         | 
| 12 | 
            +
            #include <numeric>
         | 
| 12 13 |  | 
| 13 14 | 
             
            #include <faiss/IndexAdditiveQuantizer.h>
         | 
| 14 15 | 
             
            #include <faiss/IndexIVFAdditiveQuantizer.h>
         | 
| @@ -16,7 +17,9 @@ | |
| 16 17 | 
             
            #include <faiss/IndexPreTransform.h>
         | 
| 17 18 | 
             
            #include <faiss/IndexRefine.h>
         | 
| 18 19 | 
             
            #include <faiss/MetaIndexes.h>
         | 
| 20 | 
            +
            #include <faiss/clone_index.h>
         | 
| 19 21 | 
             
            #include <faiss/impl/FaissAssert.h>
         | 
| 22 | 
            +
            #include <faiss/index_io.h>
         | 
| 20 23 | 
             
            #include <faiss/utils/distances.h>
         | 
| 21 24 | 
             
            #include <faiss/utils/hamming.h>
         | 
| 22 25 | 
             
            #include <faiss/utils/utils.h>
         | 
| @@ -198,12 +201,32 @@ static void shift_and_add( | |
| 198 201 | 
             
                memcpy(dst.data() + insert_point, src.data(), src.size() * sizeof(T));
         | 
| 199 202 | 
             
            }
         | 
| 200 203 |  | 
| 204 | 
            +
            template <class T>
         | 
| 205 | 
            +
            static void shift_and_add(
         | 
| 206 | 
            +
                    MaybeOwnedVector<T>& dst,
         | 
| 207 | 
            +
                    size_t remove,
         | 
| 208 | 
            +
                    const MaybeOwnedVector<T>& src) {
         | 
| 209 | 
            +
                if (remove > 0)
         | 
| 210 | 
            +
                    memmove(dst.data(),
         | 
| 211 | 
            +
                            dst.data() + remove,
         | 
| 212 | 
            +
                            (dst.size() - remove) * sizeof(T));
         | 
| 213 | 
            +
                size_t insert_point = dst.size() - remove;
         | 
| 214 | 
            +
                dst.resize(insert_point + src.size());
         | 
| 215 | 
            +
                memcpy(dst.data() + insert_point, src.data(), src.size() * sizeof(T));
         | 
| 216 | 
            +
            }
         | 
| 217 | 
            +
             | 
| 201 218 | 
             
            /// Erase the first `remove` elements of v; nothing happens when
            /// `remove` is zero.
            template <class T>
            static void remove_from_begin(std::vector<T>& v, size_t remove) {
                if (remove == 0) {
                    return;
                }
                const auto first = v.begin();
                v.erase(first, first + remove);
            }
         | 
| 206 223 |  | 
| 224 | 
            +
            template <class T>
         | 
| 225 | 
            +
            static void remove_from_begin(MaybeOwnedVector<T>& v, size_t remove) {
         | 
| 226 | 
            +
                if (remove > 0)
         | 
| 227 | 
            +
                    v.erase(v.begin(), v.begin() + remove);
         | 
| 228 | 
            +
            }
         | 
| 229 | 
            +
             | 
| 207 230 | 
             
            void SlidingIndexWindow::step(const Index* sub_index, bool remove_oldest) {
         | 
| 208 231 | 
             
                FAISS_THROW_IF_NOT_MSG(
         | 
| 209 232 | 
             
                        !remove_oldest || n_slice > 0,
         | 
| @@ -519,5 +542,195 @@ void ivf_residual_add_from_flat_codes( | |
| 519 542 | 
             
                index->ntotal += nb;
         | 
| 520 543 | 
             
            }
         | 
| 521 544 |  | 
| 545 | 
            +
            /// Default policy: element i goes to shard (i modulo shard_count).
            int64_t DefaultShardingFunction::operator()(int64_t i, int64_t shard_count) {
                const int64_t shard_id = i % shard_count;
                return shard_id;
            }
         | 
| 548 | 
            +
             | 
| 549 | 
            +
            /**
             * Split the centroids of a float IVF index into shard_count shards and
             * write every shard to its own file.
             *
             * Each shard is a clone of `index` whose quantizer was reset and then
             * refilled with only the centroids that sharding_function assigns to it.
             *
             * @param index             source index; its quantizer supplies the
             *                          centroids
             * @param shard_count       number of shard indexes to produce
             * @param filename_template printf-style template expanded with the shard
             *                          number (as an int, for a "%d" conversion) to
             *                          obtain each output file name
             * @param sharding_function maps (centroid number, shard_count) to a shard
             *                          number; dereferenced, so it must not be null
             * @param generate_ids      when true, each shard quantizer is wrapped in
             *                          an IndexIDMap2 and the centroids are added
             *                          under their original centroid number
             */
            void handle_ivf(
                    faiss::IndexIVF* index,
                    int64_t shard_count,
                    const std::string& filename_template,
                    ShardingFunction* sharding_function,
                    bool generate_ids) {
                // Smart pointers release the shards and the intermediate clone on
                // every exit path, including when a callee throws.
                std::vector<std::unique_ptr<faiss::IndexIVF>> sharded_indexes(
                        shard_count);
                std::unique_ptr<faiss::IndexIVF> clone(
                        static_cast<faiss::IndexIVF*>(faiss::clone_index(index)));
                clone->quantizer->reset();
                for (auto& shard : sharded_indexes) {
                    shard.reset(static_cast<faiss::IndexIVF*>(
                            faiss::clone_index(clone.get())));
                    if (generate_ids) {
                        // Assume the quantizer does not natively support add_with_ids.
                        // NOTE(review): nothing in this function releases the wrapped
                        // quantizer explicitly; confirm the ownership flags of the
                        // shard and of IndexIDMap2 so that it is not leaked.
                        shard->quantizer = new IndexIDMap2(shard->quantizer);
                    }
                }

                // assign centroids to each sharded Index based on sharding_function,
                // and add them to the quantizer of each sharded index
                const size_t d = index->quantizer->d;
                const int64_t centroid_count = index->quantizer->ntotal;
                std::vector<std::vector<float>> sharded_centroids(shard_count);
                std::vector<std::vector<idx_t>> xids(shard_count);
                // one scratch buffer reused for every reconstructed centroid
                std::vector<float> centroid(d);
                for (int64_t i = 0; i < centroid_count; i++) {
                    const int64_t shard_id = (*sharding_function)(i, shard_count);
                    // shard_id indexes the per-shard vectors below
                    FAISS_THROW_IF_NOT_MSG(
                            shard_id >= 0 && shard_id < shard_count,
                            "sharding_function returned an out-of-range shard id.");
                    // Since the quantizer does not natively support add_with_ids,
                    // we simply generate them.
                    xids[shard_id].push_back(i);
                    index->quantizer->reconstruct(i, centroid.data());
                    sharded_centroids[shard_id].insert(
                            sharded_centroids[shard_id].end(),
                            centroid.begin(),
                            centroid.end());
                }
                for (int64_t i = 0; i < shard_count; i++) {
                    const size_t n_centroids = sharded_centroids[i].size() / d;
                    if (generate_ids) {
                        sharded_indexes[i]->quantizer->add_with_ids(
                                n_centroids,
                                sharded_centroids[i].data(),
                                xids[i].data());
                    } else {
                        sharded_indexes[i]->quantizer->add(
                                n_centroids, sharded_centroids[i].data());
                    }
                }

                for (int64_t i = 0; i < shard_count; i++) {
                    char fname[256];
                    // The template is expanded through a "%d" conversion, which
                    // consumes an int, so the 64-bit counter is narrowed explicitly.
                    const int written = snprintf(
                            fname,
                            sizeof(fname),
                            filename_template.c_str(),
                            static_cast<int>(i));
                    // refuse to write to a truncated (possibly colliding) file name
                    FAISS_THROW_IF_NOT_MSG(
                            written >= 0 &&
                                    static_cast<size_t>(written) < sizeof(fname),
                            "Shard filename could not be formatted or is too long.");
                    faiss::write_index(sharded_indexes[i].get(), fname);
                }
            }
         | 
| 608 | 
            +
             | 
| 609 | 
            +
            /**
             * Split the centroids of a binary IVF index into shard_count shards and
             * write every shard to its own file.
             *
             * Each shard is a clone of `index` whose quantizer was reset and then
             * refilled with only the centroids that sharding_function assigns to it.
             *
             * @param index             source index; its quantizer supplies the
             *                          centroids (d / 8 bytes each)
             * @param shard_count       number of shard indexes to produce
             * @param filename_template printf-style template expanded with the shard
             *                          number (as an int, for a "%d" conversion) to
             *                          obtain each output file name
             * @param sharding_function maps (centroid number, shard_count) to a shard
             *                          number; dereferenced, so it must not be null
             * @param generate_ids      when true, each shard quantizer is wrapped in
             *                          an IndexBinaryIDMap2 and the centroids are
             *                          added under their original centroid number
             */
            void handle_binary_ivf(
                    faiss::IndexBinaryIVF* index,
                    int64_t shard_count,
                    const std::string& filename_template,
                    ShardingFunction* sharding_function,
                    bool generate_ids) {
                // Smart pointers release the shards and the intermediate clone on
                // every exit path, including when a callee throws.
                std::vector<std::unique_ptr<faiss::IndexBinaryIVF>> sharded_indexes(
                        shard_count);

                std::unique_ptr<faiss::IndexBinaryIVF> clone(
                        static_cast<faiss::IndexBinaryIVF*>(
                                faiss::clone_binary_index(index)));
                clone->quantizer->reset();

                for (auto& shard : sharded_indexes) {
                    shard.reset(static_cast<faiss::IndexBinaryIVF*>(
                            faiss::clone_binary_index(clone.get())));
                    if (generate_ids) {
                        // Assume the quantizer does not natively support add_with_ids.
                        // NOTE(review): nothing in this function releases the wrapped
                        // quantizer explicitly; confirm the ownership flags of the
                        // shard and of IndexBinaryIDMap2 so that it is not leaked.
                        shard->quantizer = new IndexBinaryIDMap2(shard->quantizer);
                    }
                }

                // assign centroids to each sharded Index based on sharding_function,
                // and add them to the quantizer of each sharded index
                const int64_t reconstruction_size = index->quantizer->d / 8;
                const int64_t centroid_count = index->quantizer->ntotal;
                std::vector<std::vector<uint8_t>> sharded_centroids(shard_count);
                std::vector<std::vector<idx_t>> xids(shard_count);
                // one scratch buffer reused for every reconstructed centroid
                std::vector<uint8_t> centroid(reconstruction_size);
                for (int64_t i = 0; i < centroid_count; i++) {
                    const int64_t shard_id = (*sharding_function)(i, shard_count);
                    // shard_id indexes the per-shard vectors below
                    FAISS_THROW_IF_NOT_MSG(
                            shard_id >= 0 && shard_id < shard_count,
                            "sharding_function returned an out-of-range shard id.");
                    // Since the quantizer does not natively support add_with_ids,
                    // we simply generate them.
                    xids[shard_id].push_back(i);
                    index->quantizer->reconstruct(i, centroid.data());
                    sharded_centroids[shard_id].insert(
                            sharded_centroids[shard_id].end(),
                            centroid.begin(),
                            centroid.end());
                }
                for (int64_t i = 0; i < shard_count; i++) {
                    const size_t n_centroids =
                            sharded_centroids[i].size() / reconstruction_size;
                    if (generate_ids) {
                        sharded_indexes[i]->quantizer->add_with_ids(
                                n_centroids,
                                sharded_centroids[i].data(),
                                xids[i].data());
                    } else {
                        sharded_indexes[i]->quantizer->add(
                                n_centroids, sharded_centroids[i].data());
                    }
                }

                for (int64_t i = 0; i < shard_count; i++) {
                    char fname[256];
                    // The template is expanded through a "%d" conversion, which
                    // consumes an int, so the 64-bit counter is narrowed explicitly.
                    const int written = snprintf(
                            fname,
                            sizeof(fname),
                            filename_template.c_str(),
                            static_cast<int>(i));
                    // refuse to write to a truncated (possibly colliding) file name
                    FAISS_THROW_IF_NOT_MSG(
                            written >= 0 &&
                                    static_cast<size_t>(written) < sizeof(fname),
                            "Shard filename could not be formatted or is too long.");
                    faiss::write_index_binary(sharded_indexes[i].get(), fname);
                }
            }
         | 
| 672 | 
            +
             | 
| 673 | 
            +
            template <typename IndexType>
         | 
| 674 | 
            +
            void sharding_helper(
         | 
| 675 | 
            +
                    IndexType* index,
         | 
| 676 | 
            +
                    int64_t shard_count,
         | 
| 677 | 
            +
                    const std::string& filename_template,
         | 
| 678 | 
            +
                    ShardingFunction* sharding_function,
         | 
| 679 | 
            +
                    bool generate_ids) {
         | 
| 680 | 
            +
                FAISS_THROW_IF_MSG(index->quantizer->ntotal == 0, "No centroids to shard.");
         | 
| 681 | 
            +
                FAISS_THROW_IF_MSG(
         | 
| 682 | 
            +
                        filename_template.find("%d") == std::string::npos,
         | 
| 683 | 
            +
                        "Invalid filename_template. Must contain format specifier for shard count.");
         | 
| 684 | 
            +
             | 
| 685 | 
            +
                DefaultShardingFunction default_sharding_function;
         | 
| 686 | 
            +
                if (sharding_function == nullptr) {
         | 
| 687 | 
            +
                    sharding_function = &default_sharding_function;
         | 
| 688 | 
            +
                }
         | 
| 689 | 
            +
             | 
| 690 | 
            +
                if (typeid(IndexType) == typeid(faiss::IndexIVF)) {
         | 
| 691 | 
            +
                    handle_ivf(
         | 
| 692 | 
            +
                            dynamic_cast<faiss::IndexIVF*>(index),
         | 
| 693 | 
            +
                            shard_count,
         | 
| 694 | 
            +
                            filename_template,
         | 
| 695 | 
            +
                            sharding_function,
         | 
| 696 | 
            +
                            generate_ids);
         | 
| 697 | 
            +
                } else if (typeid(IndexType) == typeid(faiss::IndexBinaryIVF)) {
         | 
| 698 | 
            +
                    handle_binary_ivf(
         | 
| 699 | 
            +
                            dynamic_cast<faiss::IndexBinaryIVF*>(index),
         | 
| 700 | 
            +
                            shard_count,
         | 
| 701 | 
            +
                            filename_template,
         | 
| 702 | 
            +
                            sharding_function,
         | 
| 703 | 
            +
                            generate_ids);
         | 
| 704 | 
            +
                }
         | 
| 705 | 
            +
            }
         | 
| 706 | 
            +
             | 
| 707 | 
            +
            /// Shard the centroids of a float IVF index. Thin wrapper that passes
            /// every argument unchanged to sharding_helper.
            void shard_ivf_index_centroids(
                    faiss::IndexIVF* index,
                    int64_t shard_count,
                    const std::string& filename_template,
                    ShardingFunction* sharding_function,
                    bool generate_ids) {
                sharding_helper<faiss::IndexIVF>(
                        index,
                        shard_count,
                        filename_template,
                        sharding_function,
                        generate_ids);
            }
         | 
| 720 | 
            +
             | 
| 721 | 
            +
            /// Shard the centroids of a binary IVF index. Thin wrapper that passes
            /// every argument unchanged to sharding_helper.
            void shard_binary_ivf_index_centroids(
                    faiss::IndexBinaryIVF* index,
                    int64_t shard_count,
                    const std::string& filename_template,
                    ShardingFunction* sharding_function,
                    bool generate_ids) {
                sharding_helper<faiss::IndexBinaryIVF>(
                        index,
                        shard_count,
                        filename_template,
                        sharding_function,
                        generate_ids);
            }
         | 
| 734 | 
            +
             | 
| 522 735 | 
             
            } // namespace ivflib
         | 
| 523 736 | 
             
            } // namespace faiss
         | 
    
        data/vendor/faiss/faiss/IVFlib.h
    CHANGED
    
    | @@ -14,6 +14,7 @@ | |
| 14 14 | 
             
             * IndexIVFs embedded within an IndexPreTransform.
         | 
| 15 15 | 
             
             */
         | 
| 16 16 |  | 
| 17 | 
            +
            #include <faiss/IndexBinaryIVF.h>
         | 
| 17 18 | 
             
            #include <faiss/IndexIVF.h>
         | 
| 18 19 | 
             
            #include <vector>
         | 
| 19 20 |  | 
| @@ -167,6 +168,47 @@ void ivf_residual_add_from_flat_codes( | |
| 167 168 | 
             
                    const uint8_t* codes,
         | 
| 168 169 | 
             
                    int64_t code_size = -1);
         | 
| 169 170 |  | 
| 171 | 
            +
            struct ShardingFunction {
         | 
| 172 | 
            +
                virtual int64_t operator()(int64_t i, int64_t shard_count) = 0;
         | 
| 173 | 
            +
                virtual ~ShardingFunction() = default;
         | 
| 174 | 
            +
                ShardingFunction() {}
         | 
| 175 | 
            +
                ShardingFunction(const ShardingFunction&) = default;
         | 
| 176 | 
            +
                ShardingFunction(ShardingFunction&&) = default;
         | 
| 177 | 
            +
                ShardingFunction& operator=(const ShardingFunction&) = default;
         | 
| 178 | 
            +
                ShardingFunction& operator=(ShardingFunction&&) = default;
         | 
| 179 | 
            +
            };
         | 
| 180 | 
            +
            struct DefaultShardingFunction : ShardingFunction {
         | 
| 181 | 
            +
                int64_t operator()(int64_t i, int64_t shard_count) override;
         | 
| 182 | 
            +
            };
         | 
| 183 | 
            +
             | 
| 184 | 
            +
            /**
         | 
| 185 | 
            +
             * Shards the centroids of an IVF index by the given sharding function, and writes
         | 
| 186 | 
            +
             * the index to the path given by filename_template. The centroids must already
         | 
| 187 | 
            +
             * be added to the index quantizer.
         | 
| 188 | 
            +
             *
         | 
| 189 | 
            +
             * @param index             The IVF index containing centroids to shard.
         | 
| 190 | 
            +
             * @param shard_count       Number of shards.
         | 
| 191 | 
            +
             * @param filename_template Template for shard filenames.
         | 
| 192 | 
            +
             * @param sharding_function The function to shard by. The default is ith vector
         | 
| 193 | 
            +
             *                          mod shard_count.
         | 
| 194 | 
            +
             * @param generate_ids      Generates ids using IndexIDMap2. If true, ids will
         | 
| 195 | 
            +
             *                          match the default ids in the unsharded index.
         | 
| 196 | 
            +
             * @note                    This function is declared void; it does not return a shard count.
         | 
| 197 | 
            +
             */
         | 
| 198 | 
            +
            void shard_ivf_index_centroids(
         | 
| 199 | 
            +
                    IndexIVF* index,
         | 
| 200 | 
            +
                    int64_t shard_count = 20,
         | 
| 201 | 
            +
                    const std::string& filename_template = "shard.%d.index",
         | 
| 202 | 
            +
                    ShardingFunction* sharding_function = nullptr,
         | 
| 203 | 
            +
                    bool generate_ids = false);
         | 
| 204 | 
            +
             | 
| 205 | 
            +
            void shard_binary_ivf_index_centroids(
         | 
| 206 | 
            +
                    faiss::IndexBinaryIVF* index,
         | 
| 207 | 
            +
                    int64_t shard_count = 20,
         | 
| 208 | 
            +
                    const std::string& filename_template = "shard.%d.index",
         | 
| 209 | 
            +
                    ShardingFunction* sharding_function = nullptr,
         | 
| 210 | 
            +
                    bool generate_ids = false);
         | 
| 211 | 
            +
             | 
| 170 212 | 
             
            } // namespace ivflib
         | 
| 171 213 | 
             
            } // namespace faiss
         | 
| 172 214 |  | 
    
        data/vendor/faiss/faiss/Index.h
    CHANGED
    
    
| @@ -37,8 +37,8 @@ void IndexBinaryFlat::search( | |
| 37 37 | 
             
                    int32_t* distances,
         | 
| 38 38 | 
             
                    idx_t* labels,
         | 
| 39 39 | 
             
                    const SearchParameters* params) const {
         | 
| 40 | 
            -
                 | 
| 41 | 
            -
             | 
| 40 | 
            +
                // Extract IDSelector from params if present
         | 
| 41 | 
            +
                const IDSelector* sel = params ? params->sel : nullptr;
         | 
| 42 42 | 
             
                FAISS_THROW_IF_NOT(k > 0);
         | 
| 43 43 |  | 
| 44 44 | 
             
                const idx_t block_size = query_batch_size;
         | 
| @@ -60,7 +60,8 @@ void IndexBinaryFlat::search( | |
| 60 60 | 
             
                                ntotal,
         | 
| 61 61 | 
             
                                code_size,
         | 
| 62 62 | 
             
                                /* ordered = */ true,
         | 
| 63 | 
            -
                                approx_topk_mode | 
| 63 | 
            +
                                approx_topk_mode,
         | 
| 64 | 
            +
                                sel);
         | 
| 64 65 | 
             
                    } else {
         | 
| 65 66 | 
             
                        hammings_knn_mc(
         | 
| 66 67 | 
             
                                x + s * code_size,
         | 
| @@ -70,7 +71,8 @@ void IndexBinaryFlat::search( | |
| 70 71 | 
             
                                k,
         | 
| 71 72 | 
             
                                code_size,
         | 
| 72 73 | 
             
                                distances + s * k,
         | 
| 73 | 
            -
                                labels + s * k | 
| 74 | 
            +
                                labels + s * k,
         | 
| 75 | 
            +
                                sel);
         | 
| 74 76 | 
             
                    }
         | 
| 75 77 | 
             
                }
         | 
| 76 78 | 
             
            }
         | 
| @@ -107,9 +109,9 @@ void IndexBinaryFlat::range_search( | |
| 107 109 | 
             
                    int radius,
         | 
| 108 110 | 
             
                    RangeSearchResult* result,
         | 
| 109 111 | 
             
                    const SearchParameters* params) const {
         | 
| 110 | 
            -
                 | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 112 | 
            +
                const IDSelector* sel = params ? params->sel : nullptr;
         | 
| 113 | 
            +
                hamming_range_search(
         | 
| 114 | 
            +
                        x, xb.data(), n, ntotal, radius, code_size, result, sel);
         | 
| 113 115 | 
             
            }
         | 
| 114 116 |  | 
| 115 117 | 
             
            } // namespace faiss
         | 
| @@ -14,6 +14,7 @@ | |
| 14 14 |  | 
| 15 15 | 
             
            #include <faiss/IndexBinary.h>
         | 
| 16 16 |  | 
| 17 | 
            +
            #include <faiss/impl/maybe_owned_vector.h>
         | 
| 17 18 | 
             
            #include <faiss/utils/approx_topk/mode.h>
         | 
| 18 19 |  | 
| 19 20 | 
             
            namespace faiss {
         | 
| @@ -21,7 +22,7 @@ namespace faiss { | |
| 21 22 | 
             
            /** Index that stores the full vectors and performs exhaustive search. */
         | 
| 22 23 | 
             
            struct IndexBinaryFlat : IndexBinary {
         | 
| 23 24 | 
             
                /// database vectors, size ntotal * d / 8
         | 
| 24 | 
            -
                 | 
| 25 | 
            +
                MaybeOwnedVector<uint8_t> xb;
         | 
| 25 26 |  | 
| 26 27 | 
             
                /** Select between using a heap or counting to select the k smallest values
         | 
| 27 28 | 
             
                 * when scanning inverted lists.
         | 
| @@ -110,7 +110,7 @@ CodePacker* IndexFlatCodes::get_CodePacker() const { | |
| 110 110 | 
             
            }
         | 
| 111 111 |  | 
| 112 112 | 
             
            void IndexFlatCodes::permute_entries(const idx_t* perm) {
         | 
| 113 | 
            -
                 | 
| 113 | 
            +
                MaybeOwnedVector<uint8_t> new_codes(codes.size());
         | 
| 114 114 |  | 
| 115 115 | 
             
                for (idx_t i = 0; i < ntotal; i++) {
         | 
| 116 116 | 
             
                    memcpy(new_codes.data() + i * code_size,
         | 
| @@ -7,9 +7,11 @@ | |
| 7 7 |  | 
| 8 8 | 
             
            #pragma once
         | 
| 9 9 |  | 
| 10 | 
            +
            #include <vector>
         | 
| 11 | 
            +
             | 
| 10 12 | 
             
            #include <faiss/Index.h>
         | 
| 11 13 | 
             
            #include <faiss/impl/DistanceComputer.h>
         | 
| 12 | 
            -
            #include < | 
| 14 | 
            +
            #include <faiss/impl/maybe_owned_vector.h>
         | 
| 13 15 |  | 
| 14 16 | 
             
            namespace faiss {
         | 
| 15 17 |  | 
| @@ -21,7 +23,7 @@ struct IndexFlatCodes : Index { | |
| 21 23 | 
             
                size_t code_size;
         | 
| 22 24 |  | 
| 23 25 | 
             
                /// encoded dataset, size ntotal * code_size
         | 
| 24 | 
            -
                 | 
| 26 | 
            +
                MaybeOwnedVector<uint8_t> codes;
         | 
| 25 27 |  | 
| 26 28 | 
             
                IndexFlatCodes();
         | 
| 27 29 |  | 
| @@ -8,9 +8,7 @@ | |
| 8 8 | 
             
            #include <faiss/IndexHNSW.h>
         | 
| 9 9 |  | 
| 10 10 | 
             
            #include <omp.h>
         | 
| 11 | 
            -
            #include <cassert>
         | 
| 12 11 | 
             
            #include <cinttypes>
         | 
| 13 | 
            -
            #include <cmath>
         | 
| 14 12 | 
             
            #include <cstdio>
         | 
| 15 13 | 
             
            #include <cstdlib>
         | 
| 16 14 | 
             
            #include <cstring>
         | 
| @@ -124,7 +122,7 @@ void hnsw_add_vertices( | |
| 124 122 | 
             
                    int i1 = n;
         | 
| 125 123 |  | 
| 126 124 | 
             
                    for (int pt_level = hist.size() - 1;
         | 
| 127 | 
            -
                         pt_level >= !index_hnsw.init_level0;
         | 
| 125 | 
            +
                         pt_level >= int(!index_hnsw.init_level0);
         | 
| 128 126 | 
             
                         pt_level--) {
         | 
| 129 127 | 
             
                        int i0 = i1 - hist[pt_level];
         | 
| 130 128 |  | 
| @@ -212,7 +210,9 @@ IndexHNSW::IndexHNSW(int d, int M, MetricType metric) | |
| 212 210 | 
             
                    : Index(d, metric), hnsw(M) {}
         | 
| 213 211 |  | 
| 214 212 | 
             
            IndexHNSW::IndexHNSW(Index* storage, int M)
         | 
| 215 | 
            -
                    : Index(storage->d, storage->metric_type), hnsw(M), storage(storage) { | 
| 213 | 
            +
                    : Index(storage->d, storage->metric_type), hnsw(M), storage(storage) {
         | 
| 214 | 
            +
                metric_arg = storage->metric_arg;
         | 
| 215 | 
            +
            }
         | 
| 216 216 |  | 
| 217 217 | 
             
            IndexHNSW::~IndexHNSW() {
         | 
| 218 218 | 
             
                if (own_fields) {
         | 
| @@ -237,19 +237,19 @@ void hnsw_search( | |
| 237 237 | 
             
                    idx_t n,
         | 
| 238 238 | 
             
                    const float* x,
         | 
| 239 239 | 
             
                    BlockResultHandler& bres,
         | 
| 240 | 
            -
                    const SearchParameters*  | 
| 240 | 
            +
                    const SearchParameters* params) {
         | 
| 241 241 | 
             
                FAISS_THROW_IF_NOT_MSG(
         | 
| 242 242 | 
             
                        index->storage,
         | 
| 243 243 | 
             
                        "No storage index, please use IndexHNSWFlat (or variants) "
         | 
| 244 244 | 
             
                        "instead of IndexHNSW directly");
         | 
| 245 | 
            -
                const SearchParametersHNSW* params = nullptr;
         | 
| 246 245 | 
             
                const HNSW& hnsw = index->hnsw;
         | 
| 247 246 |  | 
| 248 247 | 
             
                int efSearch = hnsw.efSearch;
         | 
| 249 | 
            -
                if ( | 
| 250 | 
            -
                     | 
| 251 | 
            -
             | 
| 252 | 
            -
             | 
| 248 | 
            +
                if (params) {
         | 
| 249 | 
            +
                    if (const SearchParametersHNSW* hnsw_params =
         | 
| 250 | 
            +
                                dynamic_cast<const SearchParametersHNSW*>(params)) {
         | 
| 251 | 
            +
                        efSearch = hnsw_params->efSearch;
         | 
| 252 | 
            +
                    }
         | 
| 253 253 | 
             
                }
         | 
| 254 254 | 
             
                size_t n1 = 0, n2 = 0, ndis = 0, nhops = 0;
         | 
| 255 255 |  | 
| @@ -294,13 +294,13 @@ void IndexHNSW::search( | |
| 294 294 | 
             
                    idx_t k,
         | 
| 295 295 | 
             
                    float* distances,
         | 
| 296 296 | 
             
                    idx_t* labels,
         | 
| 297 | 
            -
                    const SearchParameters*  | 
| 297 | 
            +
                    const SearchParameters* params) const {
         | 
| 298 298 | 
             
                FAISS_THROW_IF_NOT(k > 0);
         | 
| 299 299 |  | 
| 300 300 | 
             
                using RH = HeapBlockResultHandler<HNSW::C>;
         | 
| 301 301 | 
             
                RH bres(n, distances, labels, k);
         | 
| 302 302 |  | 
| 303 | 
            -
                hnsw_search(this, n, x, bres,  | 
| 303 | 
            +
                hnsw_search(this, n, x, bres, params);
         | 
| 304 304 |  | 
| 305 305 | 
             
                if (is_similarity_metric(this->metric_type)) {
         | 
| 306 306 | 
             
                    // we need to revert the negated distances
         | 
| @@ -408,17 +408,10 @@ void IndexHNSW::search_level_0( | |
| 408 408 | 
             
                    idx_t* labels,
         | 
| 409 409 | 
             
                    int nprobe,
         | 
| 410 410 | 
             
                    int search_type,
         | 
| 411 | 
            -
                    const SearchParameters*  | 
| 411 | 
            +
                    const SearchParameters* params) const {
         | 
| 412 412 | 
             
                FAISS_THROW_IF_NOT(k > 0);
         | 
| 413 413 | 
             
                FAISS_THROW_IF_NOT(nprobe > 0);
         | 
| 414 414 |  | 
| 415 | 
            -
                const SearchParametersHNSW* params = nullptr;
         | 
| 416 | 
            -
             | 
| 417 | 
            -
                if (params_in) {
         | 
| 418 | 
            -
                    params = dynamic_cast<const SearchParametersHNSW*>(params_in);
         | 
| 419 | 
            -
                    FAISS_THROW_IF_NOT_MSG(params, "params type invalid");
         | 
| 420 | 
            -
                }
         | 
| 421 | 
            -
             | 
| 422 415 | 
             
                storage_idx_t ntotal = hnsw.levels.size();
         | 
| 423 416 |  | 
| 424 417 | 
             
                using RH = HeapBlockResultHandler<HNSW::C>;
         | 
| @@ -138,7 +138,7 @@ struct IndexHNSWPQ : IndexHNSW { | |
| 138 138 | 
             
                void train(idx_t n, const float* x) override;
         | 
| 139 139 | 
             
            };
         | 
| 140 140 |  | 
| 141 | 
            -
            /** SQ index topped with  | 
| 141 | 
            +
            /** SQ index topped with an HNSW structure to access elements
         | 
| 142 142 | 
             
             *  more efficiently.
         | 
| 143 143 | 
             
             */
         | 
| 144 144 | 
             
            struct IndexHNSWSQ : IndexHNSW {
         | 
| @@ -455,7 +455,7 @@ void IndexIVF::search_preassigned( | |
| 455 455 | 
             
            #pragma omp parallel if (do_parallel) reduction(+ : nlistv, ndis, nheap)
         | 
| 456 456 | 
             
                {
         | 
| 457 457 | 
             
                    std::unique_ptr<InvertedListScanner> scanner(
         | 
| 458 | 
            -
                            get_InvertedListScanner(store_pairs, sel));
         | 
| 458 | 
            +
                            get_InvertedListScanner(store_pairs, sel, params));
         | 
| 459 459 |  | 
| 460 460 | 
             
                    /*****************************************************
         | 
| 461 461 | 
             
                     * Depending on parallel_mode, there are two possible ways
         | 
| @@ -796,7 +796,7 @@ void IndexIVF::range_search_preassigned( | |
| 796 796 | 
             
                {
         | 
| 797 797 | 
             
                    RangeSearchPartialResult pres(result);
         | 
| 798 798 | 
             
                    std::unique_ptr<InvertedListScanner> scanner(
         | 
| 799 | 
            -
                            get_InvertedListScanner(store_pairs, sel));
         | 
| 799 | 
            +
                            get_InvertedListScanner(store_pairs, sel, params));
         | 
| 800 800 | 
             
                    FAISS_THROW_IF_NOT(scanner.get());
         | 
| 801 801 | 
             
                    all_pres[omp_get_thread_num()] = &pres;
         | 
| 802 802 |  | 
| @@ -912,7 +912,8 @@ void IndexIVF::range_search_preassigned( | |
| 912 912 |  | 
| 913 913 | 
             
            InvertedListScanner* IndexIVF::get_InvertedListScanner(
         | 
| 914 914 | 
             
                    bool /*store_pairs*/,
         | 
| 915 | 
            -
                    const IDSelector* /* sel  | 
| 915 | 
            +
                    const IDSelector* /* sel */,
         | 
| 916 | 
            +
                    const IVFSearchParameters* /* params */) const {
         | 
| 916 917 | 
             
                FAISS_THROW_MSG("get_InvertedListScanner not implemented");
         | 
| 917 918 | 
             
            }
         | 
| 918 919 |  | 
| @@ -1290,6 +1291,14 @@ size_t InvertedListScanner::scan_codes( | |
| 1290 1291 |  | 
| 1291 1292 | 
             
                if (!keep_max) {
         | 
| 1292 1293 | 
             
                    for (size_t j = 0; j < list_size; j++) {
         | 
| 1294 | 
            +
                        if (sel != nullptr) {
         | 
| 1295 | 
            +
                            int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
         | 
| 1296 | 
            +
                            if (!sel->is_member(id)) {
         | 
| 1297 | 
            +
                                codes += code_size;
         | 
| 1298 | 
            +
                                continue;
         | 
| 1299 | 
            +
                            }
         | 
| 1300 | 
            +
                        }
         | 
| 1301 | 
            +
             | 
| 1293 1302 | 
             
                        float dis = distance_to_code(codes);
         | 
| 1294 1303 | 
             
                        if (dis < simi[0]) {
         | 
| 1295 1304 | 
             
                            int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
         | 
| @@ -1300,6 +1309,14 @@ size_t InvertedListScanner::scan_codes( | |
| 1300 1309 | 
             
                    }
         | 
| 1301 1310 | 
             
                } else {
         | 
| 1302 1311 | 
             
                    for (size_t j = 0; j < list_size; j++) {
         | 
| 1312 | 
            +
                        if (sel != nullptr) {
         | 
| 1313 | 
            +
                            int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
         | 
| 1314 | 
            +
                            if (!sel->is_member(id)) {
         | 
| 1315 | 
            +
                                codes += code_size;
         | 
| 1316 | 
            +
                                continue;
         | 
| 1317 | 
            +
                            }
         | 
| 1318 | 
            +
                        }
         | 
| 1319 | 
            +
             | 
| 1303 1320 | 
             
                        float dis = distance_to_code(codes);
         | 
| 1304 1321 | 
             
                        if (dis > simi[0]) {
         | 
| 1305 1322 | 
             
                            int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
         |