faiss 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +11 -8
- data/vendor/faiss/faiss/Clustering.cpp +0 -16
- data/vendor/faiss/faiss/IVFlib.cpp +213 -0
- data/vendor/faiss/faiss/IVFlib.h +42 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -7
- data/vendor/faiss/faiss/IndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +4 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +13 -20
- data/vendor/faiss/faiss/IndexHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexIVF.cpp +20 -3
- data/vendor/faiss/faiss/IndexIVF.h +5 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +2 -1
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +277 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +70 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +148 -0
- data/vendor/faiss/faiss/IndexRaBitQ.h +65 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -1
- data/vendor/faiss/faiss/clone_index.cpp +38 -3
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +19 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +4 -11
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +13 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +112 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +35 -13
- data/vendor/faiss/faiss/impl/HNSW.h +5 -4
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +519 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +78 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +2 -2
- data/vendor/faiss/faiss/impl/code_distance/code_distance-sve.h +3 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +220 -25
- data/vendor/faiss/faiss/impl/index_write.cpp +29 -0
- data/vendor/faiss/faiss/impl/io.h +2 -2
- data/vendor/faiss/faiss/impl/io_macros.h +2 -0
- data/vendor/faiss/faiss/impl/mapped_io.cpp +313 -0
- data/vendor/faiss/faiss/impl/mapped_io.h +51 -0
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +316 -0
- data/vendor/faiss/faiss/impl/platform_macros.h +7 -3
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +67 -0
- data/vendor/faiss/faiss/impl/zerocopy_io.h +32 -0
- data/vendor/faiss/faiss/index_factory.cpp +16 -5
- data/vendor/faiss/faiss/index_io.h +4 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.h +5 -3
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +24 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +22 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +30 -12
- data/vendor/faiss/faiss/utils/hamming.cpp +45 -21
- data/vendor/faiss/faiss/utils/hamming.h +7 -3
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +4 -4
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +16 -4
| @@ -0,0 +1,313 @@ | |
| 1 | 
            +
            /*
         | 
| 2 | 
            +
             * Copyright (c) Meta Platforms, Inc. and affiliates.
         | 
| 3 | 
            +
             *
         | 
| 4 | 
            +
             * This source code is licensed under the MIT license found in the
         | 
| 5 | 
            +
             * LICENSE file in the root directory of this source tree.
         | 
| 6 | 
            +
             */
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            #include <stdio.h>
         | 
| 9 | 
            +
            #include <string.h>
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            #ifdef __linux__
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            #include <fcntl.h>
         | 
| 14 | 
            +
            #include <sys/mman.h>
         | 
| 15 | 
            +
            #include <sys/stat.h>
         | 
| 16 | 
            +
            #include <sys/types.h>
         | 
| 17 | 
            +
            #include <unistd.h>
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            #elif defined(_WIN32)
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            #include <Windows.h> // @manual
         | 
| 22 | 
            +
            #include <io.h>      // @manual
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            #endif
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            #include <cstring>
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            #include <faiss/impl/FaissAssert.h>
         | 
| 29 | 
            +
            #include <faiss/impl/mapped_io.h>
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            namespace faiss {
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            #ifdef __linux__
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            // Linux implementation: maps the whole file read-only with mmap().
            struct MmappedFileMappingOwner::PImpl {
                // start of the mapping; stays nullptr when nothing is mapped
                // (zero-length file)
                void* ptr = nullptr;
                // length of the mapping in bytes
                size_t ptr_size = 0;

                // Opens `filename` for reading and maps its whole content.
                // Throws through FAISS_THROW_IF_NOT_FMT if the file cannot be
                // opened, stat'ed or mapped.
                PImpl(const std::string& filename) {
                    auto f = std::unique_ptr<FILE, decltype(&fclose)>(
                            fopen(filename.c_str(), "r"), &fclose);
                    FAISS_THROW_IF_NOT_FMT(
                            f.get(),
                            "could not open %s for reading: %s",
                            filename.c_str(),
                            strerror(errno));

                    map_whole_file(fileno(f.get()));

                    // f is closed when it goes out of scope; a mapping remains
                    // valid after its file descriptor has been closed.
                }

                // Maps the whole content of an already opened stream.
                // The stream is not closed here: it belongs to the caller.
                PImpl(FILE* f) {
                    map_whole_file(fileno(f));
                }

                ~PImpl() {
                    // nothing was mapped for a zero-length file
                    if (ptr != nullptr) {
                        // todo: check for an error
                        munmap(ptr, ptr_size);
                    }
                }

                // Shared by both constructors: queries the size of the file
                // behind `fd`, maps it read-only and records the result in
                // ptr / ptr_size.
                void map_whole_file(int fd) {
                    // get the size
                    struct stat s;
                    const int status = fstat(fd, &s);
                    FAISS_THROW_IF_NOT_FMT(
                            status >= 0, "fstat() failed: %s", strerror(errno));

                    const size_t filesize = static_cast<size_t>(s.st_size);
                    if (filesize == 0) {
                        // mmap() rejects a zero length (EINVAL). Expose an empty
                        // mapping (nullptr, 0) instead of storing MAP_FAILED.
                        return;
                    }

                    void* address =
                            mmap(nullptr, filesize, PROT_READ, MAP_SHARED, fd, 0);
                    // mmap() reports failure with MAP_FAILED, never with nullptr
                    FAISS_THROW_IF_NOT_FMT(
                            address != MAP_FAILED,
                            "could not mmap(): %s",
                            strerror(errno));

                    // access pattern hint only; a failure here is harmless
                    madvise(address, filesize, MADV_RANDOM);

                    // save it
                    ptr = address;
                    ptr_size = filesize;
                }
            };
         | 
| 98 | 
            +
             | 
| 99 | 
            +
            #elif defined(_WIN32)
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            // Windows implementation: maps the whole file read-only through a
            // file mapping object.
            struct MmappedFileMappingOwner::PImpl {
                // base address of the mapped view
                void* ptr = nullptr;
                // size of the file in bytes, as reported by GetFileSizeEx()
                size_t ptr_size = 0;
                // file mapping object backing `ptr`; INVALID_HANDLE_VALUE when
                // no mapping is held
                HANDLE mapping_handle = INVALID_HANDLE_VALUE;

                // Opens `filename` and maps its whole content.
                // Throws through FAISS_THROW_FMT on any Win32 failure; the
                // temporary file handle is closed on every path.
                PImpl(const std::string& filename) {
                    // The path is a narrow (char) string, so the ANSI entry point
                    // is called explicitly: the CreateFile macro resolves to
                    // CreateFileW in UNICODE builds, which does not accept
                    // filename.c_str().
                    HANDLE file_handle = CreateFileA(
                            filename.c_str(),
                            GENERIC_READ,
                            FILE_SHARE_READ,
                            nullptr,
                            OPEN_EXISTING,
                            0,
                            nullptr);
                    if (file_handle == INVALID_HANDLE_VALUE) {
                        const auto error = GetLastError();
                        FAISS_THROW_FMT(
                                "could not open the file, %s (error %d)",
                                filename.c_str(),
                                error);
                    }

                    // get the size of the file
                    LARGE_INTEGER len_li;
                    if (GetFileSizeEx(file_handle, &len_li) == 0) {
                        // read the error code before CloseHandle() can replace it
                        const auto error = GetLastError();

                        CloseHandle(file_handle);

                        FAISS_THROW_FMT(
                                "could not get the file size, %s (error %d)",
                                filename.c_str(),
                                error);
                    }

                    // create a mapping
                    mapping_handle = CreateFileMapping(
                            file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
                    if (mapping_handle == nullptr) {
                        const auto error = GetLastError();

                        CloseHandle(file_handle);

                        FAISS_THROW_FMT(
                                "could not create a file mapping, %s (error %d)",
                                filename.c_str(),
                                error);
                    }
                    // from here on only the mapping object is used
                    CloseHandle(file_handle);

                    char* data =
                            (char*)MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, 0);
                    if (data == nullptr) {
                        const auto error = GetLastError();

                        CloseHandle(mapping_handle);
                        mapping_handle = INVALID_HANDLE_VALUE;

                        FAISS_THROW_FMT(
                                "could not get map the file, %s (error %d)",
                                filename.c_str(),
                                error);
                    }

                    ptr = data;
                    ptr_size = len_li.QuadPart;
                }

                // Maps the whole content of an already opened stream.
                // The OS handle behind `f` belongs to the caller and is never
                // closed here.
                PImpl(FILE* f) {
                    // obtain a HANDLE from a FILE
                    const int fd = _fileno(f);
                    if (fd == -1) {
                        // no good
                        // fixed message without format arguments: use
                        // FAISS_THROW_MSG, as the fallback implementation in this
                        // file does, instead of FAISS_THROW_FMT with an empty
                        // argument list
                        FAISS_THROW_MSG("could not get a HANDLE");
                    }

                    HANDLE file_handle = (HANDLE)_get_osfhandle(fd);
                    if (file_handle == INVALID_HANDLE_VALUE) {
                        FAISS_THROW_MSG("could not get an OS HANDLE");
                    }

                    // get the size of the file
                    LARGE_INTEGER len_li;
                    if (GetFileSizeEx(file_handle, &len_li) == 0) {
                        const auto error = GetLastError();
                        FAISS_THROW_FMT("could not get the file size (error %d)", error);
                    }

                    // create a mapping
                    mapping_handle = CreateFileMapping(
                            file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
                    if (mapping_handle == nullptr) {
                        const auto error = GetLastError();
                        FAISS_THROW_FMT(
                                "could not create a file mapping, (error %d)", error);
                    }

                    // the handle is provided externally, so this is not our business
                    //   to close file_handle.

                    char* data =
                            (char*)MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, 0);
                    if (data == nullptr) {
                        const auto error = GetLastError();

                        CloseHandle(mapping_handle);
                        mapping_handle = INVALID_HANDLE_VALUE;

                        FAISS_THROW_FMT("could not get map the file, (error %d)", error);
                    }

                    ptr = data;
                    ptr_size = len_li.QuadPart;
                }

                // Releases the view first, then the mapping object behind it.
                ~PImpl() {
                    if (mapping_handle != INVALID_HANDLE_VALUE) {
                        UnmapViewOfFile(ptr);
                        CloseHandle(mapping_handle);

                        mapping_handle = INVALID_HANDLE_VALUE;
                        ptr = nullptr;
                    }
                }
            };
         | 
| 226 | 
            +
             | 
| 227 | 
            +
            #else
         | 
| 228 | 
            +
             | 
| 229 | 
            +
            // Fallback for platforms without a memory-mapping implementation:
            // both constructors refuse to build the object.
            struct MmappedFileMappingOwner::PImpl {
                // mapping by file name is not available on this platform
                PImpl(const std::string& /*filename*/) {
                    FAISS_THROW_MSG("Not implemented");
                }

                // mapping an opened stream is not available on this platform
                PImpl(FILE* /*f*/) {
                    FAISS_THROW_MSG("Not implemented");
                }

                // kept so that data() / size() compile on every platform
                void* ptr = nullptr;
                size_t ptr_size = 0;
            };
         | 
| 241 | 
            +
             | 
| 242 | 
            +
            #endif
         | 
| 243 | 
            +
             | 
| 244 | 
            +
            // Creates the platform-specific implementation for the file named
            // `filename`; any exception thrown by PImpl propagates to the caller.
            MmappedFileMappingOwner::MmappedFileMappingOwner(const std::string& filename)
                    : p_impl(std::make_unique<PImpl>(filename)) {}
         | 
| 247 | 
            +
             | 
| 248 | 
            +
            // Creates the platform-specific implementation for an already opened
            // stream; any exception thrown by PImpl propagates to the caller.
            MmappedFileMappingOwner::MmappedFileMappingOwner(FILE* f)
                    : p_impl(std::make_unique<PImpl>(f)) {}
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            // Defaulted here rather than in the header: destroying the
            // std::unique_ptr<PImpl> member requires PImpl to be a complete type,
            // and PImpl is only defined in this file.
            MmappedFileMappingOwner::~MmappedFileMappingOwner() = default;
         | 
| 253 | 
            +
             | 
| 254 | 
            +
            //
         | 
| 255 | 
            +
            void* MmappedFileMappingOwner::data() const {
         | 
| 256 | 
            +
                return p_impl->ptr;
         | 
| 257 | 
            +
            }
         | 
| 258 | 
            +
             | 
| 259 | 
            +
            size_t MmappedFileMappingOwner::size() const {
         | 
| 260 | 
            +
                return p_impl->ptr_size;
         | 
| 261 | 
            +
            }
         | 
| 262 | 
            +
             | 
| 263 | 
            +
            MappedFileIOReader::MappedFileIOReader(
         | 
| 264 | 
            +
                    const std::shared_ptr<MmappedFileMappingOwner>& owner)
         | 
| 265 | 
            +
                    : mmap_owner(owner) {}
         | 
| 266 | 
            +
             | 
| 267 | 
            +
            // this operation performs a copy
         | 
| 268 | 
            +
            // Copying read: obtains the source address from mmap() (which also
            // advances the position) and copies the reported items into `ptr`.
            // Returns the number of items reported by mmap(); 0 when nothing was
            // requested.
            size_t MappedFileIOReader::operator()(void* ptr, size_t size, size_t nitems) {
                if (size * nitems == 0) {
                    return 0;
                }

                void* mapped = nullptr;
                const size_t items_read = this->mmap(&mapped, size, nitems);
                if (items_read == 0) {
                    return 0;
                }

                memcpy(ptr, mapped, size * items_read);
                return items_read;
            }
         | 
| 282 | 
            +
             | 
| 283 | 
            +
            // this operation returns a mmapped address, owned by mmap_owner
         | 
| 284 | 
            +
            // Zero-copy read: stores in *ptr the address of the next unread byte
            // of the mapping and advances the position by the bytes handed out.
            //
            // ptr    - receives the address; left untouched when 0 is returned or
            //          when size == 0
            // size   - size of one item, in bytes
            // nitems - number of items requested
            //
            // Returns the number of COMPLETE items available, at most nitems
            // (fread-like). A trailing partial item is never reported, so neither
            // the returned range nor the position can go past the end of the
            // mapping.
            size_t MappedFileIOReader::mmap(void** ptr, size_t size, size_t nitems) {
                if (size == 0) {
                    return nitems;
                }

                const size_t total_size = mmap_owner->size();
                if (pos >= total_size) {
                    // nothing left; also avoids an unsigned underflow below
                    return 0;
                }

                // Count whole items by dividing the remaining bytes: this rounds
                // down and cannot overflow the way size * nitems can.
                const size_t available_items = (total_size - pos) / size;
                const size_t actual_nitems =
                        (nitems < available_items) ? nitems : available_items;
                if (actual_nitems == 0) {
                    return 0;
                }

                // get an address
                *ptr = static_cast<char*>(mmap_owner->data()) + pos;

                // alter pos
                pos += size * actual_nitems;

                return actual_nitems;
            }
         | 
| 307 | 
            +
             | 
| 308 | 
            +
            int MappedFileIOReader::filedescriptor() {
         | 
| 309 | 
            +
                // todo
         | 
| 310 | 
            +
                return -1;
         | 
| 311 | 
            +
            }
         | 
| 312 | 
            +
             | 
| 313 | 
            +
            } // namespace faiss
         | 
| @@ -0,0 +1,51 @@ | |
| 1 | 
            +
            /*
         | 
| 2 | 
            +
             * Copyright (c) Meta Platforms, Inc. and affiliates.
         | 
| 3 | 
            +
             *
         | 
| 4 | 
            +
             * This source code is licensed under the MIT license found in the
         | 
| 5 | 
            +
             * LICENSE file in the root directory of this source tree.
         | 
| 6 | 
            +
             */
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            #pragma once
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            #include <cstddef>
         | 
| 11 | 
            +
            #include <cstdint>
         | 
| 12 | 
            +
            #include <memory>
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            #include <faiss/impl/io.h>
         | 
| 15 | 
            +
            #include <faiss/impl/maybe_owned_vector.h>
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            namespace faiss {
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            // holds a memory-mapped region over a file
         | 
| 20 | 
            +
            struct MmappedFileMappingOwner : public MaybeOwnedVectorOwner {
         | 
| 21 | 
            +
                // Maps the file named `filename`. The platform-specific work is
                // done by PImpl, which throws if the file cannot be mapped.
                MmappedFileMappingOwner(const std::string& filename);
         | 
| 22 | 
            +
                // Maps the file behind an already opened stream; the stream is
                // not closed by this object.
                MmappedFileMappingOwner(FILE* f);
         | 
| 23 | 
            +
                // Declared here and defined in the .cpp file, where PImpl is a
                // complete type.
                ~MmappedFileMappingOwner();
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                // address stored by the implementation (PImpl::ptr)
                void* data() const;
         | 
| 26 | 
            +
                // size stored by the implementation (PImpl::ptr_size), in bytes
                size_t size() const;
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                // platform-specific state, defined in the .cpp file
                struct PImpl;
         | 
| 29 | 
            +
                std::unique_ptr<PImpl> p_impl;
         | 
| 30 | 
            +
            };
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            // A deserializer that supports memory-mapped files.
         | 
| 33 | 
            +
            // All de-allocations should happen as soon as the index gets destroyed,
         | 
| 34 | 
            +
            //   after all the underlying MaybeOwnedVector objects are destroyed.
         | 
| 35 | 
            +
            struct MappedFileIOReader : IOReader {
         | 
| 36 | 
            +
                // the mapping that all reads are served from; shared so that it
                // can outlive this reader
                std::shared_ptr<MmappedFileMappingOwner> mmap_owner;
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                // byte offset of the next read inside the mapping
                size_t pos = 0;
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                // stores a copy of `owner`; reading starts at offset 0
                MappedFileIOReader(const std::shared_ptr<MmappedFileMappingOwner>& owner);
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                // perform a copy
                // (copies the items reported by mmap() into ptr and returns
                //  their count)
                size_t operator()(void* ptr, size_t size, size_t nitems) override;
         | 
| 43 | 
            +
                // perform a quasi-read that returns a mmapped address, owned by mmap_owner,
         | 
| 44 | 
            +
                //   and updates the position
         | 
| 45 | 
            +
                // (the address is written to *ptr; the return value is a number
                //  of items of `size` bytes)
                size_t mmap(void** ptr, size_t size, size_t nitems);
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                // no file descriptor is exposed yet: the implementation returns -1
                int filedescriptor() override;
         | 
| 48 | 
            +
            };
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            } // namespace faiss
         | 
| @@ -0,0 +1,316 @@ | |
| 1 | 
            +
            /*
         | 
| 2 | 
            +
             * Copyright (c) Meta Platforms, Inc. and affiliates.
         | 
| 3 | 
            +
             *
         | 
| 4 | 
            +
             * This source code is licensed under the MIT license found in the
         | 
| 5 | 
            +
             * LICENSE file in the root directory of this source tree.
         | 
| 6 | 
            +
             */
         | 
| 7 | 
            +
            #pragma once
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            #include <cstddef>
         | 
| 10 | 
            +
            #include <cstdint>
         | 
| 11 | 
            +
            #include <cstring>
         | 
| 12 | 
            +
            #include <memory>
         | 
| 13 | 
            +
            #include <vector>
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            #include <faiss/impl/FaissAssert.h>
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            namespace faiss {
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            // An interface for an owner of a MaybeOwnedVector.
         | 
| 20 | 
            +
            struct MaybeOwnedVectorOwner {
         | 
| 21 | 
            +
                // virtual, so that a derived owner (MmappedFileMappingOwner derives
                // from this type) can be destroyed through a pointer to this base
                virtual ~MaybeOwnedVectorOwner() = default;
         | 
| 22 | 
            +
            };
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            // a container that either works as std::vector<T> that owns its own memory,
         | 
| 25 | 
            +
            //    or as a view of a memory buffer, with a known size
         | 
| 26 | 
            +
            template <typename T>
         | 
| 27 | 
            +
            struct MaybeOwnedVector {
         | 
| 28 | 
            +
                using value_type = T;
         | 
| 29 | 
            +
                using self_type = MaybeOwnedVector<T>;
         | 
| 30 | 
            +
                using iterator = typename std::vector<T>::iterator;
         | 
| 31 | 
            +
                using const_iterator = typename std::vector<T>::const_iterator;
         | 
| 32 | 
            +
                using size_type = typename std::vector<T>::size_type;
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                bool is_owned = true;
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                // this one is used if is_owned == true
         | 
| 37 | 
            +
                std::vector<T> owned_data;
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                // these three are used if is_owned == false
         | 
| 40 | 
            +
                T* view_data = nullptr;
         | 
| 41 | 
            +
                // the number of T elements
         | 
| 42 | 
            +
                size_t view_size = 0;
         | 
| 43 | 
            +
                // who owns the data.
         | 
| 44 | 
            +
                // This field can be nullptr, and it is present ONLY in order
         | 
| 45 | 
            +
                //   to avoid possible tricky memory / resource leaks.
         | 
| 46 | 
            +
                std::shared_ptr<MaybeOwnedVectorOwner> owner;
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                // points either to view_data, or to owned.data()
         | 
| 49 | 
            +
                T* c_ptr = nullptr;
         | 
| 50 | 
            +
                // uses either view_size, or owned.size();
         | 
| 51 | 
            +
                size_t c_size = 0;
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                MaybeOwnedVector() = default;
         | 
| 54 | 
            +
                MaybeOwnedVector(const size_t initial_size) {
         | 
| 55 | 
            +
                    is_owned = true;
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                    owned_data.resize(initial_size);
         | 
| 58 | 
            +
                    c_ptr = owned_data.data();
         | 
| 59 | 
            +
                    c_size = owned_data.size();
         | 
| 60 | 
            +
                }
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                explicit MaybeOwnedVector(const std::vector<T>& vec)
         | 
| 63 | 
            +
                        : faiss::MaybeOwnedVector<T>(vec.size()) {
         | 
| 64 | 
            +
                    if (vec.size() > 0) {
         | 
| 65 | 
            +
                        memcpy(owned_data.data(), vec.data(), sizeof(T) * vec.size());
         | 
| 66 | 
            +
                    }
         | 
| 67 | 
            +
                }
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                MaybeOwnedVector(const MaybeOwnedVector& other) {
         | 
| 70 | 
            +
                    is_owned = other.is_owned;
         | 
| 71 | 
            +
                    owned_data = other.owned_data;
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                    view_data = other.view_data;
         | 
| 74 | 
            +
                    view_size = other.view_size;
         | 
| 75 | 
            +
                    owner = other.owner;
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                    if (is_owned) {
         | 
| 78 | 
            +
                        c_ptr = owned_data.data();
         | 
| 79 | 
            +
                        c_size = owned_data.size();
         | 
| 80 | 
            +
                    } else {
         | 
| 81 | 
            +
                        c_ptr = view_data;
         | 
| 82 | 
            +
                        c_size = view_size;
         | 
| 83 | 
            +
                    }
         | 
| 84 | 
            +
                }
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                MaybeOwnedVector(MaybeOwnedVector&& other) {
         | 
| 87 | 
            +
                    is_owned = other.is_owned;
         | 
| 88 | 
            +
                    owned_data = std::move(other.owned_data);
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                    view_data = other.view_data;
         | 
| 91 | 
            +
                    view_size = other.view_size;
         | 
| 92 | 
            +
                    owner = std::move(other.owner);
         | 
| 93 | 
            +
                    other.owner = nullptr;
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                    if (is_owned) {
         | 
| 96 | 
            +
                        c_ptr = owned_data.data();
         | 
| 97 | 
            +
                        c_size = owned_data.size();
         | 
| 98 | 
            +
                    } else {
         | 
| 99 | 
            +
                        c_ptr = view_data;
         | 
| 100 | 
            +
                        c_size = view_size;
         | 
| 101 | 
            +
                    }
         | 
| 102 | 
            +
                }
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                MaybeOwnedVector& operator=(const MaybeOwnedVector& other) {
         | 
| 105 | 
            +
                    if (this == &other) {
         | 
| 106 | 
            +
                        return *this;
         | 
| 107 | 
            +
                    }
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                    // create a copy
         | 
| 110 | 
            +
                    MaybeOwnedVector cloned(other);
         | 
| 111 | 
            +
                    // swap
         | 
| 112 | 
            +
                    swap(*this, cloned);
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                    return *this;
         | 
| 115 | 
            +
                }
         | 
| 116 | 
            +
             | 
| 117 | 
            +
                MaybeOwnedVector& operator=(MaybeOwnedVector&& other) {
         | 
| 118 | 
            +
                    if (this == &other) {
         | 
| 119 | 
            +
                        return *this;
         | 
| 120 | 
            +
                    }
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                    // moved
         | 
| 123 | 
            +
                    MaybeOwnedVector moved(std::move(other));
         | 
| 124 | 
            +
                    // swap
         | 
| 125 | 
            +
                    swap(*this, moved);
         | 
| 126 | 
            +
             | 
| 127 | 
            +
                    return *this;
         | 
| 128 | 
            +
                }
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                MaybeOwnedVector(std::vector<T>&& other) {
         | 
| 131 | 
            +
                    is_owned = true;
         | 
| 132 | 
            +
             | 
| 133 | 
            +
                    owned_data = std::move(other);
         | 
| 134 | 
            +
                    c_ptr = owned_data.data();
         | 
| 135 | 
            +
                    c_size = owned_data.size();
         | 
| 136 | 
            +
                }
         | 
| 137 | 
            +
             | 
| 138 | 
            +
                static MaybeOwnedVector create_view(
         | 
| 139 | 
            +
                        void* address,
         | 
| 140 | 
            +
                        const size_t n_elements,
         | 
| 141 | 
            +
                        const std::shared_ptr<MaybeOwnedVectorOwner>& owner) {
         | 
| 142 | 
            +
                    MaybeOwnedVector vec;
         | 
| 143 | 
            +
                    vec.is_owned = false;
         | 
| 144 | 
            +
                    vec.view_data = reinterpret_cast<T*>(address);
         | 
| 145 | 
            +
                    vec.view_size = n_elements;
         | 
| 146 | 
            +
                    vec.owner = owner;
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                    vec.c_ptr = vec.view_data;
         | 
| 149 | 
            +
                    vec.c_size = vec.view_size;
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                    return vec;
         | 
| 152 | 
            +
                }
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                const T* data() const {
         | 
| 155 | 
            +
                    return c_ptr;
         | 
| 156 | 
            +
                }
         | 
| 157 | 
            +
             | 
| 158 | 
            +
                T* data() {
         | 
| 159 | 
            +
                    return c_ptr;
         | 
| 160 | 
            +
                }
         | 
| 161 | 
            +
             | 
| 162 | 
            +
                size_t size() const {
         | 
| 163 | 
            +
                    return c_size;
         | 
| 164 | 
            +
                }
         | 
| 165 | 
            +
             | 
| 166 | 
            +
                size_t byte_size() const {
         | 
| 167 | 
            +
                    return c_size * sizeof(T);
         | 
| 168 | 
            +
                }
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                T& operator[](const size_t idx) {
         | 
| 171 | 
            +
                    return c_ptr[idx];
         | 
| 172 | 
            +
                }
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                const T& operator[](const size_t idx) const {
         | 
| 175 | 
            +
                    return c_ptr[idx];
         | 
| 176 | 
            +
                }
         | 
| 177 | 
            +
             | 
| 178 | 
            +
                T& at(size_type pos) {
         | 
| 179 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 180 | 
            +
                            is_owned,
         | 
| 181 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                    return owned_data.at(pos);
         | 
| 184 | 
            +
                }
         | 
| 185 | 
            +
             | 
| 186 | 
            +
                const T& at(size_type pos) const {
         | 
| 187 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 188 | 
            +
                            is_owned,
         | 
| 189 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 190 | 
            +
             | 
| 191 | 
            +
                    return owned_data.at(pos);
         | 
| 192 | 
            +
                }
         | 
| 193 | 
            +
             | 
| 194 | 
            +
                iterator begin() {
         | 
| 195 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 196 | 
            +
                            is_owned,
         | 
| 197 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 198 | 
            +
             | 
| 199 | 
            +
                    return owned_data.begin();
         | 
| 200 | 
            +
                }
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                const_iterator begin() const {
         | 
| 203 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 204 | 
            +
                            is_owned,
         | 
| 205 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 206 | 
            +
             | 
| 207 | 
            +
                    return owned_data.begin();
         | 
| 208 | 
            +
                }
         | 
| 209 | 
            +
             | 
| 210 | 
            +
                iterator end() {
         | 
| 211 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 212 | 
            +
                            is_owned,
         | 
| 213 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                    return owned_data.end();
         | 
| 216 | 
            +
                }
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                const_iterator end() const {
         | 
| 219 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 220 | 
            +
                            is_owned,
         | 
| 221 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 222 | 
            +
             | 
| 223 | 
            +
                    return owned_data.end();
         | 
| 224 | 
            +
                }
         | 
| 225 | 
            +
             | 
| 226 | 
            +
                iterator erase(const_iterator begin, const_iterator end) {
         | 
| 227 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 228 | 
            +
                            is_owned,
         | 
| 229 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                    auto result = owned_data.erase(begin, end);
         | 
| 232 | 
            +
                    c_ptr = owned_data.data();
         | 
| 233 | 
            +
                    c_size = owned_data.size();
         | 
| 234 | 
            +
             | 
| 235 | 
            +
                    return result;
         | 
| 236 | 
            +
                }
         | 
| 237 | 
            +
             | 
| 238 | 
            +
                template <class InputIt>
         | 
| 239 | 
            +
                iterator insert(const_iterator pos, InputIt first, InputIt last) {
         | 
| 240 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 241 | 
            +
                            is_owned,
         | 
| 242 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 243 | 
            +
             | 
| 244 | 
            +
                    auto result = owned_data.insert(pos, first, last);
         | 
| 245 | 
            +
                    c_ptr = owned_data.data();
         | 
| 246 | 
            +
                    c_size = owned_data.size();
         | 
| 247 | 
            +
             | 
| 248 | 
            +
                    return result;
         | 
| 249 | 
            +
                }
         | 
| 250 | 
            +
             | 
| 251 | 
            +
                void clear() {
         | 
| 252 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 253 | 
            +
                            is_owned,
         | 
| 254 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 255 | 
            +
             | 
| 256 | 
            +
                    owned_data.clear();
         | 
| 257 | 
            +
                    c_ptr = owned_data.data();
         | 
| 258 | 
            +
                    c_size = owned_data.size();
         | 
| 259 | 
            +
                }
         | 
| 260 | 
            +
             | 
| 261 | 
            +
                void resize(const size_t new_size) {
         | 
| 262 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 263 | 
            +
                            is_owned,
         | 
| 264 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 265 | 
            +
             | 
| 266 | 
            +
                    owned_data.resize(new_size);
         | 
| 267 | 
            +
                    c_ptr = owned_data.data();
         | 
| 268 | 
            +
                    c_size = owned_data.size();
         | 
| 269 | 
            +
                }
         | 
| 270 | 
            +
             | 
| 271 | 
            +
                void resize(const size_t new_size, const value_type v) {
         | 
| 272 | 
            +
                    FAISS_ASSERT_MSG(
         | 
| 273 | 
            +
                            is_owned,
         | 
| 274 | 
            +
                            "This operation cannot be performed on a viewed vector");
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                    owned_data.resize(new_size, v);
         | 
| 277 | 
            +
                    c_ptr = owned_data.data();
         | 
| 278 | 
            +
                    c_size = owned_data.size();
         | 
| 279 | 
            +
                }
         | 
| 280 | 
            +
             | 
| 281 | 
            +
                friend void swap(self_type& a, self_type& b) {
         | 
| 282 | 
            +
                    std::swap(a.is_owned, b.is_owned);
         | 
| 283 | 
            +
                    std::swap(a.owned_data, b.owned_data);
         | 
| 284 | 
            +
                    std::swap(a.view_data, b.view_data);
         | 
| 285 | 
            +
                    std::swap(a.view_size, b.view_size);
         | 
| 286 | 
            +
                    std::swap(a.owner, b.owner);
         | 
| 287 | 
            +
                    std::swap(a.c_ptr, b.c_ptr);
         | 
| 288 | 
            +
                    std::swap(a.c_size, b.c_size);
         | 
| 289 | 
            +
                }
         | 
| 290 | 
            +
            };
         | 
| 291 | 
            +
             | 
| 292 | 
            +
            // Type trait: the primary template answers "false" for every type;
            // only an explicit specialization can answer "true".
            template <typename Candidate>
            struct is_maybe_owned_vector : std::bool_constant<false> {};
         | 
| 294 | 
            +
             | 
| 295 | 
            +
            template <typename T>
         | 
| 296 | 
            +
            struct is_maybe_owned_vector<MaybeOwnedVector<T>> : std::true_type {};
         | 
| 297 | 
            +
             | 
| 298 | 
            +
            template <typename T>
         | 
| 299 | 
            +
            inline constexpr bool is_maybe_owned_vector_v = is_maybe_owned_vector<T>::value;
         | 
| 300 | 
            +
             | 
| 301 | 
            +
            template <typename T>
         | 
| 302 | 
            +
            bool operator==(
         | 
| 303 | 
            +
                    const MaybeOwnedVector<T>& lhs,
         | 
| 304 | 
            +
                    const MaybeOwnedVector<T>& rhs) {
         | 
| 305 | 
            +
                return lhs.size() == rhs.size() &&
         | 
| 306 | 
            +
                        !memcmp(lhs.data(), rhs.data(), lhs.byte_size());
         | 
| 307 | 
            +
            }
         | 
| 308 | 
            +
             | 
| 309 | 
            +
            template <typename T>
         | 
| 310 | 
            +
            bool operator!=(
         | 
| 311 | 
            +
                    const MaybeOwnedVector<T>& lhs,
         | 
| 312 | 
            +
                    const MaybeOwnedVector<T>& rhs) {
         | 
| 313 | 
            +
                return !(lhs == rhs);
         | 
| 314 | 
            +
            }
         | 
| 315 | 
            +
             | 
| 316 | 
            +
            } // namespace faiss
         |