faiss 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102

data/vendor/faiss/faiss/gpu/GpuIndexCagra.h
@@ -0,0 +1,282 @@
+ /**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+ /*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #pragma once
+
+ #include <faiss/IndexIVF.h>
+ #include <faiss/gpu/GpuIndex.h>
+ #include <faiss/gpu/GpuIndexIVFPQ.h>
+
+ namespace faiss {
+ struct IndexHNSWCagra;
+ }
+
+ namespace faiss {
+ namespace gpu {
+
+ class RaftCagra;
+
+ enum class graph_build_algo {
+ /// Use IVF-PQ to build all-neighbors knn graph
+ IVF_PQ,
+ /// Experimental, use NN-Descent to build all-neighbors knn graph
+ NN_DESCENT
+ };
+
+ /// A type for specifying how PQ codebooks are created.
+ enum class codebook_gen { // NOLINT
+ PER_SUBSPACE = 0, // NOLINT
+ PER_CLUSTER = 1, // NOLINT
+ };
+
+ struct IVFPQBuildCagraConfig {
+ ///
+ /// The number of inverted lists (clusters)
+ ///
+ /// Hint: the number of vectors per cluster (`n_rows/n_lists`) should be
+ /// approximately 1,000 to 10,000.
+
+ uint32_t n_lists = 1024;
+ /// The number of iterations searching for kmeans centers (index building).
+ uint32_t kmeans_n_iters = 20;
+ /// The fraction of data to use during iterative kmeans building.
+ double kmeans_trainset_fraction = 0.5;
+ ///
+ /// The bit length of the vector element after compression by PQ.
+ ///
+ /// Possible values: [4, 5, 6, 7, 8].
+ ///
+ /// Hint: the smaller the 'pq_bits', the smaller the index size and the
+ /// better the search performance, but the lower the recall.
+
+ uint32_t pq_bits = 8;
+ ///
+ /// The dimensionality of the vector after compression by PQ. When zero, an
+ /// optimal value is selected using a heuristic.
+ ///
+ /// NB: `pq_dim /// pq_bits` must be a multiple of 8.
+ ///
+ /// Hint: a smaller 'pq_dim' results in a smaller index size and better
+ /// search performance, but lower recall. If 'pq_bits' is 8, 'pq_dim' can be
+ /// set to any number, but multiple of 8 are desirable for good performance.
+ /// If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8. For good
+ /// performance, it is desirable that 'pq_dim' is a multiple of 32. Ideally,
+ /// 'pq_dim' should be also a divisor of the dataset dim.
+
+ uint32_t pq_dim = 0;
+ /// How PQ codebooks are created.
+ codebook_gen codebook_kind = codebook_gen::PER_SUBSPACE;
+ ///
+ /// Apply a random rotation matrix on the input data and queries even if
+ /// `dim % pq_dim == 0`.
+ ///
+ /// Note: if `dim` is not multiple of `pq_dim`, a random rotation is always
+ /// applied to the input data and queries to transform the working space
+ /// from `dim` to `rot_dim`, which may be slightly larger than the original
+ /// space and and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
+ /// However, this transform is not necessary when `dim` is multiple of
+ /// `pq_dim`
+ /// (`dim == rot_dim`, hence no need in adding "extra" data columns /
+ /// features).
+ ///
+ /// By default, if `dim == rot_dim`, the rotation transform is initialized
+ /// with the identity matrix. When `force_random_rotation == true`, a random
+ /// orthogonal transform matrix is generated regardless of the values of
+ /// `dim` and `pq_dim`.
+
+ bool force_random_rotation = false;
+ ///
+ /// By default, the algorithm allocates more space than necessary for
+ /// individual clusters
+ /// (`list_data`). This allows to amortize the cost of memory allocation and
+ /// reduce the number of data copies during repeated calls to `extend`
+ /// (extending the database).
+ ///
+ /// The alternative is the conservative allocation behavior; when enabled,
+ /// the algorithm always allocates the minimum amount of memory required to
+ /// store the given number of records. Set this flag to `true` if you prefer
+ /// to use as little GPU memory for the database as possible.
+
+ bool conservative_memory_allocation = false;
+ };
+
+ struct IVFPQSearchCagraConfig {
+ /// The number of clusters to search.
+ uint32_t n_probes = 20;
+ ///
+ /// Data type of look up table to be created dynamically at search time.
+ ///
+ /// Possible values: [CUDA_R_32F, CUDA_R_16F, CUDA_R_8U]
+ ///
+ /// The use of low-precision types reduces the amount of shared memory
+ /// required at search time, so fast shared memory kernels can be used even
+ /// for datasets with large dimansionality. Note that the recall is slightly
+ /// degraded when low-precision type is selected.
+
+ cudaDataType_t lut_dtype = CUDA_R_32F;
+ ///
+ /// Storage data type for distance/similarity computed at search time.
+ ///
+ /// Possible values: [CUDA_R_16F, CUDA_R_32F]
+ ///
+ /// If the performance limiter at search time is device memory access,
+ /// selecting FP16 will improve performance slightly.
+
+ cudaDataType_t internal_distance_dtype = CUDA_R_32F;
+ ///
+ /// Preferred fraction of SM's unified memory / L1 cache to be used as
+ /// shared memory.
+ ///
+ /// Possible values: [0.0 - 1.0] as a fraction of the
+ /// `sharedMemPerMultiprocessor`.
+ ///
+ /// One wants to increase the carveout to make sure a good GPU occupancy for
+ /// the main search kernel, but not to keep it too high to leave some memory
+ /// to be used as L1 cache. Note, this value is interpreted only as a hint.
+ /// Moreover, a GPU usually allows only a fixed set of cache configurations,
+ /// so the provided value is rounded up to the nearest configuration. Refer
+ /// to the NVIDIA tuning guide for the target GPU architecture.
+ ///
+ /// Note, this is a low-level tuning parameter that can have drastic
+ /// negative effects on the search performance if tweaked incorrectly.
+
+ double preferred_shmem_carveout = 1.0;
+ };
+
+ struct GpuIndexCagraConfig : public GpuIndexConfig {
+ /// Degree of input graph for pruning.
+ size_t intermediate_graph_degree = 128;
+ /// Degree of output graph.
+ size_t graph_degree = 64;
+ /// ANN algorithm to build knn graph.
+ graph_build_algo build_algo = graph_build_algo::IVF_PQ;
+ /// Number of Iterations to run if building with NN_DESCENT
+ size_t nn_descent_niter = 20;
+
+ IVFPQBuildCagraConfig* ivf_pq_params = nullptr;
+ IVFPQSearchCagraConfig* ivf_pq_search_params = nullptr;
+ };
+
+ enum class search_algo {
+ /// For large batch sizes.
+ SINGLE_CTA,
+ /// For small batch sizes.
+ MULTI_CTA,
+ MULTI_KERNEL,
+ AUTO
+ };
+
+ enum class hash_mode { HASH, SMALL, AUTO };
+
+ struct SearchParametersCagra : SearchParameters {
+ /// Maximum number of queries to search at the same time (batch size). Auto
+ /// select when 0.
+ size_t max_queries = 0;
+
+ /// Number of intermediate search results retained during the search.
+ ///
+ /// This is the main knob to adjust trade off between accuracy and search
+ /// speed. Higher values improve the search accuracy.
+
+ size_t itopk_size = 64;
+
+ /// Upper limit of search iterations. Auto select when 0.
+ size_t max_iterations = 0;
+
+ // In the following we list additional search parameters for fine tuning.
+ // Reasonable default values are automatically chosen.
+
+ /// Which search implementation to use.
+ search_algo algo = search_algo::AUTO;
+
+ /// Number of threads used to calculate a single distance. 4, 8, 16, or 32.
+
+ size_t team_size = 0;
+
+ /// Number of graph nodes to select as the starting point for the search in
+ /// each iteration. aka search width?
+ size_t search_width = 1;
+ /// Lower limit of search iterations.
+ size_t min_iterations = 0;
+
+ /// Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.
+ size_t thread_block_size = 0;
+ /// Hashmap type. Auto selection when AUTO.
+ hash_mode hashmap_mode = hash_mode::AUTO;
+ /// Lower limit of hashmap bit length. More than 8.
+ size_t hashmap_min_bitlen = 0;
+ /// Upper limit of hashmap fill rate. More than 0.1, less than 0.9.
+ float hashmap_max_fill_rate = 0.5;
+
+ /// Number of iterations of initial random seed node selection. 1 or more.
+
+ uint32_t num_random_samplings = 1;
+ /// Bit mask used for initial random seed node selection.
+ uint64_t seed = 0x128394;
+ };
+
+ struct GpuIndexCagra : public GpuIndex {
+ public:
+ GpuIndexCagra(
+ GpuResourcesProvider* provider,
+ int dims,
+ faiss::MetricType metric = faiss::METRIC_L2,
+ GpuIndexCagraConfig config = GpuIndexCagraConfig());
+
+ /// Trains CAGRA based on the given vector data
+ void train(idx_t n, const float* x) override;
+
+ /// Initialize ourselves from the given CPU index; will overwrite
+ /// all data in ourselves
+ void copyFrom(const faiss::IndexHNSWCagra* index);
+
+ /// Copy ourselves to the given CPU index; will overwrite all data
+ /// in the index instance
+ void copyTo(faiss::IndexHNSWCagra* index) const;
+
+ void reset() override;
+
+ std::vector<idx_t> get_knngraph() const;
+
+ protected:
+ bool addImplRequiresIDs_() const override;
+
+ void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
+
+ /// Called from GpuIndex for search
+ void searchImpl_(
+ idx_t n,
+ const float* x,
+ int k,
+ float* distances,
+ idx_t* labels,
+ const SearchParameters* search_params) const override;
+
+ /// Our configuration options
+ const GpuIndexCagraConfig cagraConfig_;
+
+ /// Instance that we own; contains the inverted lists
+ std::shared_ptr<RaftCagra> index_;
+ };
+
+ } // namespace gpu
+ } // namespace faiss
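
A minimal usage sketch for the GpuIndexCagra declared above (not part of the diff): it assumes the pre-existing faiss::gpu::StandardGpuResources provider, the inherited GpuIndex::search() entry point, and a train-then-search flow, so the call sequence is illustrative rather than authoritative.

#include <faiss/gpu/GpuIndexCagra.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void cagra_demo(const float* xb, size_t nb, const float* xq, size_t nq, int d) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexCagraConfig config;
    config.graph_degree = 64;                // degree of the output graph
    config.intermediate_graph_degree = 128;  // degree of the graph before pruning
    config.build_algo = faiss::gpu::graph_build_algo::IVF_PQ;

    faiss::gpu::GpuIndexCagra index(&res, d, faiss::METRIC_L2, config);
    index.train(nb, xb);                     // builds the CAGRA graph from the data

    int k = 10;
    std::vector<float> distances(nq * k);
    std::vector<faiss::idx_t> labels(nq * k);

    faiss::gpu::SearchParametersCagra params;
    params.itopk_size = 64;                  // main recall/speed knob per the comment above
    index.search(nq, xq, k, distances.data(), labels.data(), &params);
}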

data/vendor/faiss/faiss/gpu/GpuIndexFlat.h
@@ -24,15 +24,13 @@ namespace gpu {
  class FlatIndex;
 
  struct GpuIndexFlatConfig : public GpuIndexConfig {
- inline GpuIndexFlatConfig() : useFloat16(false) {}
-
  /// Whether or not data is stored as float16
- bool useFloat16;
+ bool ALIGNED(8) useFloat16 = false;
 
  /// Deprecated: no longer used
  /// Previously used to indicate whether internal storage of vectors is
  /// transposed
- bool storeTransposed;
+ bool storeTransposed = false;
  };
 
  /// Wrapper around the GPU implementation that looks like
@@ -115,6 +113,8 @@ class GpuIndexFlat : public GpuIndex {
  }
 
  protected:
+ void resetIndex_(int dims);
+
  /// Flat index does not require IDs as there is no storage available for
  /// them
  bool addImplRequiresIDs_() const override;
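
A short sketch (not part of the diff) of what the default member initializers above enable: the config can simply be value-constructed and only the fields of interest overridden. GpuIndexFlat and StandardGpuResources are pre-existing faiss/gpu types; the float16 flag is the one shown in the hunk.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

void build_flat_fp16(const float* xb, faiss::idx_t nb, int d) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexFlatConfig config;  // all fields take their new defaults
    config.useFloat16 = true;               // store vectors as float16 on the GPU

    faiss::gpu::GpuIndexFlat index(&res, d, faiss::METRIC_L2, config);
    index.add(nb, xb);
}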

data/vendor/faiss/faiss/gpu/GpuIndexIVF.h
@@ -21,13 +21,17 @@ class GpuIndexFlat;
  class IVFBase;
 
  struct GpuIndexIVFConfig : public GpuIndexConfig {
- inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
-
  /// Index storage options for the GPU
- IndicesOptions indicesOptions;
+ IndicesOptions indicesOptions = INDICES_64_BIT;
 
  /// Configuration for the coarse quantizer object
  GpuIndexFlatConfig flatConfig;
+
+ /// This flag controls the CPU fallback logic for coarse quantizer
+ /// component of the index. When set to false (default), the cloner will
+ /// throw an exception for indices not implemented on GPU. When set to
+ /// true, it will fallback to a CPU implementation.
+ bool allowCpuCoarseQuantizer = false;
  };
 
  /// Base class of all GPU IVF index types. This (for now) deliberately does not
@@ -75,10 +79,10 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
  virtual void updateQuantizer() = 0;
 
  /// Returns the number of inverted lists we're managing
- idx_t getNumLists() const;
+ virtual idx_t getNumLists() const;
 
  /// Returns the number of vectors present in a particular inverted list
- idx_t getListLength(idx_t listId) const;
+ virtual idx_t getListLength(idx_t listId) const;
 
  /// Return the encoded vector data contained in a particular inverted list,
  /// for debugging purposes.
@@ -86,12 +90,13 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
  /// GPU-side representation.
  /// Otherwise, it is converted to the CPU format.
  /// compliant format, while the native GPU format may differ.
- std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
- const;
+ virtual std::vector<uint8_t> getListVectorData(
+ idx_t listId,
+ bool gpuFormat = false) const;
 
  /// Return the vector indices contained in a particular inverted list, for
  /// debugging purposes.
- std::vector<idx_t> getListIndices(idx_t listId) const;
+ virtual std::vector<idx_t> getListIndices(idx_t listId) const;
 
  void search_preassigned(
  idx_t n,
@@ -123,7 +128,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
  int getCurrentNProbe_(const SearchParameters* params) const;
  void verifyIVFSettings_() const;
  bool addImplRequiresIDs_() const override;
- void trainQuantizer_(idx_t n, const float* x);
+ virtual void trainQuantizer_(idx_t n, const float* x);
 
  /// Called from GpuIndex for add/add_with_ids
  void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
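
A hedged sketch (not part of the diff) of the new allowCpuCoarseQuantizer flag described in the hunk above: copying a CPU IVF index whose coarse quantizer has no GPU implementation. Everything except the new flag is a pre-existing faiss API; the exact construction arguments are illustrative.

#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

void clone_to_gpu(const faiss::IndexIVFFlat& cpu_index) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexIVFFlatConfig config;
    // Default (false) makes the cloner throw for coarse quantizers that are
    // not implemented on GPU; true keeps that component on the CPU instead.
    config.allowCpuCoarseQuantizer = true;

    faiss::gpu::GpuIndexIVFFlat gpu_index(
            &res, cpu_index.d, cpu_index.nlist, cpu_index.metric_type, config);
    gpu_index.copyFrom(&cpu_index);
}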

data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
@@ -8,6 +8,8 @@
  #pragma once
 
  #include <faiss/gpu/GpuIndexIVF.h>
+ #include <faiss/impl/ScalarQuantizer.h>
+
  #include <memory>
 
  namespace faiss {
@@ -21,13 +23,9 @@ class IVFFlat;
  class GpuIndexFlat;
 
  struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
- inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
-
  /// Use the alternative memory layout for the IVF lists
  /// (currently the default)
- bool interleavedLayout;
+ bool interleavedLayout = true;
  };
 
  /// Wrapper around the GPU implementation that looks like
@@ -87,6 +87,23 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
  /// Trains the coarse quantizer based on the given vector data
  void train(idx_t n, const float* x) override;
 
+ void reconstruct_n(idx_t i0, idx_t n, float* out) const override;
+
+ protected:
+ /// Initialize appropriate index
+ void setIndex_(
+ GpuResources* resources,
+ int dim,
+ int nlist,
+ faiss::MetricType metric,
+ float metricArg,
+ bool useResidual,
+ /// Optional ScalarQuantizer
+ faiss::ScalarQuantizer* scalarQ,
+ bool interleavedLayout,
+ IndicesOptions indicesOptions,
+ MemorySpace space);
+
  protected:
  /// Our configuration options
  const GpuIndexIVFFlatConfig ivfFlatConfig_;
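
A small sketch (not part of the diff) of the reconstruct_n override added above: reconstruct_n(i0, ni, recons) is the standard faiss::Index entry point, so stored vectors can now be copied back out of a GpuIndexIVFFlat.

#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <vector>

std::vector<float> dump_vectors(const faiss::gpu::GpuIndexIVFFlat& index) {
    std::vector<float> out(static_cast<size_t>(index.ntotal) * index.d);
    // Decode vectors [0, ntotal) into the output buffer.
    index.reconstruct_n(0, index.ntotal, out.data());
    return out;
}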

data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h
@@ -23,24 +23,19 @@ class GpuIndexFlat;
  class IVFPQ;
 
  struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
- inline GpuIndexIVFPQConfig()
- : useFloat16LookupTables(false),
- usePrecomputedTables(false),
- interleavedLayout(false),
- useMMCodeDistance(false) {}
-
  /// Whether or not float16 residual distance tables are used in the
  /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
  /// 16384, this is required.
- bool useFloat16LookupTables;
+ bool useFloat16LookupTables = false;
 
  /// Whether or not we enable the precomputed table option for
  /// search, which can substantially increase the memory requirement.
- bool usePrecomputedTables;
+ bool usePrecomputedTables = false;
 
  /// Use the alternative memory layout for the IVF lists
- /// WARNING: this is a feature under development, do not use!
- bool interleavedLayout;
+ /// WARNING: this is a feature under development, and is only supported with
+ /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+ bool interleavedLayout = false;
 
  /// Use GEMM-backed computation of PQ code distances for the no precomputed
  /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
  /// Note that MM code distance is enabled automatically if one uses a number
  /// of dimensions per sub-quantizer that is not natively specialized (an odd
  /// number like 7 or so).
- bool useMMCodeDistance;
+ bool useMMCodeDistance = false;
  };
 
  /// IVFPQ index for the GPU
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
  ProductQuantizer pq;
 
  protected:
+ /// Initialize appropriate index
+ void setIndex_(
+ GpuResources* resources,
+ int dim,
+ idx_t nlist,
+ faiss::MetricType metric,
+ float metricArg,
+ int numSubQuantizers,
+ int bitsPerSubQuantizer,
+ bool useFloat16LookupTables,
+ bool useMMCodeDistance,
+ bool interleavedLayout,
+ float* pqCentroidData,
+ IndicesOptions indicesOptions,
+ MemorySpace space);
+
  /// Throws errors if configuration settings are improper
  void verifyPQSettings_() const;
 

data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h
@@ -18,11 +18,9 @@ class IVFFlat;
  class GpuIndexFlat;
 
  struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
- inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
  /// Use the alternative memory layout for the IVF lists
  /// (currently the default)
- bool interleavedLayout;
+ bool interleavedLayout = true;
  };
 
  /// Wrapper around the GPU implementation that looks like

data/vendor/faiss/faiss/gpu/GpuResources.cpp
@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+ /*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
  #include <faiss/gpu/GpuResources.h>
  #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
  // GpuResources
  //
 
- GpuResources::~GpuResources() {}
+ GpuResources::~GpuResources() = default;
 
  cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
  return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
  return getDefaultStream(getCurrentDevice());
  }
 
+ #if defined USE_NVIDIA_RAFT
+ raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+ return getRaftHandle(getCurrentDevice());
+ }
+ #endif
+
  std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
  return getAlternateStreams(getCurrentDevice());
  }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
  // GpuResourcesProvider
  //
 
- GpuResourcesProvider::~GpuResourcesProvider() {}
+ GpuResourcesProvider::~GpuResourcesProvider() = default;
 
  //
  // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
  std::shared_ptr<GpuResources> p)
  : res_(p) {}
 
- GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+ GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
  std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
  return res_;

data/vendor/faiss/faiss/gpu/GpuResources.h
@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+ /*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
  #pragma once
 
  #include <cublas_v2.h>
  #include <cuda_runtime.h>
  #include <faiss/impl/FaissAssert.h>
+
  #include <memory>
  #include <utility>
  #include <vector>
 
+ #if defined USE_NVIDIA_RAFT
+ #include <raft/core/device_resources.hpp>
+ #include <rmm/mr/device/device_memory_resource.hpp>
+ #endif
+
  namespace faiss {
  namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
  /// Information on what/where an allocation is
  struct AllocInfo {
- inline AllocInfo()
- : type(AllocType::Other),
- device(0),
- space(MemorySpace::Device),
- stream(nullptr) {}
+ inline AllocInfo() {}
 
  inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
  : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
  std::string toString() const;
 
  /// The internal category of the allocation
- AllocType type;
+ AllocType type = AllocType::Other;
 
  /// The device on which the allocation is happening
- int device;
+ int device = 0;
 
  /// The memory space of the allocation
- MemorySpace space;
+ MemorySpace space = MemorySpace::Device;
 
  /// The stream on which new work on the memory will be ordered (e.g., if a
  /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
  ///
  /// The memory manager guarantees that the returned memory is free to use
  /// without data races on this stream specified.
- cudaStream_t stream;
+ cudaStream_t stream = nullptr;
  };
 
  /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
  /// Information on what/where an allocation is, along with how big it should be
  struct AllocRequest : public AllocInfo {
- inline AllocRequest() : AllocInfo(), size(0) {}
+ inline AllocRequest() {}
 
  inline AllocRequest(const AllocInfo& info, size_t sz)
  : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
  std::string toString() const;
 
  /// The size in bytes of the allocation
- size_t size;
+ size_t size = 0;
+
+ #if defined USE_NVIDIA_RAFT
+ rmm::mr::device_memory_resource* mr = nullptr;
+ #endif
  };
 
  /// A RAII object that manages a temporary memory request
@@ -190,6 +211,13 @@ class GpuResources {
  /// given device
  virtual cudaStream_t getDefaultStream(int device) = 0;
 
+ #if defined USE_NVIDIA_RAFT
+ /// Returns the raft handle for the given device which can be used to
+ /// make calls to other raft primitives.
+ virtual raft::device_resources& getRaftHandle(int device) = 0;
+ raft::device_resources& getRaftHandleCurrentDevice();
+ #endif
+
  /// Overrides the default stream for a device to the user-supplied stream.
  /// The resources object does not own this stream (i.e., it will not destroy
  /// it).
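
A hedged sketch (not part of the diff) of reaching the new RAFT handle declared above through an existing StandardGpuResources provider; it assumes a build with RAFT support so that USE_NVIDIA_RAFT is defined, and that raft::device_resources exposes sync_stream() as in upstream RAFT.

#if defined USE_NVIDIA_RAFT
#include <faiss/gpu/StandardGpuResources.h>
#include <raft/core/device_resources.hpp>
#include <memory>

void use_raft_handle() {
    faiss::gpu::StandardGpuResources provider;
    std::shared_ptr<faiss::gpu::GpuResources> res = provider.getResources();

    // Handle for the current device, usable with other RAFT primitives.
    raft::device_resources& handle = res->getRaftHandleCurrentDevice();
    handle.sync_stream();  // e.g. wait for work queued on the handle's stream
}
#endif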