RubyGems - faiss - Versions diffs - 0.1.2 → 0.1.3 - Mend

faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

data/vendor/faiss/{index_factory.h → faiss/index_factory.h} RENAMED

File without changes

data/vendor/faiss/{index_io.h → faiss/index_io.h} RENAMED

@@ -14,6 +14,9 @@
 #include <cstdio>
+#include <typeinfo>
+#include <string>
+#include <vector>
 /** I/O functions can read/write to a filename, a file handle or to an
  * object that abstracts the medium.
@@ -42,11 +45,15 @@ void write_index_binary (const IndexBinary *idx, FILE *f);
 void write_index_binary (const IndexBinary *idx, IOWriter *writer);
 // The read_index flags are implemented only for a subset of index types.
-const int IO_FLAG_MMAP = 1; // try to memmap if possible
 const int IO_FLAG_READ_ONLY = 2;
 // strip directory component from ondisk filename, and assume it's in
 // the same directory as the index file
 const int IO_FLAG_ONDISK_SAME_DIR = 4;
+// don't load IVF data to RAM, only list sizes
+const int IO_FLAG_SKIP_IVF_DATA = 8;
+// try to memmap data (useful for OnDiskInvertedLists)
+const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
 Index *read_index (const char *fname, int io_flags = 0);
 Index *read_index (FILE * f, int io_flags = 0);
@@ -69,6 +76,53 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
 InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
+#ifndef _MSC_VER
+/** Callbacks to handle other types of InvertedList objects.
+ *
+ * The callbacks should be registered with add_callback before calling
+ * read_index or read_InvertedLists. The callbacks for
+ * OnDiskInvertedLists are registered by default. The invlist type is
+ * identified by:
+ *
+ * - the key (a fourcc) at read time
+ * - the class name (as given by typeid.name) at write time
+ *
+ * Concrete hooks implement the three pure virtual functions below
+ * (write, read, read_ArrayInvertedLists).
+ */
+struct InvertedListsIOHook {
+    const std::string key; ///< string version of the fourcc
+    const std::string classname; ///< typeid.name
+    // NOTE(review): presumably just initializes key and classname; the
+    // definition is not part of this header -- confirm
+    InvertedListsIOHook(const std::string & key, const std::string & classname);
+    /// write the index to the IOWriter (including the fourcc)
+    virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
+    /// called when the fourcc matches this class's fourcc
+    virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
+    /** read from a ArrayInvertedLists into this invertedlist type.
+     * For this to work, the callback has to be enabled and the io_flag has to be set to
+     * IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
+     */
+    virtual InvertedLists * read_ArrayInvertedLists(
+            IOReader *f, int io_flags,
+            size_t nlist, size_t code_size,
+            const std::vector<size_t> &sizes) const = 0;
+    // virtual destructor: hooks are handled through base-class pointers
+    // (see add_callback / lookup below)
+    virtual ~InvertedListsIOHook() {}
+    /**************************** Manage the set of callbacks ******/
+    // transfers ownership
+    static void add_callback(InvertedListsIOHook *);
+    // NOTE(review): presumably prints the registered hooks for debugging;
+    // the output format is not visible here -- confirm
+    static void print_callbacks();
+    // NOTE(review): h is presumably the fourcc in integer form, as read
+    // from the stream; behavior when no hook matches is not visible
+    // here -- confirm
+    static InvertedListsIOHook* lookup(int h);
+    // same lookup, keyed by classname (typeid.name) instead of the fourcc
+    static InvertedListsIOHook* lookup_classname(const std::string & classname);
+};
+#endif // !_MSC_VER
 } // namespace faiss

data/vendor/faiss/faiss/python/python_callbacks.cpp ADDED

@@ -0,0 +1,112 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/python/python_callbacks.h>
+#include <faiss/impl/FaissAssert.h>
+namespace {
+/** Scoped holder of the Python GIL.
+ *
+ * The constructor calls PyGILState_Ensure and the destructor hands the
+ * saved state back to PyGILState_Release, so the GIL is held for exactly
+ * the lifetime of the object, including when the scope is left through
+ * an exception.
+ */
+struct PyThreadLock  {
+    PyGILState_STATE gstate;
+    PyThreadLock (): gstate(PyGILState_Ensure()) {}
+    // non-copyable: a copy would call PyGILState_Release a second time
+    // for a single PyGILState_Ensure
+    PyThreadLock (const PyThreadLock &) = delete;
+    PyThreadLock & operator= (const PyThreadLock &) = delete;
+    ~PyThreadLock () {
+        PyGILState_Release(gstate);
+    }
+};
+} // anonymous namespace
+/***********************************************************
+ * Callbacks for IO reader and writer
+ ***********************************************************/
+/// Wrap a Python callable as a writer; bs is the largest chunk (in bytes)
+/// passed to the callable in one call.
+PyCallbackIOWriter::PyCallbackIOWriter(PyObject *callback, size_t bs):
+    callback(callback), bs(bs) {
+    // the reference count is only touched while the GIL is held
+    PyThreadLock gil;
+    // keep the callable alive; the matching Py_DECREF is in the destructor
+    Py_INCREF(callback);
+    name = "PyCallbackIOWriter";
+}
+/** Send size * nitems bytes starting at ptrv to the Python callback.
+ *
+ * The data is cut into chunks of at most bs bytes; each chunk is handed
+ * to the callback as one bytes object.
+ *
+ * @param ptrv   start of the data to write
+ * @param size   size of one item, in bytes
+ * @param nitems number of items
+ * @return nitems (the value returned by the callback is not inspected)
+ *
+ * Throws through FAISS_THROW_MSG when the callback call yields NULL, or
+ * when bs is 0 while there is data to write.
+ */
+size_t PyCallbackIOWriter::operator()(const void *ptrv, size_t size, size_t nitems) {
+    size_t ws = size * nitems;
+    const char *ptr = static_cast<const char*>(ptrv);
+    if (ws > 0 && bs == 0) {
+        // a zero chunk size never consumes any data: the loop below
+        // would call the callback forever with empty chunks
+        FAISS_THROW_MSG("PyCallbackIOWriter: bs must be > 0");
+    }
+    PyThreadLock gil;
+    while(ws > 0) {
+        size_t wi = ws > bs ? bs : ws;
+        // "N" passes our reference on the new bytes object to the
+        // argument tuple, so the chunk is not DECREF'ed here
+        PyObject * result = PyObject_CallFunction(
+                callback, "(N)", PyBytes_FromStringAndSize(ptr, wi));
+        if (result == NULL) {
+            FAISS_THROW_MSG("py err");
+        }
+        // TODO check nb of bytes written
+        ptr += wi;
+        ws -= wi;
+        Py_DECREF(result);
+    }
+    return nitems;
+}
+/// Release the reference on the callable taken by the constructor.
+PyCallbackIOWriter::~PyCallbackIOWriter() {
+    // the reference count is only touched while the GIL is held
+    PyThreadLock gil;
+    Py_DECREF(callback);
+}
+/// Wrap a Python callable as a reader; bs is the largest number of bytes
+/// requested from the callable in one call.
+PyCallbackIOReader::PyCallbackIOReader(PyObject *callback, size_t bs):
+    callback(callback), bs(bs) {
+    // the reference count is only touched while the GIL is held
+    PyThreadLock gil;
+    // keep the callable alive; the matching Py_DECREF is in the destructor
+    Py_INCREF(callback);
+    name = "PyCallbackIOReader";
+}
+/** Fill ptrv with up to size * nitems bytes obtained from the Python callback.
+ *
+ * The callback is called with the number of bytes wanted (at most bs per
+ * call) and must return a bytes object. An empty bytes object ends the
+ * read early.
+ *
+ * @param ptrv   destination buffer, at least size * nitems bytes
+ * @param size   size of one item, in bytes
+ * @param nitems number of items wanted
+ * @return number of complete items read; 0 when size is 0, like fread
+ *
+ * Throws through FAISS_THROW_MSG / FAISS_THROW_FMT when the callback call
+ * yields NULL, returns something that is not a bytes object, or returns
+ * more bytes than the space left in the buffer.
+ */
+size_t PyCallbackIOReader::operator()(void *ptrv, size_t size, size_t nitems)
+{
+    if (size == 0) {
+        // nothing can be read, and nb / size below would divide by zero
+        return 0;
+    }
+    size_t rs = size * nitems;
+    size_t nb = 0;
+    char *ptr = static_cast<char*>(ptrv);
+    PyThreadLock gil;
+    while(rs > 0) {
+        size_t ri = rs > bs ? bs : rs;
+        // the "n" format unit reads a Py_ssize_t from the varargs
+        PyObject * result = PyObject_CallFunction(
+                callback, "(n)", static_cast<Py_ssize_t>(ri));
+        if (result == NULL) {
+            FAISS_THROW_MSG("propagate py error");
+        }
+        if(!PyBytes_Check(result)) {
+            Py_DECREF(result);
+            FAISS_THROW_MSG("read callback did not return a bytes object");
+        }
+        size_t sz = PyBytes_Size(result);
+        if (sz == 0) {
+            // the callback has no more data: stop with a short read
+            Py_DECREF(result);
+            break;
+        }
+        if (sz > rs) {
+            // more data than the room left in the caller's buffer
+            Py_DECREF(result);
+            FAISS_THROW_FMT("read callback returned %zu bytes (asked %zu)",
+                            sz, rs);
+        }
+        memcpy(ptr, PyBytes_AsString(result), sz);
+        Py_DECREF(result);
+        nb += sz;
+        ptr += sz;
+        rs -= sz;
+    }
+    // only complete items are counted
+    return nb / size;
+}
+/// Release the reference on the callable taken by the constructor.
+PyCallbackIOReader::~PyCallbackIOReader() {
+    // the reference count is only touched while the GIL is held
+    PyThreadLock gil;
+    Py_DECREF(callback);
+}

data/vendor/faiss/faiss/python/python_callbacks.h ADDED

@@ -0,0 +1,45 @@
+#pragma once
+#include "Python.h"
+#include <faiss/impl/io.h>
+#include <faiss/InvertedLists.h>
+//  all callbacks have to acquire the GIL on input
+/***********************************************************
+ * Callbacks for IO reader and writer
+ ***********************************************************/
+/// faiss::IOWriter that passes the written bytes to a Python callable.
+struct PyCallbackIOWriter: faiss::IOWriter {
+    PyObject * callback; // Python callable that receives the data
+    size_t bs; // maximum write size
+    /** Callback: Python function that takes a bytes object and
+     *  returns the number of bytes successfully written.
+     *
+     *  NOTE(review): the operator() in python_callbacks.cpp carries a
+     *  TODO for checking that returned count, so it is currently not
+     *  verified.
+     */
+    explicit PyCallbackIOWriter(PyObject *callback,
+                                size_t bs = 1024 * 1024);
+    /// write size * nitems bytes from ptrv through the callback
+    size_t operator()(const void *ptrv, size_t size, size_t nitems) override;
+    ~PyCallbackIOWriter() override;
+};
+/// faiss::IOReader that obtains its bytes from a Python callable.
+struct PyCallbackIOReader: faiss::IOReader {
+    PyObject * callback; // Python callable that produces the data
+    size_t bs; // maximum buffer size
+    /** Callback: Python function that takes a size and returns a
+     * bytes object with the resulting read */
+    explicit PyCallbackIOReader(PyObject *callback,
+                                size_t bs = 1024 * 1024);
+    /// read up to size * nitems bytes into ptrv through the callback
+    size_t operator()(void *ptrv, size_t size, size_t nitems) override;
+    ~PyCallbackIOReader() override;
+};

data/vendor/faiss/{utils → faiss/utils}/Heap.cpp RENAMED

@@ -19,7 +19,7 @@ template <typename C>
 void HeapArray<C>::heapify ()
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < nh; j++)
+    for (int64_t j = 0; j < nh; j++)
         heap_heapify<C> (k, val + j * k, ids + j * k);
 }
@@ -27,7 +27,7 @@ template <typename C>
 void HeapArray<C>::reorder ()
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < nh; j++)
+    for (int64_t j = 0; j < nh; j++)
         heap_reorder<C> (k, val + j * k, ids + j * k);
 }
@@ -38,7 +38,7 @@ void HeapArray<C>::addn (size_t nj, const T *vin, TI j0,
     if (ni == -1) ni = nh;
     assert (i0 >= 0 && i0 + ni <= nh);
 #pragma omp parallel for
-    for (size_t i = i0; i < i0 + ni; i++) {
+    for (int64_t i = i0; i < i0 + ni; i++) {
         T * __restrict simi = get_val(i);
         TI * __restrict idxi = get_ids (i);
         const T *ip_line = vin + (i - i0) * nj;
@@ -65,7 +65,7 @@ void HeapArray<C>::addn_with_ids (
     if (ni == -1) ni = nh;
     assert (i0 >= 0 && i0 + ni <= nh);
 #pragma omp parallel for
-    for (size_t i = i0; i < i0 + ni; i++) {
+    for (int64_t i = i0; i < i0 + ni; i++) {
         T * __restrict simi = get_val(i);
         TI * __restrict idxi = get_ids (i);
         const T *ip_line = vin + (i - i0) * nj;
@@ -87,7 +87,7 @@ void HeapArray<C>::per_line_extrema (
                    TI * out_ids) const
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < nh; j++) {
+    for (int64_t j = 0; j < nh; j++) {
         int64_t imin = -1;
         typename C::T xval = C::Crev::neutral ();
         const typename C::T * x_ = val + j * k;

data/vendor/faiss/{utils → faiss/utils}/Heap.h RENAMED

@@ -57,10 +57,8 @@ struct CMin {
     inline static bool cmp (T a, T b) {
         return a < b;
     }
-    // value that will be popped first -> must be smaller than all others
-    // for int types this is not strictly the smallest val (-max - 1)
     inline static T neutral () {
-        return -std::numeric_limits<T>::max();
+        return std::numeric_limits<T>::lowest();
     }
 };

data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp RENAMED

File without changes

data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h RENAMED

File without changes

data/vendor/faiss/{utils → faiss/utils}/distances.cpp RENAMED

@@ -9,6 +9,7 @@
 #include <faiss/utils/distances.h>
+#include <algorithm>
 #include <cstdio>
 #include <cassert>
 #include <cstring>
@@ -93,7 +94,7 @@ void fvec_norms_L2 (float * __restrict nr,
 {
 #pragma omp parallel for
-    for (size_t i = 0; i < nx; i++) {
+    for (int64_t i = 0; i < nx; i++) {
         nr[i] = sqrtf (fvec_norm_L2sqr (x + i * d, d));
     }
 }
@@ -103,7 +104,7 @@ void fvec_norms_L2sqr (float * __restrict nr,
                        size_t d, size_t nx)
 {
 #pragma omp parallel for
-    for (size_t i = 0; i < nx; i++)
+    for (int64_t i = 0; i < nx; i++)
         nr[i] = fvec_norm_L2sqr (x + i * d, d);
 }
@@ -112,7 +113,7 @@ void fvec_norms_L2sqr (float * __restrict nr,
 void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x)
 {
 #pragma omp parallel for
-    for (size_t i = 0; i < nx; i++) {
+    for (int64_t i = 0; i < nx; i++) {
         float * __restrict xi = x + i * d;
         float nr = fvec_norm_L2sqr (xi, d);
@@ -158,7 +159,7 @@ static void knn_inner_product_sse (const float * x,
         size_t i1 = std::min(i0 + check_period, nx);
 #pragma omp parallel for
-        for (size_t i = i0; i < i1; i++) {
+        for (int64_t i = i0; i < i1; i++) {
             const float * x_i = x + i * d;
             const float * y_j = y;
@@ -198,7 +199,7 @@ static void knn_L2sqr_sse (
         size_t i1 = std::min(i0 + check_period, nx);
 #pragma omp parallel for
-        for (size_t i = i0; i < i1; i++) {
+        for (int64_t i = i0; i < i1; i++) {
             const float * x_i = x + i * d;
             const float * y_j = y;
             size_t j;
@@ -312,7 +313,7 @@ static void knn_L2sqr_blas (const float * x,
             /* collect minima */
 #pragma omp parallel for
-            for (size_t i = i0; i < i1; i++) {
+            for (int64_t i = i0; i < i1; i++) {
                 float * __restrict simi = res->get_val(i);
                 int64_t * __restrict idxi = res->get_ids (i);
                 const float *ip_line = ip_block + (i - i0) * (j1 - j0);
@@ -420,7 +421,7 @@ void fvec_inner_products_by_idx (float * __restrict ip,
                                  size_t d, size_t nx, size_t ny)
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < nx; j++) {
+    for (int64_t j = 0; j < nx; j++) {
         const int64_t * __restrict idsj = ids + j * ny;
         const float * xj = x + j * d;
         float * __restrict ipj = ip + j * ny;
@@ -443,7 +444,7 @@ void fvec_L2sqr_by_idx (float * __restrict dis,
                         size_t d, size_t nx, size_t ny)
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < nx; j++) {
+    for (int64_t j = 0; j < nx; j++) {
         const int64_t * __restrict idsj = ids + j * ny;
         const float * xj = x + j * d;
         float * __restrict disj = dis + j * ny;
@@ -462,7 +463,7 @@ void pairwise_indexed_L2sqr (
         float *dis)
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < n; j++) {
+    for (int64_t j = 0; j < n; j++) {
         if (ix[j] >= 0 && iy[j] >= 0) {
             dis[j] = fvec_L2sqr (x + d * ix[j], y + d * iy[j], d);
         }
@@ -476,7 +477,7 @@ void pairwise_indexed_inner_product (
         float *dis)
 {
 #pragma omp parallel for
-    for (size_t j = 0; j < n; j++) {
+    for (int64_t j = 0; j < n; j++) {
         if (ix[j] >= 0 && iy[j] >= 0) {
             dis[j] = fvec_inner_product (x + d * ix[j], y + d * iy[j], d);
         }
@@ -495,7 +496,7 @@ void knn_inner_products_by_idx (const float * x,
     size_t k = res->k;
 #pragma omp parallel for
-    for (size_t i = 0; i < nx; i++) {
+    for (int64_t i = 0; i < nx; i++) {
         const float * x_ = x + i * d;
         const int64_t * idsi = ids + i * ny;
         size_t j;
@@ -526,7 +527,7 @@ void knn_L2sqr_by_idx (const float * x,
     size_t k = res->k;
 #pragma omp parallel for
-    for (size_t i = 0; i < nx; i++) {
+    for (int64_t i = 0; i < nx; i++) {
         const float * x_ = x + i * d;
         const int64_t * __restrict idsi = ids + i * ny;
         float * __restrict simi = res->get_val(i);
@@ -649,7 +650,7 @@ static void range_search_sse (const float * x,
         RangeSearchPartialResult pres (res);
 #pragma omp for
-        for (size_t i = 0; i < nx; i++) {
+        for (int64_t i = 0; i < nx; i++) {
             const float * x_ = x + i * d;
             const float * y_ = y;
             size_t j;
@@ -760,5 +761,19 @@ void pairwise_L2sqr (int64_t d,
 }
+/** Rewrite an n1 x n2 row-major table in place:
+ *
+ *     dis[j * n2 + i] = nr1[j] + nr2[i] - 2 * dis[j * n2 + i]
+ *
+ * NOTE(review): going by the function name, dis holds inner products on
+ * input and nr1 / nr2 hold the squared L2 norms of the two vector sets,
+ * which turns the table into squared L2 distances -- confirm with callers.
+ *
+ * @param dis  table of size n1 * n2, updated in place
+ * @param nr1  n1 values, one per row
+ * @param nr2  n2 values, one per column
+ */
+void inner_product_to_L2sqr(float* __restrict dis,
+    const float* nr1,
+    const float* nr2,
+    size_t n1, size_t n2)
+{
+    // signed loop index, like the other OpenMP loops in this file
+#pragma omp parallel for
+    for (int64_t row = 0; row < n1; row++) {
+        float* cell = dis + row * n2;
+        float* const row_end = cell + n2;
+        const float* col_term = nr2;
+        while (cell != row_end) {
+            *cell = nr1[row] + *col_term - 2 * *cell;
+            ++cell;
+            ++col_term;
+        }
+    }
+}
 } // namespace faiss

data/vendor/faiss/{utils → faiss/utils}/distances.h RENAMED

@@ -15,6 +15,7 @@
 #include <stdint.h>
 #include <faiss/utils/Heap.h>
+#include <faiss/impl/platform_macros.h>
 namespace faiss {
@@ -153,7 +154,7 @@ void pairwise_indexed_inner_product (
  ***************************************************************************/
 // threshold on nx above which we switch to BLAS to compute distances
-extern int distance_compute_blas_threshold;
+FAISS_API extern int distance_compute_blas_threshold;
 /** Return the k nearest neighbors of each of the nx vectors x among the ny
  *  vectors y, w.r.t. max inner product