faiss 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +5 -2
- data/ext/faiss/index.cpp +102 -69
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss.rb +0 -5
- data/lib/faiss/version.rb +1 -1
- metadata +6 -10
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
    
        data/ext/faiss/pca_matrix.cpp
    CHANGED
    
    | @@ -1,8 +1,5 @@ | |
| 1 1 | 
             
            #include <faiss/VectorTransform.h>
         | 
| 2 2 |  | 
| 3 | 
            -
            #include <rice/Constructor.hpp>
         | 
| 4 | 
            -
            #include <rice/Module.hpp>
         | 
| 5 | 
            -
             | 
| 6 3 | 
             
            #include "utils.h"
         | 
| 7 4 |  | 
| 8 5 | 
             
            void init_pca_matrix(Rice::Module& m) {
         | 
| @@ -10,25 +7,27 @@ void init_pca_matrix(Rice::Module& m) { | |
| 10 7 | 
             
                .define_constructor(Rice::Constructor<faiss::PCAMatrix, int, int>())
         | 
| 11 8 | 
             
                .define_method(
         | 
| 12 9 | 
             
                  "d_in",
         | 
| 13 | 
            -
                   | 
| 10 | 
            +
                  [](faiss::PCAMatrix &self) {
         | 
| 14 11 | 
             
                    return self.d_in;
         | 
| 15 12 | 
             
                  })
         | 
| 16 13 | 
             
                .define_method(
         | 
| 17 14 | 
             
                  "d_out",
         | 
| 18 | 
            -
                   | 
| 15 | 
            +
                  [](faiss::PCAMatrix &self) {
         | 
| 19 16 | 
             
                    return self.d_out;
         | 
| 20 17 | 
             
                  })
         | 
| 21 18 | 
             
                .define_method(
         | 
| 22 | 
            -
                  " | 
| 23 | 
            -
                   | 
| 24 | 
            -
                     | 
| 25 | 
            -
                    self.train(n,  | 
| 19 | 
            +
                  "train",
         | 
| 20 | 
            +
                  [](faiss::PCAMatrix &self, numo::SFloat objects) {
         | 
| 21 | 
            +
                    auto n = check_shape(objects, self.d_in);
         | 
| 22 | 
            +
                    self.train(n, objects.read_ptr());
         | 
| 26 23 | 
             
                  })
         | 
| 27 24 | 
             
                .define_method(
         | 
| 28 | 
            -
                  " | 
| 29 | 
            -
                   | 
| 30 | 
            -
                     | 
| 31 | 
            -
             | 
| 32 | 
            -
                     | 
| 25 | 
            +
                  "apply",
         | 
| 26 | 
            +
                  [](faiss::PCAMatrix &self, numo::SFloat objects) {
         | 
| 27 | 
            +
                    auto n = check_shape(objects, self.d_in);
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                    auto ary = numo::SFloat({n, static_cast<size_t>(self.d_out)});
         | 
| 30 | 
            +
                    self.apply_noalloc(n, objects.read_ptr(), ary.write_ptr());
         | 
| 31 | 
            +
                    return ary;
         | 
| 33 32 | 
             
                  });
         | 
| 34 33 | 
             
            }
         | 
    
        data/ext/faiss/product_quantizer.cpp
    CHANGED
    
    | @@ -1,9 +1,6 @@ | |
| 1 1 | 
             
            #include <faiss/impl/ProductQuantizer.h>
         | 
| 2 2 | 
             
            #include <faiss/index_io.h>
         | 
| 3 3 |  | 
| 4 | 
            -
            #include <rice/Constructor.hpp>
         | 
| 5 | 
            -
            #include <rice/Module.hpp>
         | 
| 6 | 
            -
             | 
| 7 4 | 
             
            #include "utils.h"
         | 
| 8 5 |  | 
| 9 6 | 
             
            void init_product_quantizer(Rice::Module& m) {
         | 
| @@ -11,44 +8,46 @@ void init_product_quantizer(Rice::Module& m) { | |
| 11 8 | 
             
                .define_constructor(Rice::Constructor<faiss::ProductQuantizer, size_t, size_t, size_t>())
         | 
| 12 9 | 
             
                .define_method(
         | 
| 13 10 | 
             
                  "d",
         | 
| 14 | 
            -
                   | 
| 11 | 
            +
                  [](faiss::ProductQuantizer &self) {
         | 
| 15 12 | 
             
                    return self.d;
         | 
| 16 13 | 
             
                  })
         | 
| 17 14 | 
             
                .define_method(
         | 
| 18 15 | 
             
                  "m",
         | 
| 19 | 
            -
                   | 
| 16 | 
            +
                  [](faiss::ProductQuantizer &self) {
         | 
| 20 17 | 
             
                    return self.M;
         | 
| 21 18 | 
             
                  })
         | 
| 22 19 | 
             
                .define_method(
         | 
| 23 | 
            -
                  " | 
| 24 | 
            -
                   | 
| 25 | 
            -
                     | 
| 26 | 
            -
                    self.train(n,  | 
| 20 | 
            +
                  "train",
         | 
| 21 | 
            +
                  [](faiss::ProductQuantizer &self, numo::SFloat objects) {
         | 
| 22 | 
            +
                    auto n = check_shape(objects, self.d);
         | 
| 23 | 
            +
                    self.train(n, objects.read_ptr());
         | 
| 27 24 | 
             
                  })
         | 
| 28 25 | 
             
                .define_method(
         | 
| 29 | 
            -
                  " | 
| 30 | 
            -
                   | 
| 31 | 
            -
                     | 
| 32 | 
            -
             | 
| 33 | 
            -
                     | 
| 34 | 
            -
                     | 
| 26 | 
            +
                  "compute_codes",
         | 
| 27 | 
            +
                  [](faiss::ProductQuantizer &self, numo::SFloat objects) {
         | 
| 28 | 
            +
                    auto n = check_shape(objects, self.d);
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                    auto codes = numo::UInt8({n, self.M});
         | 
| 31 | 
            +
                    self.compute_codes(objects.read_ptr(), codes.write_ptr(), n);
         | 
| 32 | 
            +
                    return codes;
         | 
| 35 33 | 
             
                  })
         | 
| 36 34 | 
             
                .define_method(
         | 
| 37 | 
            -
                  " | 
| 38 | 
            -
                   | 
| 39 | 
            -
                     | 
| 40 | 
            -
             | 
| 41 | 
            -
                     | 
| 42 | 
            -
                     | 
| 35 | 
            +
                  "decode",
         | 
| 36 | 
            +
                  [](faiss::ProductQuantizer &self, numo::UInt8 objects) {
         | 
| 37 | 
            +
                    auto n = check_shape(objects, self.M);
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                    auto x = numo::SFloat({n, self.d});
         | 
| 40 | 
            +
                    self.decode(objects.read_ptr(), x.write_ptr(), n);
         | 
| 41 | 
            +
                    return x;
         | 
| 43 42 | 
             
                  })
         | 
| 44 43 | 
             
                .define_method(
         | 
| 45 44 | 
             
                  "save",
         | 
| 46 | 
            -
                   | 
| 45 | 
            +
                  [](faiss::ProductQuantizer &self, const char *fname) {
         | 
| 47 46 | 
             
                    faiss::write_ProductQuantizer(&self, fname);
         | 
| 48 47 | 
             
                  })
         | 
| 49 | 
            -
                . | 
| 48 | 
            +
                .define_singleton_function(
         | 
| 50 49 | 
             
                  "load",
         | 
| 51 | 
            -
                   | 
| 50 | 
            +
                  [](const char *fname) {
         | 
| 52 51 | 
             
                    return faiss::read_ProductQuantizer(fname);
         | 
| 53 52 | 
             
                  });
         | 
| 54 53 | 
             
            }
         | 
    
        data/ext/faiss/utils.cpp
    CHANGED
    
    | @@ -1,40 +1,13 @@ | |
| 1 1 | 
             
            #include "utils.h"
         | 
| 2 2 |  | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
               | 
| 9 | 
            -
               | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
            {
         | 
| 14 | 
            -
              Rice::String s = o.call("to_binary");
         | 
| 15 | 
            -
              return (uint8_t*) s.c_str();
         | 
| 16 | 
            -
            }
         | 
| 17 | 
            -
             | 
| 18 | 
            -
            // TODO return Numo::SFloat
         | 
| 19 | 
            -
            Rice::String result(float* ptr, int64_t length)
         | 
| 20 | 
            -
            {
         | 
| 21 | 
            -
              return Rice::String(std::string((char*) ptr, length * sizeof(float)));
         | 
| 22 | 
            -
            }
         | 
| 23 | 
            -
             | 
| 24 | 
            -
            // TODO return Numo::UInt8
         | 
| 25 | 
            -
            Rice::String result(uint8_t* ptr, int64_t length)
         | 
| 26 | 
            -
            {
         | 
| 27 | 
            -
              return Rice::String(std::string((char*) ptr, length * sizeof(uint8_t)));
         | 
| 28 | 
            -
            }
         | 
| 29 | 
            -
             | 
| 30 | 
            -
            // TODO return Numo::Int32
         | 
| 31 | 
            -
            Rice::String result(int32_t* ptr, int64_t length)
         | 
| 32 | 
            -
            {
         | 
| 33 | 
            -
              return Rice::String(std::string((char*) ptr, length * sizeof(int32_t)));
         | 
| 34 | 
            -
            }
         | 
| 35 | 
            -
             | 
| 36 | 
            -
            // TODO return Numo::Int64
         | 
| 37 | 
            -
            Rice::String result(int64_t* ptr, int64_t length)
         | 
| 38 | 
            -
            {
         | 
| 39 | 
            -
              return Rice::String(std::string((char*) ptr, length * sizeof(int64_t)));
         | 
| 3 | 
            +
            size_t check_shape(numo::NArray objects, size_t k) {
         | 
| 4 | 
            +
              auto ndim = objects.ndim();
         | 
| 5 | 
            +
              if (ndim != 2) {
         | 
| 6 | 
            +
                throw Rice::Exception(rb_eArgError, "expected 2 dimensions, not %d", ndim);
         | 
| 7 | 
            +
              }
         | 
| 8 | 
            +
              auto shape = objects.shape();
         | 
| 9 | 
            +
              if (shape[1] != k) {
         | 
| 10 | 
            +
                throw Rice::Exception(rb_eArgError, "expected 2nd dimension to be %d, not %d", k, shape[1]);
         | 
| 11 | 
            +
              }
         | 
| 12 | 
            +
              return shape[0];
         | 
| 40 13 | 
             
            }
         | 
    
        data/ext/faiss/utils.h
    CHANGED
    
    | @@ -1,16 +1,5 @@ | |
| 1 1 | 
             
            #pragma once
         | 
| 2 2 |  | 
| 3 | 
            -
            #include  | 
| 4 | 
            -
            #include <rice/String.hpp>
         | 
| 3 | 
            +
            #include "numo.hpp"
         | 
| 5 4 |  | 
| 6 | 
            -
             | 
| 7 | 
            -
            uint8_t* uint8_array(Rice::Object o);
         | 
| 8 | 
            -
             | 
| 9 | 
            -
            // TODO return Numo::SFloat
         | 
| 10 | 
            -
            Rice::String result(float* ptr, int64_t length);
         | 
| 11 | 
            -
            // TODO return Numo::UInt8
         | 
| 12 | 
            -
            Rice::String result(uint8_t* ptr, int64_t length);
         | 
| 13 | 
            -
            // TODO return Numo::Int32
         | 
| 14 | 
            -
            Rice::String result(int32_t* ptr, int64_t length);
         | 
| 15 | 
            -
            // TODO return Numo::Int64
         | 
| 16 | 
            -
            Rice::String result(int64_t* ptr, int64_t length);
         | 
| 5 | 
            +
            size_t check_shape(numo::NArray objects, size_t k);
         | 
    
        data/lib/faiss.rb
    CHANGED
    
    
    
        data/lib/faiss/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: faiss
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.7
         | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2021- | 
| 11 | 
            +
            date: 2021-05-23 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rice
         | 
| @@ -16,14 +16,14 @@ dependencies: | |
| 16 16 | 
             
                requirements:
         | 
| 17 17 | 
             
                - - ">="
         | 
| 18 18 | 
             
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            -
                    version:  | 
| 19 | 
            +
                    version: 4.0.2
         | 
| 20 20 | 
             
              type: :runtime
         | 
| 21 21 | 
             
              prerelease: false
         | 
| 22 22 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 23 | 
             
                requirements:
         | 
| 24 24 | 
             
                - - ">="
         | 
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            -
                    version:  | 
| 26 | 
            +
                    version: 4.0.2
         | 
| 27 27 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 28 28 | 
             
              name: numo-narray
         | 
| 29 29 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -53,16 +53,12 @@ files: | |
| 53 53 | 
             
            - ext/faiss/index.cpp
         | 
| 54 54 | 
             
            - ext/faiss/index_binary.cpp
         | 
| 55 55 | 
             
            - ext/faiss/kmeans.cpp
         | 
| 56 | 
            +
            - ext/faiss/numo.hpp
         | 
| 56 57 | 
             
            - ext/faiss/pca_matrix.cpp
         | 
| 57 58 | 
             
            - ext/faiss/product_quantizer.cpp
         | 
| 58 59 | 
             
            - ext/faiss/utils.cpp
         | 
| 59 60 | 
             
            - ext/faiss/utils.h
         | 
| 60 61 | 
             
            - lib/faiss.rb
         | 
| 61 | 
            -
            - lib/faiss/index.rb
         | 
| 62 | 
            -
            - lib/faiss/index_binary.rb
         | 
| 63 | 
            -
            - lib/faiss/kmeans.rb
         | 
| 64 | 
            -
            - lib/faiss/pca_matrix.rb
         | 
| 65 | 
            -
            - lib/faiss/product_quantizer.rb
         | 
| 66 62 | 
             
            - lib/faiss/version.rb
         | 
| 67 63 | 
             
            - vendor/faiss/LICENSE
         | 
| 68 64 | 
             
            - vendor/faiss/faiss/AutoTune.cpp
         | 
| @@ -257,7 +253,7 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 257 253 | 
             
              requirements:
         | 
| 258 254 | 
             
              - - ">="
         | 
| 259 255 | 
             
                - !ruby/object:Gem::Version
         | 
| 260 | 
            -
                  version: '2. | 
| 256 | 
            +
                  version: '2.6'
         | 
| 261 257 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 262 258 | 
             
              requirements:
         | 
| 263 259 | 
             
              - - ">="
         | 
    
        data/lib/faiss/index.rb
    DELETED
    
    | @@ -1,20 +0,0 @@ | |
| 1 | 
            -
            module Faiss
         | 
| 2 | 
            -
              class Index
         | 
| 3 | 
            -
                def train(objects)
         | 
| 4 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 5 | 
            -
                  _train(objects.shape[0], objects)
         | 
| 6 | 
            -
                end
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                def add(objects)
         | 
| 9 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 10 | 
            -
                  _add(objects.shape[0], objects)
         | 
| 11 | 
            -
                end
         | 
| 12 | 
            -
             | 
| 13 | 
            -
                def search(objects, k)
         | 
| 14 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 15 | 
            -
                  n = objects.shape[0]
         | 
| 16 | 
            -
                  distances, labels = _search(n, objects, k)
         | 
| 17 | 
            -
                  [Numo::SFloat.from_binary(distances).reshape(n, k), Numo::Int64.from_binary(labels).reshape(n, k)]
         | 
| 18 | 
            -
                end
         | 
| 19 | 
            -
              end
         | 
| 20 | 
            -
            end
         | 
    
        data/lib/faiss/index_binary.rb
    DELETED
    
    | @@ -1,20 +0,0 @@ | |
| 1 | 
            -
            module Faiss
         | 
| 2 | 
            -
              class IndexBinary
         | 
| 3 | 
            -
                def train(objects)
         | 
| 4 | 
            -
                  objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
         | 
| 5 | 
            -
                  _train(objects.shape[0], objects)
         | 
| 6 | 
            -
                end
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                def add(objects)
         | 
| 9 | 
            -
                  objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
         | 
| 10 | 
            -
                  _add(objects.shape[0], objects)
         | 
| 11 | 
            -
                end
         | 
| 12 | 
            -
             | 
| 13 | 
            -
                def search(objects, k)
         | 
| 14 | 
            -
                  objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
         | 
| 15 | 
            -
                  n = objects.shape[0]
         | 
| 16 | 
            -
                  distances, labels = _search(n, objects, k)
         | 
| 17 | 
            -
                  [Numo::UInt32.from_binary(distances).reshape(n, k), Numo::Int64.from_binary(labels).reshape(n, k)]
         | 
| 18 | 
            -
                end
         | 
| 19 | 
            -
              end
         | 
| 20 | 
            -
            end
         | 
    
        data/lib/faiss/kmeans.rb
    DELETED
    
    | @@ -1,15 +0,0 @@ | |
| 1 | 
            -
            module Faiss
         | 
| 2 | 
            -
              class Kmeans
         | 
| 3 | 
            -
                attr_reader :index
         | 
| 4 | 
            -
             | 
| 5 | 
            -
                def train(objects)
         | 
| 6 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 7 | 
            -
                  @index = IndexFlatL2.new(d)
         | 
| 8 | 
            -
                  _train(objects.shape[0], objects, @index)
         | 
| 9 | 
            -
                end
         | 
| 10 | 
            -
             | 
| 11 | 
            -
                def centroids
         | 
| 12 | 
            -
                  Numo::SFloat.from_binary(_centroids).reshape(k, d)
         | 
| 13 | 
            -
                end
         | 
| 14 | 
            -
              end
         | 
| 15 | 
            -
            end
         | 
    
        data/lib/faiss/pca_matrix.rb
    DELETED
    
    | @@ -1,15 +0,0 @@ | |
| 1 | 
            -
            module Faiss
         | 
| 2 | 
            -
              class PCAMatrix
         | 
| 3 | 
            -
                def train(objects)
         | 
| 4 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 5 | 
            -
                  _train(objects.shape[0], objects)
         | 
| 6 | 
            -
                end
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                def apply(objects)
         | 
| 9 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 10 | 
            -
                  n = objects.shape[0]
         | 
| 11 | 
            -
                  res = _apply(n, objects)
         | 
| 12 | 
            -
                  Numo::SFloat.from_binary(res).reshape(n, d_out)
         | 
| 13 | 
            -
                end
         | 
| 14 | 
            -
              end
         | 
| 15 | 
            -
            end
         | 
    
        data/lib/faiss/product_quantizer.rb
    DELETED
    
    | @@ -1,22 +0,0 @@ | |
| 1 | 
            -
            module Faiss
         | 
| 2 | 
            -
              class ProductQuantizer
         | 
| 3 | 
            -
                def train(objects)
         | 
| 4 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 5 | 
            -
                  _train(objects.shape[0], objects)
         | 
| 6 | 
            -
                end
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                def compute_codes(objects)
         | 
| 9 | 
            -
                  objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
         | 
| 10 | 
            -
                  n = objects.shape[0]
         | 
| 11 | 
            -
                  res = _compute_codes(n, objects)
         | 
| 12 | 
            -
                  Numo::UInt8.from_binary(res).reshape(n, m)
         | 
| 13 | 
            -
                end
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                def decode(objects)
         | 
| 16 | 
            -
                  objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
         | 
| 17 | 
            -
                  n = objects.shape[0]
         | 
| 18 | 
            -
                  res = _decode(n, objects)
         | 
| 19 | 
            -
                  Numo::SFloat.from_binary(res).reshape(n, d)
         | 
| 20 | 
            -
                end
         | 
| 21 | 
            -
              end
         | 
| 22 | 
            -
            end
         |