faiss 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +5 -2
- data/ext/faiss/index.cpp +102 -69
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss.rb +0 -5
- data/lib/faiss/version.rb +1 -1
- metadata +6 -10
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
data/ext/faiss/pca_matrix.cpp
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
#include <faiss/VectorTransform.h>
|
2
2
|
|
3
|
-
#include <rice/Constructor.hpp>
|
4
|
-
#include <rice/Module.hpp>
|
5
|
-
|
6
3
|
#include "utils.h"
|
7
4
|
|
8
5
|
void init_pca_matrix(Rice::Module& m) {
|
@@ -10,25 +7,27 @@ void init_pca_matrix(Rice::Module& m) {
|
|
10
7
|
.define_constructor(Rice::Constructor<faiss::PCAMatrix, int, int>())
|
11
8
|
.define_method(
|
12
9
|
"d_in",
|
13
|
-
|
10
|
+
[](faiss::PCAMatrix &self) {
|
14
11
|
return self.d_in;
|
15
12
|
})
|
16
13
|
.define_method(
|
17
14
|
"d_out",
|
18
|
-
|
15
|
+
[](faiss::PCAMatrix &self) {
|
19
16
|
return self.d_out;
|
20
17
|
})
|
21
18
|
.define_method(
|
22
|
-
"
|
23
|
-
|
24
|
-
|
25
|
-
self.train(n,
|
19
|
+
"train",
|
20
|
+
[](faiss::PCAMatrix &self, numo::SFloat objects) {
|
21
|
+
auto n = check_shape(objects, self.d_in);
|
22
|
+
self.train(n, objects.read_ptr());
|
26
23
|
})
|
27
24
|
.define_method(
|
28
|
-
"
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
25
|
+
"apply",
|
26
|
+
[](faiss::PCAMatrix &self, numo::SFloat objects) {
|
27
|
+
auto n = check_shape(objects, self.d_in);
|
28
|
+
|
29
|
+
auto ary = numo::SFloat({n, static_cast<size_t>(self.d_out)});
|
30
|
+
self.apply_noalloc(n, objects.read_ptr(), ary.write_ptr());
|
31
|
+
return ary;
|
33
32
|
});
|
34
33
|
}
|
data/ext/faiss/product_quantizer.cpp
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
#include <faiss/impl/ProductQuantizer.h>
|
2
2
|
#include <faiss/index_io.h>
|
3
3
|
|
4
|
-
#include <rice/Constructor.hpp>
|
5
|
-
#include <rice/Module.hpp>
|
6
|
-
|
7
4
|
#include "utils.h"
|
8
5
|
|
9
6
|
void init_product_quantizer(Rice::Module& m) {
|
@@ -11,44 +8,46 @@ void init_product_quantizer(Rice::Module& m) {
|
|
11
8
|
.define_constructor(Rice::Constructor<faiss::ProductQuantizer, size_t, size_t, size_t>())
|
12
9
|
.define_method(
|
13
10
|
"d",
|
14
|
-
|
11
|
+
[](faiss::ProductQuantizer &self) {
|
15
12
|
return self.d;
|
16
13
|
})
|
17
14
|
.define_method(
|
18
15
|
"m",
|
19
|
-
|
16
|
+
[](faiss::ProductQuantizer &self) {
|
20
17
|
return self.M;
|
21
18
|
})
|
22
19
|
.define_method(
|
23
|
-
"
|
24
|
-
|
25
|
-
|
26
|
-
self.train(n,
|
20
|
+
"train",
|
21
|
+
[](faiss::ProductQuantizer &self, numo::SFloat objects) {
|
22
|
+
auto n = check_shape(objects, self.d);
|
23
|
+
self.train(n, objects.read_ptr());
|
27
24
|
})
|
28
25
|
.define_method(
|
29
|
-
"
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
26
|
+
"compute_codes",
|
27
|
+
[](faiss::ProductQuantizer &self, numo::SFloat objects) {
|
28
|
+
auto n = check_shape(objects, self.d);
|
29
|
+
|
30
|
+
auto codes = numo::UInt8({n, self.M});
|
31
|
+
self.compute_codes(objects.read_ptr(), codes.write_ptr(), n);
|
32
|
+
return codes;
|
35
33
|
})
|
36
34
|
.define_method(
|
37
|
-
"
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
35
|
+
"decode",
|
36
|
+
[](faiss::ProductQuantizer &self, numo::UInt8 objects) {
|
37
|
+
auto n = check_shape(objects, self.M);
|
38
|
+
|
39
|
+
auto x = numo::SFloat({n, self.d});
|
40
|
+
self.decode(objects.read_ptr(), x.write_ptr(), n);
|
41
|
+
return x;
|
43
42
|
})
|
44
43
|
.define_method(
|
45
44
|
"save",
|
46
|
-
|
45
|
+
[](faiss::ProductQuantizer &self, const char *fname) {
|
47
46
|
faiss::write_ProductQuantizer(&self, fname);
|
48
47
|
})
|
49
|
-
.
|
48
|
+
.define_singleton_function(
|
50
49
|
"load",
|
51
|
-
|
50
|
+
[](const char *fname) {
|
52
51
|
return faiss::read_ProductQuantizer(fname);
|
53
52
|
});
|
54
53
|
}
|
data/ext/faiss/utils.cpp
CHANGED
@@ -1,40 +1,13 @@
|
|
1
1
|
#include "utils.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
{
|
14
|
-
Rice::String s = o.call("to_binary");
|
15
|
-
return (uint8_t*) s.c_str();
|
16
|
-
}
|
17
|
-
|
18
|
-
// TODO return Numo::SFloat
|
19
|
-
Rice::String result(float* ptr, int64_t length)
|
20
|
-
{
|
21
|
-
return Rice::String(std::string((char*) ptr, length * sizeof(float)));
|
22
|
-
}
|
23
|
-
|
24
|
-
// TODO return Numo::UInt8
|
25
|
-
Rice::String result(uint8_t* ptr, int64_t length)
|
26
|
-
{
|
27
|
-
return Rice::String(std::string((char*) ptr, length * sizeof(uint8_t)));
|
28
|
-
}
|
29
|
-
|
30
|
-
// TODO return Numo::Int32
|
31
|
-
Rice::String result(int32_t* ptr, int64_t length)
|
32
|
-
{
|
33
|
-
return Rice::String(std::string((char*) ptr, length * sizeof(int32_t)));
|
34
|
-
}
|
35
|
-
|
36
|
-
// TODO return Numo::Int64
|
37
|
-
Rice::String result(int64_t* ptr, int64_t length)
|
38
|
-
{
|
39
|
-
return Rice::String(std::string((char*) ptr, length * sizeof(int64_t)));
|
3
|
+
size_t check_shape(numo::NArray objects, size_t k) {
|
4
|
+
auto ndim = objects.ndim();
|
5
|
+
if (ndim != 2) {
|
6
|
+
throw Rice::Exception(rb_eArgError, "expected 2 dimensions, not %d", ndim);
|
7
|
+
}
|
8
|
+
auto shape = objects.shape();
|
9
|
+
if (shape[1] != k) {
|
10
|
+
throw Rice::Exception(rb_eArgError, "expected 2nd dimension to be %d, not %d", k, shape[1]);
|
11
|
+
}
|
12
|
+
return shape[0];
|
40
13
|
}
|
data/ext/faiss/utils.h
CHANGED
@@ -1,16 +1,5 @@
|
|
1
1
|
#pragma once
|
2
2
|
|
3
|
-
#include
|
4
|
-
#include <rice/String.hpp>
|
3
|
+
#include "numo.hpp"
|
5
4
|
|
6
|
-
|
7
|
-
uint8_t* uint8_array(Rice::Object o);
|
8
|
-
|
9
|
-
// TODO return Numo::SFloat
|
10
|
-
Rice::String result(float* ptr, int64_t length);
|
11
|
-
// TODO return Numo::UInt8
|
12
|
-
Rice::String result(uint8_t* ptr, int64_t length);
|
13
|
-
// TODO return Numo::Int32
|
14
|
-
Rice::String result(int32_t* ptr, int64_t length);
|
15
|
-
// TODO return Numo::Int64
|
16
|
-
Rice::String result(int64_t* ptr, int64_t length);
|
5
|
+
size_t check_shape(numo::NArray objects, size_t k);
|
data/lib/faiss.rb
CHANGED
data/lib/faiss/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: faiss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 4.0.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 4.0.2
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: numo-narray
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,16 +53,12 @@ files:
|
|
53
53
|
- ext/faiss/index.cpp
|
54
54
|
- ext/faiss/index_binary.cpp
|
55
55
|
- ext/faiss/kmeans.cpp
|
56
|
+
- ext/faiss/numo.hpp
|
56
57
|
- ext/faiss/pca_matrix.cpp
|
57
58
|
- ext/faiss/product_quantizer.cpp
|
58
59
|
- ext/faiss/utils.cpp
|
59
60
|
- ext/faiss/utils.h
|
60
61
|
- lib/faiss.rb
|
61
|
-
- lib/faiss/index.rb
|
62
|
-
- lib/faiss/index_binary.rb
|
63
|
-
- lib/faiss/kmeans.rb
|
64
|
-
- lib/faiss/pca_matrix.rb
|
65
|
-
- lib/faiss/product_quantizer.rb
|
66
62
|
- lib/faiss/version.rb
|
67
63
|
- vendor/faiss/LICENSE
|
68
64
|
- vendor/faiss/faiss/AutoTune.cpp
|
@@ -257,7 +253,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
257
253
|
requirements:
|
258
254
|
- - ">="
|
259
255
|
- !ruby/object:Gem::Version
|
260
|
-
version: '2.
|
256
|
+
version: '2.6'
|
261
257
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
262
258
|
requirements:
|
263
259
|
- - ">="
|
data/lib/faiss/index.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module Faiss
|
2
|
-
class Index
|
3
|
-
def train(objects)
|
4
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
5
|
-
_train(objects.shape[0], objects)
|
6
|
-
end
|
7
|
-
|
8
|
-
def add(objects)
|
9
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
10
|
-
_add(objects.shape[0], objects)
|
11
|
-
end
|
12
|
-
|
13
|
-
def search(objects, k)
|
14
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
15
|
-
n = objects.shape[0]
|
16
|
-
distances, labels = _search(n, objects, k)
|
17
|
-
[Numo::SFloat.from_binary(distances).reshape(n, k), Numo::Int64.from_binary(labels).reshape(n, k)]
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
data/lib/faiss/index_binary.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module Faiss
|
2
|
-
class IndexBinary
|
3
|
-
def train(objects)
|
4
|
-
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
5
|
-
_train(objects.shape[0], objects)
|
6
|
-
end
|
7
|
-
|
8
|
-
def add(objects)
|
9
|
-
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
10
|
-
_add(objects.shape[0], objects)
|
11
|
-
end
|
12
|
-
|
13
|
-
def search(objects, k)
|
14
|
-
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
15
|
-
n = objects.shape[0]
|
16
|
-
distances, labels = _search(n, objects, k)
|
17
|
-
[Numo::UInt32.from_binary(distances).reshape(n, k), Numo::Int64.from_binary(labels).reshape(n, k)]
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
data/lib/faiss/kmeans.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
module Faiss
|
2
|
-
class Kmeans
|
3
|
-
attr_reader :index
|
4
|
-
|
5
|
-
def train(objects)
|
6
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
7
|
-
@index = IndexFlatL2.new(d)
|
8
|
-
_train(objects.shape[0], objects, @index)
|
9
|
-
end
|
10
|
-
|
11
|
-
def centroids
|
12
|
-
Numo::SFloat.from_binary(_centroids).reshape(k, d)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
data/lib/faiss/pca_matrix.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
module Faiss
|
2
|
-
class PCAMatrix
|
3
|
-
def train(objects)
|
4
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
5
|
-
_train(objects.shape[0], objects)
|
6
|
-
end
|
7
|
-
|
8
|
-
def apply(objects)
|
9
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
10
|
-
n = objects.shape[0]
|
11
|
-
res = _apply(n, objects)
|
12
|
-
Numo::SFloat.from_binary(res).reshape(n, d_out)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
data/lib/faiss/product_quantizer.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
module Faiss
|
2
|
-
class ProductQuantizer
|
3
|
-
def train(objects)
|
4
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
5
|
-
_train(objects.shape[0], objects)
|
6
|
-
end
|
7
|
-
|
8
|
-
def compute_codes(objects)
|
9
|
-
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
10
|
-
n = objects.shape[0]
|
11
|
-
res = _compute_codes(n, objects)
|
12
|
-
Numo::UInt8.from_binary(res).reshape(n, m)
|
13
|
-
end
|
14
|
-
|
15
|
-
def decode(objects)
|
16
|
-
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
17
|
-
n = objects.shape[0]
|
18
|
-
res = _decode(n, objects)
|
19
|
-
Numo::SFloat.from_binary(res).reshape(n, d)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|