faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a0369e5dda330b1490e48a88863baa01df9cadfa570078892cec439f82efaad1
|
|
4
|
+
data.tar.gz: bb7d89fa17f782e8163b114b520b8c2c082cf37661b4b6fc4593460dc5958484
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0a9f1515d142d11c688f1a9cdbcf9af0c36fa3fc98b240f236554b1067cf2daad1cefa377d18d236674b8fc1b94d64a3acc070c2528c47b68f4d231f29b7648d
|
|
7
|
+
data.tar.gz: ae02808dbda4831c7165c987b77c72f9d436bba94e3e28d372f69ceee18fb4971c6cc99a2bf7ce9bd9a9a6e4befcd372515a728bf379294ecc870f2c58f85eb2
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -2,12 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
[Faiss](https://github.com/facebookresearch/faiss) - efficient similarity search and clustering - for Ruby
|
|
4
4
|
|
|
5
|
+
Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
|
|
6
|
+
|
|
7
|
+
[![Build Status](https://travis-ci.org/ankane/faiss.svg?branch=master)](https://travis-ci.org/ankane/faiss)
|
|
8
|
+
|
|
5
9
|
## Installation
|
|
6
10
|
|
|
7
|
-
First, install
|
|
11
|
+
First, install BLAS, LAPACK, and OpenMP:
|
|
8
12
|
|
|
9
13
|
```sh
|
|
10
|
-
|
|
14
|
+
# Mac
|
|
15
|
+
brew install openblas lapack libomp
|
|
16
|
+
|
|
17
|
+
# Ubuntu
|
|
18
|
+
sudo apt install libblas-dev liblapack-dev
|
|
11
19
|
```
|
|
12
20
|
|
|
13
21
|
Add this line to your application’s Gemfile:
|
|
@@ -16,6 +24,8 @@ Add this line to your application’s Gemfile:
|
|
|
16
24
|
gem 'faiss'
|
|
17
25
|
```
|
|
18
26
|
|
|
27
|
+
Faiss is not available for Windows yet
|
|
28
|
+
|
|
19
29
|
## Getting Started
|
|
20
30
|
|
|
21
31
|
Prep your data
|
|
@@ -41,6 +51,84 @@ Search
|
|
|
41
51
|
distances, ids = index.search(objects, 3)
|
|
42
52
|
```
|
|
43
53
|
|
|
54
|
+
Save an index
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
index.save("index.bin")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Load an index
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
index = Faiss::Index.load("index.bin")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
> Use `Faiss::IndexBinary` to load binary indexes
|
|
67
|
+
|
|
68
|
+
## Basic Indexes
|
|
69
|
+
|
|
70
|
+
Exact search for L2
|
|
71
|
+
|
|
72
|
+
```rb
|
|
73
|
+
Faiss::IndexFlatL2.new(d)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Exact search for inner product
|
|
77
|
+
|
|
78
|
+
```rb
|
|
79
|
+
Faiss::IndexFlatIP.new(d)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Hierarchical navigable small world graph exploration
|
|
83
|
+
|
|
84
|
+
```rb
|
|
85
|
+
Faiss::IndexHNSWFlat.new(d, m)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Inverted file with exact post-verification
|
|
89
|
+
|
|
90
|
+
```rb
|
|
91
|
+
Faiss::IndexIVFFlat.new(quantizer, d, nlists)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Locality-sensitive hashing
|
|
95
|
+
|
|
96
|
+
```rb
|
|
97
|
+
Faiss::IndexLSH.new(d, nbits)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Product quantizer (PQ) in flat mode
|
|
101
|
+
|
|
102
|
+
```rb
|
|
103
|
+
Faiss::IndexPQ.new(d, m, nbits)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
IVFADC (coarse quantizer+PQ on residuals)
|
|
107
|
+
|
|
108
|
+
```rb
|
|
109
|
+
Faiss::IndexIVFPQ.new(quantizer, d, nlists, m, nbits)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
IVFADC+R (same as IVFADC with re-ranking based on codes)
|
|
113
|
+
|
|
114
|
+
```rb
|
|
115
|
+
Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Binary Indexes
|
|
119
|
+
|
|
120
|
+
Index binary vectors
|
|
121
|
+
|
|
122
|
+
```rb
|
|
123
|
+
Faiss::IndexBinaryFlat.new(d)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Speed up search with an inverse vector file
|
|
127
|
+
|
|
128
|
+
```rb
|
|
129
|
+
Faiss::IndexBinaryIVF.new(quantizer, d, nlists)
|
|
130
|
+
```
|
|
131
|
+
|
|
44
132
|
## K-means Clustering
|
|
45
133
|
|
|
46
134
|
Train
|
|
@@ -92,6 +180,18 @@ Decode
|
|
|
92
180
|
pq.decode(codes)
|
|
93
181
|
```
|
|
94
182
|
|
|
183
|
+
Save a quantizer
|
|
184
|
+
|
|
185
|
+
```ruby
|
|
186
|
+
pq.save("pq.bin")
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Load a quantizer
|
|
190
|
+
|
|
191
|
+
```ruby
|
|
192
|
+
pq = Faiss::ProductQuantizer.load("pq.bin")
|
|
193
|
+
```
|
|
194
|
+
|
|
95
195
|
## Data
|
|
96
196
|
|
|
97
197
|
Data can be an array of arrays
|
|
@@ -122,7 +222,7 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
|
122
222
|
To get started with development:
|
|
123
223
|
|
|
124
224
|
```sh
|
|
125
|
-
git clone https://github.com/ankane/faiss.git
|
|
225
|
+
git clone --recursive https://github.com/ankane/faiss.git
|
|
126
226
|
cd faiss
|
|
127
227
|
bundle install
|
|
128
228
|
bundle exec rake compile
|
data/ext/faiss/ext.cpp
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include <faiss/IndexBinaryFlat.h>
|
|
11
11
|
#include <faiss/IndexBinaryIVF.h>
|
|
12
12
|
#include <faiss/index_factory.h>
|
|
13
|
+
#include <faiss/index_io.h>
|
|
13
14
|
|
|
14
15
|
#include <faiss/Clustering.h>
|
|
15
16
|
#include <faiss/VectorTransform.h>
|
|
@@ -19,6 +20,42 @@
|
|
|
19
20
|
#include <rice/Constructor.hpp>
|
|
20
21
|
#include <rice/Module.hpp>
|
|
21
22
|
|
|
23
|
+
float* float_array(Rice::Object o)
|
|
24
|
+
{
|
|
25
|
+
Rice::String s = o.call("to_binary");
|
|
26
|
+
return (float*) s.c_str();
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
uint8_t* uint8_array(Rice::Object o)
|
|
30
|
+
{
|
|
31
|
+
Rice::String s = o.call("to_binary");
|
|
32
|
+
return (uint8_t*) s.c_str();
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// TODO return Numo::SFloat
|
|
36
|
+
Rice::String result(float* ptr, int64_t length)
|
|
37
|
+
{
|
|
38
|
+
return Rice::String(std::string((char*) ptr, length * sizeof(float)));
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// TODO return Numo::UInt8
|
|
42
|
+
Rice::String result(uint8_t* ptr, int64_t length)
|
|
43
|
+
{
|
|
44
|
+
return Rice::String(std::string((char*) ptr, length * sizeof(uint8_t)));
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// TODO return Numo::Int32
|
|
48
|
+
Rice::String result(int32_t* ptr, int64_t length)
|
|
49
|
+
{
|
|
50
|
+
return Rice::String(std::string((char*) ptr, length * sizeof(int32_t)));
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// TODO return Numo::Int64
|
|
54
|
+
Rice::String result(int64_t* ptr, int64_t length)
|
|
55
|
+
{
|
|
56
|
+
return Rice::String(std::string((char*) ptr, length * sizeof(int64_t)));
|
|
57
|
+
}
|
|
58
|
+
|
|
22
59
|
extern "C"
|
|
23
60
|
void Init_ext()
|
|
24
61
|
{
|
|
@@ -47,32 +84,42 @@ void Init_ext()
|
|
|
47
84
|
})
|
|
48
85
|
.define_method(
|
|
49
86
|
"_train",
|
|
50
|
-
*[](faiss::Index &self, int64_t n, Rice::
|
|
51
|
-
const float *x = (
|
|
87
|
+
*[](faiss::Index &self, int64_t n, Rice::Object o) {
|
|
88
|
+
const float *x = float_array(o);
|
|
52
89
|
self.train(n, x);
|
|
53
90
|
})
|
|
54
91
|
.define_method(
|
|
55
92
|
"_add",
|
|
56
|
-
*[](faiss::Index &self, int64_t n, Rice::
|
|
57
|
-
const float *x = (
|
|
93
|
+
*[](faiss::Index &self, int64_t n, Rice::Object o) {
|
|
94
|
+
const float *x = float_array(o);
|
|
58
95
|
self.add(n, x);
|
|
59
96
|
})
|
|
60
97
|
.define_method(
|
|
61
98
|
"_search",
|
|
62
|
-
*[](faiss::Index &self, int64_t n, Rice::
|
|
63
|
-
const float *x = (
|
|
99
|
+
*[](faiss::Index &self, int64_t n, Rice::Object o, int64_t k) {
|
|
100
|
+
const float *x = float_array(o);
|
|
64
101
|
float *distances = new float[k * n];
|
|
65
102
|
int64_t *labels = new int64_t[k * n];
|
|
66
103
|
|
|
67
104
|
self.search(n, x, k, distances, labels);
|
|
68
105
|
|
|
69
|
-
auto dstr =
|
|
70
|
-
auto lstr =
|
|
106
|
+
auto dstr = result(distances, k * n);
|
|
107
|
+
auto lstr = result(labels, k * n);
|
|
71
108
|
|
|
72
109
|
Rice::Array ret;
|
|
73
110
|
ret.push(dstr);
|
|
74
111
|
ret.push(lstr);
|
|
75
112
|
return ret;
|
|
113
|
+
})
|
|
114
|
+
.define_method(
|
|
115
|
+
"save",
|
|
116
|
+
*[](faiss::Index &self, const char *fname) {
|
|
117
|
+
faiss::write_index(&self, fname);
|
|
118
|
+
})
|
|
119
|
+
.define_singleton_method(
|
|
120
|
+
"load",
|
|
121
|
+
*[](const char *fname) {
|
|
122
|
+
return faiss::read_index(fname);
|
|
76
123
|
});
|
|
77
124
|
|
|
78
125
|
Rice::define_class_under<faiss::IndexBinary>(rb_mFaiss, "IndexBinary")
|
|
@@ -93,32 +140,42 @@ void Init_ext()
|
|
|
93
140
|
})
|
|
94
141
|
.define_method(
|
|
95
142
|
"_train",
|
|
96
|
-
*[](faiss::IndexBinary &self, int64_t n, Rice::
|
|
97
|
-
const uint8_t *x = (
|
|
143
|
+
*[](faiss::IndexBinary &self, int64_t n, Rice::Object o) {
|
|
144
|
+
const uint8_t *x = uint8_array(o);
|
|
98
145
|
self.train(n, x);
|
|
99
146
|
})
|
|
100
147
|
.define_method(
|
|
101
148
|
"_add",
|
|
102
|
-
*[](faiss::IndexBinary &self, int64_t n, Rice::
|
|
103
|
-
const uint8_t *x = (
|
|
149
|
+
*[](faiss::IndexBinary &self, int64_t n, Rice::Object o) {
|
|
150
|
+
const uint8_t *x = uint8_array(o);
|
|
104
151
|
self.add(n, x);
|
|
105
152
|
})
|
|
106
153
|
.define_method(
|
|
107
154
|
"_search",
|
|
108
|
-
*[](faiss::IndexBinary &self, int64_t n, Rice::
|
|
109
|
-
const uint8_t *x = (
|
|
155
|
+
*[](faiss::IndexBinary &self, int64_t n, Rice::Object o, int64_t k) {
|
|
156
|
+
const uint8_t *x = uint8_array(o);
|
|
110
157
|
int32_t *distances = new int32_t[k * n];
|
|
111
158
|
int64_t *labels = new int64_t[k * n];
|
|
112
159
|
|
|
113
160
|
self.search(n, x, k, distances, labels);
|
|
114
161
|
|
|
115
|
-
auto dstr =
|
|
116
|
-
auto lstr =
|
|
162
|
+
auto dstr = result(distances, k * n);
|
|
163
|
+
auto lstr = result(labels, k * n);
|
|
117
164
|
|
|
118
165
|
Rice::Array ret;
|
|
119
166
|
ret.push(dstr);
|
|
120
167
|
ret.push(lstr);
|
|
121
168
|
return ret;
|
|
169
|
+
})
|
|
170
|
+
.define_method(
|
|
171
|
+
"save",
|
|
172
|
+
*[](faiss::IndexBinary &self, const char *fname) {
|
|
173
|
+
faiss::write_index_binary(&self, fname);
|
|
174
|
+
})
|
|
175
|
+
.define_singleton_method(
|
|
176
|
+
"load",
|
|
177
|
+
*[](const char *fname) {
|
|
178
|
+
return faiss::read_index_binary(fname);
|
|
122
179
|
});
|
|
123
180
|
|
|
124
181
|
Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(rb_mFaiss, "IndexFlatL2")
|
|
@@ -176,12 +233,12 @@ void Init_ext()
|
|
|
176
233
|
for (size_t i = 0; i < self.centroids.size(); i++) {
|
|
177
234
|
centroids[i] = self.centroids[i];
|
|
178
235
|
}
|
|
179
|
-
return
|
|
236
|
+
return result(centroids, self.k * self.d);
|
|
180
237
|
})
|
|
181
238
|
.define_method(
|
|
182
239
|
"_train",
|
|
183
|
-
*[](faiss::Clustering &self, int64_t n, Rice::
|
|
184
|
-
const float *x = (
|
|
240
|
+
*[](faiss::Clustering &self, int64_t n, Rice::Object o, faiss::Index & index) {
|
|
241
|
+
const float *x = float_array(o);
|
|
185
242
|
self.train(n, x, index);
|
|
186
243
|
});
|
|
187
244
|
|
|
@@ -199,16 +256,16 @@ void Init_ext()
|
|
|
199
256
|
})
|
|
200
257
|
.define_method(
|
|
201
258
|
"_train",
|
|
202
|
-
*[](faiss::PCAMatrix &self, int64_t n, Rice::
|
|
203
|
-
const float *x = (
|
|
259
|
+
*[](faiss::PCAMatrix &self, int64_t n, Rice::Object o) {
|
|
260
|
+
const float *x = float_array(o);
|
|
204
261
|
self.train(n, x);
|
|
205
262
|
})
|
|
206
263
|
.define_method(
|
|
207
264
|
"_apply",
|
|
208
|
-
*[](faiss::PCAMatrix &self, int64_t n, Rice::
|
|
209
|
-
const float *x = (
|
|
265
|
+
*[](faiss::PCAMatrix &self, int64_t n, Rice::Object o) {
|
|
266
|
+
const float *x = float_array(o);
|
|
210
267
|
float* res = self.apply(n, x);
|
|
211
|
-
return
|
|
268
|
+
return result(res, n * self.d_out);
|
|
212
269
|
});
|
|
213
270
|
|
|
214
271
|
Rice::define_class_under<faiss::ProductQuantizer>(rb_mFaiss, "ProductQuantizer")
|
|
@@ -225,24 +282,34 @@ void Init_ext()
|
|
|
225
282
|
})
|
|
226
283
|
.define_method(
|
|
227
284
|
"_train",
|
|
228
|
-
*[](faiss::ProductQuantizer &self, int n, Rice::
|
|
229
|
-
const float *x = (
|
|
285
|
+
*[](faiss::ProductQuantizer &self, int n, Rice::Object o) {
|
|
286
|
+
const float *x = float_array(o);
|
|
230
287
|
self.train(n, x);
|
|
231
288
|
})
|
|
232
289
|
.define_method(
|
|
233
290
|
"_compute_codes",
|
|
234
|
-
*[](faiss::ProductQuantizer &self, int n, Rice::
|
|
235
|
-
const float *x = (
|
|
291
|
+
*[](faiss::ProductQuantizer &self, int n, Rice::Object o) {
|
|
292
|
+
const float *x = float_array(o);
|
|
236
293
|
uint8_t *codes = new uint8_t[n * self.M];
|
|
237
294
|
self.compute_codes(x, codes, n);
|
|
238
|
-
return
|
|
295
|
+
return result(codes, n * self.M);
|
|
239
296
|
})
|
|
240
297
|
.define_method(
|
|
241
298
|
"_decode",
|
|
242
|
-
*[](faiss::ProductQuantizer &self, int n, Rice::
|
|
243
|
-
const uint8_t *codes = (
|
|
299
|
+
*[](faiss::ProductQuantizer &self, int n, Rice::Object o) {
|
|
300
|
+
const uint8_t *codes = uint8_array(o);
|
|
244
301
|
float *x = new float[n * self.d];
|
|
245
302
|
self.decode(codes, x, n);
|
|
246
|
-
return
|
|
303
|
+
return result(x, n * self.d);
|
|
304
|
+
})
|
|
305
|
+
.define_method(
|
|
306
|
+
"save",
|
|
307
|
+
*[](faiss::ProductQuantizer &self, const char *fname) {
|
|
308
|
+
faiss::write_ProductQuantizer(&self, fname);
|
|
309
|
+
})
|
|
310
|
+
.define_singleton_method(
|
|
311
|
+
"load",
|
|
312
|
+
*[](const char *fname) {
|
|
313
|
+
return faiss::read_ProductQuantizer(fname);
|
|
247
314
|
});
|
|
248
315
|
}
|
data/ext/faiss/extconf.rb
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
require "mkmf-rice"
|
|
2
2
|
|
|
3
|
-
abort "
|
|
3
|
+
abort "BLAS not found" unless have_library("blas")
|
|
4
|
+
abort "LAPACK not found" unless have_library("lapack")
|
|
5
|
+
abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
|
|
4
6
|
|
|
5
|
-
$CXXFLAGS << " -std=c++11"
|
|
7
|
+
$CXXFLAGS << " -std=c++11 -march=native -DFINTEGER=int"
|
|
8
|
+
|
|
9
|
+
ext = File.expand_path(".", __dir__)
|
|
10
|
+
vendor = File.expand_path("../../vendor", __dir__)
|
|
11
|
+
|
|
12
|
+
$srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/impl,#{vendor}/faiss/utils}/*.{cpp}"]
|
|
13
|
+
$objs = $srcs.map { |v| v.sub(/cpp\z/, "o") }
|
|
14
|
+
$INCFLAGS << " -I#{vendor}"
|
|
15
|
+
$VPATH << vendor
|
|
6
16
|
|
|
7
17
|
create_makefile("faiss/ext")
|
|
Binary file
|
data/lib/faiss/index.rb
CHANGED
|
@@ -2,18 +2,18 @@ module Faiss
|
|
|
2
2
|
class Index
|
|
3
3
|
def train(objects)
|
|
4
4
|
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
|
5
|
-
_train(objects.shape[0], objects
|
|
5
|
+
_train(objects.shape[0], objects)
|
|
6
6
|
end
|
|
7
7
|
|
|
8
8
|
def add(objects)
|
|
9
9
|
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
|
10
|
-
_add(objects.shape[0], objects
|
|
10
|
+
_add(objects.shape[0], objects)
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def search(objects, k)
|
|
14
14
|
objects = Numo::SFloat.cast(objects) unless objects.is_a?(Numo::SFloat)
|
|
15
15
|
n = objects.shape[0]
|
|
16
|
-
distances, labels = _search(n, objects
|
|
16
|
+
distances, labels = _search(n, objects, k)
|
|
17
17
|
[Numo::SFloat.from_binary(distances).reshape(n, k), Numo::Int64.from_binary(labels).reshape(n, k)]
|
|
18
18
|
end
|
|
19
19
|
end
|
data/lib/faiss/index_binary.rb
CHANGED
|
@@ -2,18 +2,18 @@ module Faiss
|
|
|
2
2
|
class IndexBinary
|
|
3
3
|
def train(objects)
|
|
4
4
|
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
|
5
|
-
_train(objects.shape[0], objects
|
|
5
|
+
_train(objects.shape[0], objects)
|
|
6
6
|
end
|
|
7
7
|
|
|
8
8
|
def add(objects)
|
|
9
9
|
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
|
10
|
-
_add(objects.shape[0], objects
|
|
10
|
+
_add(objects.shape[0], objects)
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def search(objects, k)
|
|
14
14
|
objects = Numo::UInt8.cast(objects) unless objects.is_a?(Numo::UInt8)
|
|
15
15
|
n = objects.shape[0]
|
|
16
|
-
distances, labels = _search(n, objects
|
|
16
|
+
distances, labels = _search(n, objects, k)
|
|
17
17
|
[Numo::UInt32.from_binary(distances).reshape(n, k), Numo::Int64.from_binary(labels).reshape(n, k)]
|
|
18
18
|
end
|
|
19
19
|
end
|