faiss 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +24 -93
- data/vendor/faiss/faiss/Index2Layer.h +8 -17
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +610 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +253 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +52 -69
- data/vendor/faiss/faiss/IndexFlat.h +16 -19
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +101 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +59 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +200 -40
- data/vendor/faiss/faiss/IndexIVF.h +59 -22
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +393 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +183 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +43 -26
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +238 -53
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +63 -40
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +23 -7
- data/vendor/faiss/faiss/IndexLSH.cpp +8 -32
- data/vendor/faiss/faiss/IndexLSH.h +4 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -5
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -5
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +108 -120
- data/vendor/faiss/faiss/IndexPQ.h +21 -22
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +36 -4
- data/vendor/faiss/faiss/IndexRefine.h +14 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +28 -43
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +8 -23
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +45 -1
- data/vendor/faiss/faiss/VectorTransform.h +25 -4
- data/vendor/faiss/faiss/clone_index.cpp +26 -3
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -6
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +331 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +110 -19
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +133 -32
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +378 -217
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +106 -29
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -4
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +521 -55
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +94 -16
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +108 -191
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +338 -24
- data/vendor/faiss/faiss/impl/index_write.cpp +300 -18
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +303 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +772 -412
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +384 -58
- data/vendor/faiss/faiss/utils/distances.h +149 -18
- data/vendor/faiss/faiss/utils/distances_simd.cpp +776 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +46 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
- data/vendor/faiss/faiss/IndexResidual.h +0 -152
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bdc2effbe6e2d827ffe473531be0864cf710ae6f3ad34f8324087695c367d140
|
|
4
|
+
data.tar.gz: 6e5b80b1f4281766b17d208af58b44e768576d259de1c6edb25630c700215c10
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 404d064f14734a23d946ad0fc0576673e5e685d1001884981453e3bec23ecf6ba7b75857e578b47ef5303fde27342dfb25e85bbb7a60883d6ba09a964049bbfa
|
|
7
|
+
data.tar.gz: 536f403109d3773a3cd0ca27b4b46bfa26eaeedd8d5e2f4e6fa116e3b922d21f8a964a2d6aa1cbbff951da9444e9184bf7c60955263ed68c91ba91342f3f9196
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
3
|
Copyright (c) Facebook, Inc. and its affiliates.
|
|
4
|
-
Copyright (c) 2020-
|
|
4
|
+
Copyright (c) 2020-2022 Andrew Kane
|
|
5
5
|
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
|
@@ -8,23 +8,25 @@ Learn more about [Faiss](https://engineering.fb.com/data-infrastructure/faiss-a-
|
|
|
8
8
|
|
|
9
9
|
## Installation
|
|
10
10
|
|
|
11
|
-
First,
|
|
11
|
+
First, ensure BLAS, LAPACK, and OpenMP are installed. For Mac, use:
|
|
12
12
|
|
|
13
13
|
```sh
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
brew install libomp
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
For Ubuntu, use:
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
sudo apt install libblas-dev liblapack-dev
|
|
19
|
+
```sh
|
|
20
|
+
sudo apt-get install libblas-dev liblapack-dev
|
|
19
21
|
```
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
Then add this line to your application’s Gemfile:
|
|
22
24
|
|
|
23
25
|
```ruby
|
|
24
|
-
gem
|
|
26
|
+
gem "faiss"
|
|
25
27
|
```
|
|
26
28
|
|
|
27
|
-
It can take a few minutes to compile the gem.
|
|
29
|
+
It can take a few minutes to compile the gem. Windows is not currently supported.
|
|
28
30
|
|
|
29
31
|
## Getting Started
|
|
30
32
|
|
|
@@ -69,61 +71,61 @@ index = Faiss::Index.load("index.bin")
|
|
|
69
71
|
|
|
70
72
|
Exact search for L2
|
|
71
73
|
|
|
72
|
-
```
|
|
74
|
+
```ruby
|
|
73
75
|
Faiss::IndexFlatL2.new(d)
|
|
74
76
|
```
|
|
75
77
|
|
|
76
78
|
Exact search for inner product
|
|
77
79
|
|
|
78
|
-
```
|
|
80
|
+
```ruby
|
|
79
81
|
Faiss::IndexFlatIP.new(d)
|
|
80
82
|
```
|
|
81
83
|
|
|
82
84
|
Hierarchical navigable small world graph exploration
|
|
83
85
|
|
|
84
|
-
```
|
|
86
|
+
```ruby
|
|
85
87
|
Faiss::IndexHNSWFlat.new(d, m)
|
|
86
88
|
```
|
|
87
89
|
|
|
88
90
|
Inverted file with exact post-verification
|
|
89
91
|
|
|
90
|
-
```
|
|
92
|
+
```ruby
|
|
91
93
|
Faiss::IndexIVFFlat.new(quantizer, d, nlists)
|
|
92
94
|
```
|
|
93
95
|
|
|
94
96
|
Locality-sensitive hashing
|
|
95
97
|
|
|
96
|
-
```
|
|
98
|
+
```ruby
|
|
97
99
|
Faiss::IndexLSH.new(d, nbits)
|
|
98
100
|
```
|
|
99
101
|
|
|
100
102
|
Scalar quantizer (SQ) in flat mode
|
|
101
103
|
|
|
102
|
-
```
|
|
104
|
+
```ruby
|
|
103
105
|
Faiss::IndexScalarQuantizer.new(d, qtype)
|
|
104
106
|
```
|
|
105
107
|
|
|
106
108
|
Product quantizer (PQ) in flat mode
|
|
107
109
|
|
|
108
|
-
```
|
|
110
|
+
```ruby
|
|
109
111
|
Faiss::IndexPQ.new(d, m, nbits)
|
|
110
112
|
```
|
|
111
113
|
|
|
112
114
|
IVF and scalar quantizer
|
|
113
115
|
|
|
114
|
-
```
|
|
116
|
+
```ruby
|
|
115
117
|
Faiss::IndexIVFScalarQuantizer.new(quantizer, d, nlists, qtype)
|
|
116
118
|
```
|
|
117
119
|
|
|
118
120
|
IVFADC (coarse quantizer+PQ on residuals)
|
|
119
121
|
|
|
120
|
-
```
|
|
122
|
+
```ruby
|
|
121
123
|
Faiss::IndexIVFPQ.new(quantizer, d, nlists, m, nbits)
|
|
122
124
|
```
|
|
123
125
|
|
|
124
126
|
IVFADC+R (same as IVFADC with re-ranking based on codes)
|
|
125
127
|
|
|
126
|
-
```
|
|
128
|
+
```ruby
|
|
127
129
|
Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
|
|
128
130
|
```
|
|
129
131
|
|
|
@@ -131,13 +133,13 @@ Faiss::IndexIVFPQR.new(quantizer, d, nlists, m, nbits, m_refine, nbits_refine)
|
|
|
131
133
|
|
|
132
134
|
Index binary vectors
|
|
133
135
|
|
|
134
|
-
```
|
|
136
|
+
```ruby
|
|
135
137
|
Faiss::IndexBinaryFlat.new(d)
|
|
136
138
|
```
|
|
137
139
|
|
|
138
140
|
Speed up search with an inverse vector file
|
|
139
141
|
|
|
140
|
-
```
|
|
142
|
+
```ruby
|
|
141
143
|
Faiss::IndexBinaryIVF.new(quantizer, d, nlists)
|
|
142
144
|
```
|
|
143
145
|
|
|
@@ -220,7 +222,7 @@ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
|
|
220
222
|
|
|
221
223
|
## History
|
|
222
224
|
|
|
223
|
-
View the [changelog](
|
|
225
|
+
View the [changelog](CHANGELOG.md)
|
|
224
226
|
|
|
225
227
|
## Contributing
|
|
226
228
|
|
data/ext/faiss/extconf.rb
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
require "mkmf-rice"
|
|
2
2
|
require "numo/narray"
|
|
3
3
|
|
|
4
|
+
# libomp changed to keg-only
|
|
5
|
+
# https://github.com/Homebrew/homebrew-core/issues/112107
|
|
6
|
+
if RbConfig::CONFIG["host_os"] =~ /darwin/i
|
|
7
|
+
brew_prefix = RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "/opt/homebrew" : "/usr/local"
|
|
8
|
+
find_library("omp", nil, "#{brew_prefix}/opt/libomp/lib")
|
|
9
|
+
find_header("omp.h", "#{brew_prefix}/opt/libomp/include")
|
|
10
|
+
end
|
|
11
|
+
|
|
4
12
|
abort "BLAS not found" unless have_library("blas")
|
|
5
13
|
abort "LAPACK not found" unless have_library("lapack")
|
|
6
14
|
abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
|
|
@@ -8,6 +16,9 @@ abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
|
|
|
8
16
|
numo = File.join(Gem.loaded_specs["numo-narray"].require_path, "numo")
|
|
9
17
|
abort "Numo not found" unless find_header("numo/narray.h", numo)
|
|
10
18
|
|
|
19
|
+
# for https://bugs.ruby-lang.org/issues/19005
|
|
20
|
+
$LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
|
|
21
|
+
|
|
11
22
|
# -march=native not supported with ARM Mac
|
|
12
23
|
default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : "-march=native"
|
|
13
24
|
$CXXFLAGS << " -std=c++17 $(optflags) -DFINTEGER=int " << with_config("optflags", default_optflags)
|
data/ext/faiss/index.cpp
CHANGED
|
@@ -140,13 +140,13 @@ void init_index(Rice::Module& m) {
|
|
|
140
140
|
})
|
|
141
141
|
.define_method(
|
|
142
142
|
"save",
|
|
143
|
-
[](faiss::Index &self,
|
|
144
|
-
faiss::write_index(&self, fname);
|
|
143
|
+
[](faiss::Index &self, Rice::String fname) {
|
|
144
|
+
faiss::write_index(&self, fname.c_str());
|
|
145
145
|
})
|
|
146
146
|
.define_singleton_function(
|
|
147
147
|
"load",
|
|
148
|
-
[](
|
|
149
|
-
return faiss::read_index(fname);
|
|
148
|
+
[](Rice::String fname) {
|
|
149
|
+
return faiss::read_index(fname.c_str());
|
|
150
150
|
});
|
|
151
151
|
|
|
152
152
|
Rice::define_class_under<faiss::IndexFlatL2, faiss::Index>(m, "IndexFlatL2")
|
data/ext/faiss/index_binary.cpp
CHANGED
|
@@ -52,13 +52,13 @@ void init_index_binary(Rice::Module& m) {
|
|
|
52
52
|
})
|
|
53
53
|
.define_method(
|
|
54
54
|
"save",
|
|
55
|
-
[](faiss::IndexBinary &self,
|
|
56
|
-
faiss::write_index_binary(&self, fname);
|
|
55
|
+
[](faiss::IndexBinary &self, Rice::String fname) {
|
|
56
|
+
faiss::write_index_binary(&self, fname.c_str());
|
|
57
57
|
})
|
|
58
58
|
.define_singleton_function(
|
|
59
59
|
"load",
|
|
60
|
-
[](
|
|
61
|
-
return faiss::read_index_binary(fname);
|
|
60
|
+
[](Rice::String fname) {
|
|
61
|
+
return faiss::read_index_binary(fname.c_str());
|
|
62
62
|
});
|
|
63
63
|
|
|
64
64
|
Rice::define_class_under<faiss::IndexBinaryFlat, faiss::IndexBinary>(m, "IndexBinaryFlat")
|
|
@@ -69,7 +69,7 @@ void init_index_binary(Rice::Module& m) {
|
|
|
69
69
|
|
|
70
70
|
m.define_singleton_function(
|
|
71
71
|
"index_binary_factory",
|
|
72
|
-
[](int d,
|
|
73
|
-
return faiss::index_binary_factory(d, description);
|
|
72
|
+
[](int d, Rice::String description) {
|
|
73
|
+
return faiss::index_binary_factory(d, description.c_str());
|
|
74
74
|
});
|
|
75
75
|
}
|
|
@@ -42,12 +42,12 @@ void init_product_quantizer(Rice::Module& m) {
|
|
|
42
42
|
})
|
|
43
43
|
.define_method(
|
|
44
44
|
"save",
|
|
45
|
-
[](faiss::ProductQuantizer &self,
|
|
46
|
-
faiss::write_ProductQuantizer(&self, fname);
|
|
45
|
+
[](faiss::ProductQuantizer &self, Rice::String fname) {
|
|
46
|
+
faiss::write_ProductQuantizer(&self, fname.c_str());
|
|
47
47
|
})
|
|
48
48
|
.define_singleton_function(
|
|
49
49
|
"load",
|
|
50
|
-
[](
|
|
51
|
-
return faiss::read_ProductQuantizer(fname);
|
|
50
|
+
[](Rice::String fname) {
|
|
51
|
+
return faiss::read_ProductQuantizer(fname.c_str());
|
|
52
52
|
});
|
|
53
53
|
}
|
data/lib/faiss/version.rb
CHANGED
|
@@ -523,6 +523,19 @@ void ParameterSpace::set_index_parameter(
|
|
|
523
523
|
}
|
|
524
524
|
}
|
|
525
525
|
|
|
526
|
+
if (name == "efConstruction") {
|
|
527
|
+
if (DC(IndexHNSW)) {
|
|
528
|
+
ix->hnsw.efConstruction = int(val);
|
|
529
|
+
return;
|
|
530
|
+
}
|
|
531
|
+
if (DC(IndexIVF)) {
|
|
532
|
+
if (IndexHNSW* cq = dynamic_cast<IndexHNSW*>(ix->quantizer)) {
|
|
533
|
+
cq->hnsw.efConstruction = int(val);
|
|
534
|
+
return;
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
|
|
526
539
|
if (name == "efSearch") {
|
|
527
540
|
if (DC(IndexHNSW)) {
|
|
528
541
|
ix->hnsw.efSearch = int(val);
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
|
|
21
21
|
#include <faiss/IndexFlat.h>
|
|
22
22
|
#include <faiss/impl/FaissAssert.h>
|
|
23
|
+
#include <faiss/impl/kmeans1d.h>
|
|
23
24
|
#include <faiss/utils/distances.h>
|
|
24
25
|
#include <faiss/utils/random.h>
|
|
25
26
|
#include <faiss/utils/utils.h>
|
|
@@ -553,6 +554,37 @@ void Clustering::train_encoded(
|
|
|
553
554
|
}
|
|
554
555
|
}
|
|
555
556
|
|
|
557
|
+
Clustering1D::Clustering1D(int k) : Clustering(1, k) {}
|
|
558
|
+
|
|
559
|
+
Clustering1D::Clustering1D(int k, const ClusteringParameters& cp)
|
|
560
|
+
: Clustering(1, k, cp) {}
|
|
561
|
+
|
|
562
|
+
void Clustering1D::train_exact(idx_t n, const float* x) {
|
|
563
|
+
const float* xt = x;
|
|
564
|
+
|
|
565
|
+
std::unique_ptr<uint8_t[]> del;
|
|
566
|
+
if (n > k * max_points_per_centroid) {
|
|
567
|
+
uint8_t* x_new;
|
|
568
|
+
float* weights_new;
|
|
569
|
+
n = subsample_training_set(
|
|
570
|
+
*this,
|
|
571
|
+
n,
|
|
572
|
+
(uint8_t*)x,
|
|
573
|
+
sizeof(float) * d,
|
|
574
|
+
nullptr,
|
|
575
|
+
&x_new,
|
|
576
|
+
&weights_new);
|
|
577
|
+
del.reset(x_new);
|
|
578
|
+
xt = (float*)x_new;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
centroids.resize(k);
|
|
582
|
+
double uf = kmeans1d(xt, n, k, centroids.data());
|
|
583
|
+
|
|
584
|
+
ClusteringIterationStats stats = {0.0, 0.0, 0.0, uf, 0};
|
|
585
|
+
iteration_stats.push_back(stats);
|
|
586
|
+
}
|
|
587
|
+
|
|
556
588
|
float kmeans_clustering(
|
|
557
589
|
size_t d,
|
|
558
590
|
size_t n,
|
|
@@ -111,6 +111,20 @@ struct Clustering : ClusteringParameters {
|
|
|
111
111
|
virtual ~Clustering() {}
|
|
112
112
|
};
|
|
113
113
|
|
|
114
|
+
/** Exact 1D clustering algorithm
|
|
115
|
+
*
|
|
116
|
+
* Since it does not use an index, it does not overload the train() function
|
|
117
|
+
*/
|
|
118
|
+
struct Clustering1D : Clustering {
|
|
119
|
+
explicit Clustering1D(int k);
|
|
120
|
+
|
|
121
|
+
Clustering1D(int k, const ClusteringParameters& cp);
|
|
122
|
+
|
|
123
|
+
void train_exact(idx_t n, const float* x);
|
|
124
|
+
|
|
125
|
+
virtual ~Clustering1D() {}
|
|
126
|
+
};
|
|
127
|
+
|
|
114
128
|
struct ProgressiveDimClusteringParameters : ClusteringParameters {
|
|
115
129
|
int progressive_dim_steps; ///< number of incremental steps
|
|
116
130
|
bool apply_pca; ///< apply PCA on input
|
|
@@ -5,15 +5,18 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#include <faiss/IVFlib.h>
|
|
9
|
+
#include <omp.h>
|
|
11
10
|
|
|
12
11
|
#include <memory>
|
|
13
12
|
|
|
13
|
+
#include <faiss/IndexAdditiveQuantizer.h>
|
|
14
|
+
#include <faiss/IndexIVFAdditiveQuantizer.h>
|
|
14
15
|
#include <faiss/IndexPreTransform.h>
|
|
15
16
|
#include <faiss/MetaIndexes.h>
|
|
16
17
|
#include <faiss/impl/FaissAssert.h>
|
|
18
|
+
#include <faiss/utils/distances.h>
|
|
19
|
+
#include <faiss/utils/hamming.h>
|
|
17
20
|
#include <faiss/utils/utils.h>
|
|
18
21
|
|
|
19
22
|
namespace faiss {
|
|
@@ -349,6 +352,7 @@ void search_with_parameters(
|
|
|
349
352
|
index_ivf->search_preassigned(
|
|
350
353
|
n, x, k, Iq.data(), Dq.data(), distances, labels, false, params);
|
|
351
354
|
double t3 = getmillisecs();
|
|
355
|
+
|
|
352
356
|
if (ms_per_stage) {
|
|
353
357
|
ms_per_stage[0] = t1 - t0;
|
|
354
358
|
ms_per_stage[1] = t2 - t1;
|
|
@@ -406,5 +410,100 @@ void range_search_with_parameters(
|
|
|
406
410
|
}
|
|
407
411
|
}
|
|
408
412
|
|
|
413
|
+
IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
|
|
414
|
+
const ResidualQuantizer& rq,
|
|
415
|
+
int nlevel) {
|
|
416
|
+
FAISS_THROW_IF_NOT(nlevel > 0 && nlevel + 1 < rq.M);
|
|
417
|
+
|
|
418
|
+
std::vector<size_t> nbits(nlevel);
|
|
419
|
+
std::copy(rq.nbits.begin(), rq.nbits.begin() + nlevel, nbits.begin());
|
|
420
|
+
std::unique_ptr<ResidualCoarseQuantizer> rcq(
|
|
421
|
+
new ResidualCoarseQuantizer(rq.d, nbits));
|
|
422
|
+
|
|
423
|
+
// set the coarse quantizer from the 2 first quantizers
|
|
424
|
+
rcq->rq.initialize_from(rq);
|
|
425
|
+
rcq->is_trained = true;
|
|
426
|
+
rcq->ntotal = (idx_t)1 << rcq->rq.tot_bits;
|
|
427
|
+
|
|
428
|
+
// settings for exhaustive search in RCQ
|
|
429
|
+
rcq->centroid_norms.resize(rcq->ntotal);
|
|
430
|
+
rcq->aq->compute_centroid_norms(rcq->centroid_norms.data());
|
|
431
|
+
rcq->beam_factor = -1.0; // use exact search
|
|
432
|
+
size_t nlist = rcq->ntotal;
|
|
433
|
+
|
|
434
|
+
// build a IVFResidualQuantizer from that
|
|
435
|
+
std::vector<size_t> nbits_refined;
|
|
436
|
+
for (int i = nlevel; i < rq.M; i++) {
|
|
437
|
+
nbits_refined.push_back(rq.nbits[i]);
|
|
438
|
+
}
|
|
439
|
+
std::unique_ptr<IndexIVFResidualQuantizer> index(
|
|
440
|
+
new IndexIVFResidualQuantizer(
|
|
441
|
+
rcq.get(),
|
|
442
|
+
rq.d,
|
|
443
|
+
nlist,
|
|
444
|
+
nbits_refined,
|
|
445
|
+
faiss::METRIC_L2,
|
|
446
|
+
rq.search_type));
|
|
447
|
+
index->own_fields = true;
|
|
448
|
+
rcq.release();
|
|
449
|
+
index->by_residual = true;
|
|
450
|
+
index->rq.initialize_from(rq, nlevel);
|
|
451
|
+
index->is_trained = true;
|
|
452
|
+
|
|
453
|
+
return index.release();
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
void ivf_residual_add_from_flat_codes(
|
|
457
|
+
IndexIVFResidualQuantizer* index,
|
|
458
|
+
size_t nb,
|
|
459
|
+
const uint8_t* raw_codes,
|
|
460
|
+
int64_t code_size) {
|
|
461
|
+
const ResidualCoarseQuantizer* rcq =
|
|
462
|
+
dynamic_cast<const faiss::ResidualCoarseQuantizer*>(
|
|
463
|
+
index->quantizer);
|
|
464
|
+
FAISS_THROW_IF_NOT_MSG(rcq, "the coarse quantizer must be a RCQ");
|
|
465
|
+
if (code_size < 0) {
|
|
466
|
+
code_size = index->code_size;
|
|
467
|
+
}
|
|
468
|
+
InvertedLists& invlists = *index->invlists;
|
|
469
|
+
const ResidualQuantizer& rq = index->rq;
|
|
470
|
+
|
|
471
|
+
// populate inverted lists
|
|
472
|
+
#pragma omp parallel if (nb > 10000)
|
|
473
|
+
{
|
|
474
|
+
std::vector<uint8_t> tmp_code(index->code_size);
|
|
475
|
+
std::vector<float> tmp(rq.d);
|
|
476
|
+
int nt = omp_get_num_threads();
|
|
477
|
+
int rank = omp_get_thread_num();
|
|
478
|
+
|
|
479
|
+
#pragma omp for
|
|
480
|
+
for (idx_t i = 0; i < nb; i++) {
|
|
481
|
+
const uint8_t* code = &raw_codes[i * code_size];
|
|
482
|
+
BitstringReader rd(code, code_size);
|
|
483
|
+
idx_t list_no = rd.read(rcq->rq.tot_bits);
|
|
484
|
+
|
|
485
|
+
if (list_no % nt ==
|
|
486
|
+
rank) { // each thread takes care of 1/nt of the invlists
|
|
487
|
+
// copy AQ indexes one by one
|
|
488
|
+
BitstringWriter wr(tmp_code.data(), tmp_code.size());
|
|
489
|
+
for (int j = 0; j < rq.M; j++) {
|
|
490
|
+
int nbit = rq.nbits[j];
|
|
491
|
+
wr.write(rd.read(nbit), nbit);
|
|
492
|
+
}
|
|
493
|
+
// we need to recompute the norm
|
|
494
|
+
// decode first, does not use the norm component, so that's
|
|
495
|
+
// ok
|
|
496
|
+
index->rq.decode(tmp_code.data(), tmp.data(), 1);
|
|
497
|
+
float norm = fvec_norm_L2sqr(tmp.data(), rq.d);
|
|
498
|
+
wr.write(rq.encode_norm(norm), rq.norm_bits);
|
|
499
|
+
|
|
500
|
+
// add code to the inverted list
|
|
501
|
+
invlists.add_entry(list_no, i, tmp_code.data());
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
index->ntotal += nb;
|
|
506
|
+
}
|
|
507
|
+
|
|
409
508
|
} // namespace ivflib
|
|
410
509
|
} // namespace faiss
|
data/vendor/faiss/faiss/IVFlib.h
CHANGED
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#ifndef FAISS_IVFLIB_H
|
|
11
9
|
#define FAISS_IVFLIB_H
|
|
12
10
|
|
|
@@ -20,6 +18,11 @@
|
|
|
20
18
|
#include <vector>
|
|
21
19
|
|
|
22
20
|
namespace faiss {
|
|
21
|
+
|
|
22
|
+
struct IndexIVFResidualQuantizer;
|
|
23
|
+
struct IndexResidualQuantizer;
|
|
24
|
+
struct ResidualQuantizer;
|
|
25
|
+
|
|
23
26
|
namespace ivflib {
|
|
24
27
|
|
|
25
28
|
/** check if two indexes have the same parameters and are trained in
|
|
@@ -145,6 +148,27 @@ void range_search_with_parameters(
|
|
|
145
148
|
size_t* nb_dis = nullptr,
|
|
146
149
|
double* ms_per_stage = nullptr);
|
|
147
150
|
|
|
151
|
+
/** Build an IndexIVFResidualQuantizer from an ResidualQuantizer, using the
|
|
152
|
+
* nlevel first components as coarse quantizer and the rest as codes in invlists
|
|
153
|
+
*/
|
|
154
|
+
IndexIVFResidualQuantizer* ivf_residual_from_quantizer(
|
|
155
|
+
const ResidualQuantizer&,
|
|
156
|
+
int nlevel);
|
|
157
|
+
|
|
158
|
+
/** add from codes. NB that the norm component is not used, so the code_size can
|
|
159
|
+
* be provided.
|
|
160
|
+
*
|
|
161
|
+
* @param ivfrq index to populate with the codes
|
|
162
|
+
* @param codes codes to add, size (ncode, code_size)
|
|
163
|
+
* @param code_size override the ivfrq's code_size, useful if the norm encoding
|
|
164
|
+
* is different
|
|
165
|
+
*/
|
|
166
|
+
void ivf_residual_add_from_flat_codes(
|
|
167
|
+
IndexIVFResidualQuantizer* ivfrq,
|
|
168
|
+
size_t ncode,
|
|
169
|
+
const uint8_t* codes,
|
|
170
|
+
int64_t code_size = -1);
|
|
171
|
+
|
|
148
172
|
} // namespace ivflib
|
|
149
173
|
} // namespace faiss
|
|
150
174
|
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include <faiss/Index.h>
|
|
11
11
|
|
|
12
12
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
13
|
+
#include <faiss/impl/DistanceComputer.h>
|
|
13
14
|
#include <faiss/impl/FaissAssert.h>
|
|
14
15
|
#include <faiss/utils/distances.h>
|
|
15
16
|
|
|
@@ -23,7 +24,12 @@ void Index::train(idx_t /*n*/, const float* /*x*/) {
|
|
|
23
24
|
// does nothing by default
|
|
24
25
|
}
|
|
25
26
|
|
|
26
|
-
void Index::range_search(
|
|
27
|
+
void Index::range_search(
|
|
28
|
+
idx_t,
|
|
29
|
+
const float*,
|
|
30
|
+
float,
|
|
31
|
+
RangeSearchResult*,
|
|
32
|
+
const SearchParameters* params) const {
|
|
27
33
|
FAISS_THROW_MSG("range search not implemented");
|
|
28
34
|
}
|
|
29
35
|
|
|
@@ -48,7 +54,25 @@ void Index::reconstruct(idx_t, float*) const {
|
|
|
48
54
|
FAISS_THROW_MSG("reconstruct not implemented for this type of index");
|
|
49
55
|
}
|
|
50
56
|
|
|
57
|
+
void Index::reconstruct_batch(idx_t n, const idx_t* keys, float* recons) const {
|
|
58
|
+
std::mutex exception_mutex;
|
|
59
|
+
std::string exception_string;
|
|
60
|
+
#pragma omp parallel for if (n > 1000)
|
|
61
|
+
for (idx_t i = 0; i < n; i++) {
|
|
62
|
+
try {
|
|
63
|
+
reconstruct(keys[i], &recons[i * d]);
|
|
64
|
+
} catch (const std::exception& e) {
|
|
65
|
+
std::lock_guard<std::mutex> lock(exception_mutex);
|
|
66
|
+
exception_string = e.what();
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
if (!exception_string.empty()) {
|
|
70
|
+
FAISS_THROW_MSG(exception_string.c_str());
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
51
74
|
void Index::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
|
|
75
|
+
#pragma omp parallel for if (ni > 1000)
|
|
52
76
|
for (idx_t i = 0; i < ni; i++) {
|
|
53
77
|
reconstruct(i0 + i, recons + i * d);
|
|
54
78
|
}
|
|
@@ -60,10 +84,11 @@ void Index::search_and_reconstruct(
|
|
|
60
84
|
idx_t k,
|
|
61
85
|
float* distances,
|
|
62
86
|
idx_t* labels,
|
|
63
|
-
float* recons
|
|
87
|
+
float* recons,
|
|
88
|
+
const SearchParameters* params) const {
|
|
64
89
|
FAISS_THROW_IF_NOT(k > 0);
|
|
65
90
|
|
|
66
|
-
search(n, x, k, distances, labels);
|
|
91
|
+
search(n, x, k, distances, labels, params);
|
|
67
92
|
for (idx_t i = 0; i < n; ++i) {
|
|
68
93
|
for (idx_t j = 0; j < k; ++j) {
|
|
69
94
|
idx_t ij = i * k + j;
|
|
@@ -149,4 +174,12 @@ DistanceComputer* Index::get_distance_computer() const {
|
|
|
149
174
|
}
|
|
150
175
|
}
|
|
151
176
|
|
|
177
|
+
void Index::merge_from(Index& /* otherIndex */, idx_t /* add_id */) {
|
|
178
|
+
FAISS_THROW_MSG("merge_from() not implemented");
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
void Index::check_compatible_for_merge(const Index& /* otherIndex */) const {
|
|
182
|
+
FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
|
|
183
|
+
}
|
|
184
|
+
|
|
152
185
|
} // namespace faiss
|