faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_INDEX_IVFSH_H
11
+ #define FAISS_INDEX_IVFSH_H
12
+
13
+
14
+ #include <vector>
15
+
16
+ #include <faiss/IndexIVF.h>
17
+
18
+
19
+ namespace faiss {
20
+
21
+ struct VectorTransform;
22
+
23
+ /** Inverted list that stores binary codes of size nbit. Before the
24
+ * binary conversion, the dimension of the vectors is transformed from
25
+ * dim d into dim nbit by vt (a random rotation by default).
26
+ *
27
+ * Each coordinate is subtracted from a value determined by
28
+ * threshold_type, and split into intervals of size period. Half of
29
+ * the interval is a 0 bit, the other half a 1.
30
+ */
31
+ struct IndexIVFSpectralHash: IndexIVF {
32
+
33
+ VectorTransform *vt; // transformation from d to nbit dim
34
+ bool own_fields;
35
+
36
+ int nbit;
37
+ float period;
38
+
39
+ enum ThresholdType {
40
+ Thresh_global,
41
+ Thresh_centroid,
42
+ Thresh_centroid_half,
43
+ Thresh_median
44
+ };
45
+ ThresholdType threshold_type;
46
+
47
+ // size nlist * nbit or 0 if Thresh_global
48
+ std::vector<float> trained;
49
+
50
+ IndexIVFSpectralHash (Index * quantizer, size_t d, size_t nlist,
51
+ int nbit, float period);
52
+
53
+ IndexIVFSpectralHash ();
54
+
55
+ void train_residual(idx_t n, const float* x) override;
56
+
57
+ void encode_vectors(idx_t n, const float* x,
58
+ const idx_t *list_nos,
59
+ uint8_t * codes,
60
+ bool include_listnos = false) const override;
61
+
62
+ InvertedListScanner *get_InvertedListScanner (bool store_pairs)
63
+ const override;
64
+
65
+ ~IndexIVFSpectralHash () override;
66
+
67
+ };
68
+
69
+
70
+
71
+
72
+ }; // namespace faiss
73
+
74
+
75
+ #endif
@@ -0,0 +1,225 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/IndexLSH.h>
11
+
12
+ #include <cstdio>
13
+ #include <cstring>
14
+
15
+ #include <algorithm>
16
+
17
+ #include <faiss/utils/utils.h>
18
+ #include <faiss/utils/hamming.h>
19
+ #include <faiss/impl/FaissAssert.h>
20
+
21
+
22
+ namespace faiss {
23
+
24
+ /***************************************************************
25
+ * IndexLSH
26
+ ***************************************************************/
27
+
28
+
29
+ IndexLSH::IndexLSH (idx_t d, int nbits, bool rotate_data, bool train_thresholds):
30
+ Index(d), nbits(nbits), rotate_data(rotate_data),
31
+ train_thresholds (train_thresholds), rrot(d, nbits)
32
+ {
33
+ is_trained = !train_thresholds;
34
+
35
+ bytes_per_vec = (nbits + 7) / 8;
36
+
37
+ if (rotate_data) {
38
+ rrot.init(5);
39
+ } else {
40
+ FAISS_THROW_IF_NOT (d >= nbits);
41
+ }
42
+ }
43
+
44
+ IndexLSH::IndexLSH ():
45
+ nbits (0), bytes_per_vec(0), rotate_data (false), train_thresholds (false)
46
+ {
47
+ }
48
+
49
+
50
+ const float * IndexLSH::apply_preprocess (idx_t n, const float *x) const
51
+ {
52
+
53
+ float *xt = nullptr;
54
+ if (rotate_data) {
55
+ // also applies bias if exists
56
+ xt = rrot.apply (n, x);
57
+ } else if (d != nbits) {
58
+ assert (nbits < d);
59
+ xt = new float [nbits * n];
60
+ float *xp = xt;
61
+ for (idx_t i = 0; i < n; i++) {
62
+ const float *xl = x + i * d;
63
+ for (int j = 0; j < nbits; j++)
64
+ *xp++ = xl [j];
65
+ }
66
+ }
67
+
68
+ if (train_thresholds) {
69
+
70
+ if (xt == NULL) {
71
+ xt = new float [nbits * n];
72
+ memcpy (xt, x, sizeof(*x) * n * nbits);
73
+ }
74
+
75
+ float *xp = xt;
76
+ for (idx_t i = 0; i < n; i++)
77
+ for (int j = 0; j < nbits; j++)
78
+ *xp++ -= thresholds [j];
79
+ }
80
+
81
+ return xt ? xt : x;
82
+ }
83
+
84
+
85
+
86
+ void IndexLSH::train (idx_t n, const float *x)
87
+ {
88
+ if (train_thresholds) {
89
+ thresholds.resize (nbits);
90
+ train_thresholds = false;
91
+ const float *xt = apply_preprocess (n, x);
92
+ ScopeDeleter<float> del (xt == x ? nullptr : xt);
93
+ train_thresholds = true;
94
+
95
+ float * transposed_x = new float [n * nbits];
96
+ ScopeDeleter<float> del2 (transposed_x);
97
+
98
+ for (idx_t i = 0; i < n; i++)
99
+ for (idx_t j = 0; j < nbits; j++)
100
+ transposed_x [j * n + i] = xt [i * nbits + j];
101
+
102
+ for (idx_t i = 0; i < nbits; i++) {
103
+ float *xi = transposed_x + i * n;
104
+ // std::nth_element
105
+ std::sort (xi, xi + n);
106
+ if (n % 2 == 1)
107
+ thresholds [i] = xi [n / 2];
108
+ else
109
+ thresholds [i] = (xi [n / 2 - 1] + xi [n / 2]) / 2;
110
+
111
+ }
112
+ }
113
+ is_trained = true;
114
+ }
115
+
116
+
117
+ void IndexLSH::add (idx_t n, const float *x)
118
+ {
119
+ FAISS_THROW_IF_NOT (is_trained);
120
+ codes.resize ((ntotal + n) * bytes_per_vec);
121
+
122
+ sa_encode (n, x, &codes[ntotal * bytes_per_vec]);
123
+
124
+ ntotal += n;
125
+ }
126
+
127
+
128
+ void IndexLSH::search (
129
+ idx_t n,
130
+ const float *x,
131
+ idx_t k,
132
+ float *distances,
133
+ idx_t *labels) const
134
+ {
135
+ FAISS_THROW_IF_NOT (is_trained);
136
+ const float *xt = apply_preprocess (n, x);
137
+ ScopeDeleter<float> del (xt == x ? nullptr : xt);
138
+
139
+ uint8_t * qcodes = new uint8_t [n * bytes_per_vec];
140
+ ScopeDeleter<uint8_t> del2 (qcodes);
141
+
142
+ fvecs2bitvecs (xt, qcodes, nbits, n);
143
+
144
+ int * idistances = new int [n * k];
145
+ ScopeDeleter<int> del3 (idistances);
146
+
147
+ int_maxheap_array_t res = { size_t(n), size_t(k), labels, idistances};
148
+
149
+ hammings_knn_hc (&res, qcodes, codes.data(),
150
+ ntotal, bytes_per_vec, true);
151
+
152
+
153
+ // convert distances to floats
154
+ for (int i = 0; i < k * n; i++)
155
+ distances[i] = idistances[i];
156
+
157
+ }
158
+
159
+
160
+ void IndexLSH::transfer_thresholds (LinearTransform *vt) {
161
+ if (!train_thresholds) return;
162
+ FAISS_THROW_IF_NOT (nbits == vt->d_out);
163
+ if (!vt->have_bias) {
164
+ vt->b.resize (nbits, 0);
165
+ vt->have_bias = true;
166
+ }
167
+ for (int i = 0; i < nbits; i++)
168
+ vt->b[i] -= thresholds[i];
169
+ train_thresholds = false;
170
+ thresholds.clear();
171
+ }
172
+
173
+ void IndexLSH::reset() {
174
+ codes.clear();
175
+ ntotal = 0;
176
+ }
177
+
178
+
179
+ size_t IndexLSH::sa_code_size () const
180
+ {
181
+ return bytes_per_vec;
182
+ }
183
+
184
+ void IndexLSH::sa_encode (idx_t n, const float *x,
185
+ uint8_t *bytes) const
186
+ {
187
+ FAISS_THROW_IF_NOT (is_trained);
188
+ const float *xt = apply_preprocess (n, x);
189
+ ScopeDeleter<float> del (xt == x ? nullptr : xt);
190
+ fvecs2bitvecs (xt, bytes, nbits, n);
191
+ }
192
+
193
+ void IndexLSH::sa_decode (idx_t n, const uint8_t *bytes,
194
+ float *x) const
195
+ {
196
+ float *xt = x;
197
+ ScopeDeleter<float> del;
198
+ if (rotate_data || nbits != d) {
199
+ xt = new float [n * nbits];
200
+ del.set(xt);
201
+ }
202
+ bitvecs2fvecs (bytes, xt, nbits, n);
203
+
204
+ if (train_thresholds) {
205
+ float *xp = xt;
206
+ for (idx_t i = 0; i < n; i++) {
207
+ for (int j = 0; j < nbits; j++) {
208
+ *xp++ += thresholds [j];
209
+ }
210
+ }
211
+ }
212
+
213
+ if (rotate_data) {
214
+ rrot.reverse_transform (n, xt, x);
215
+ } else if (nbits != d) {
216
+ for (idx_t i = 0; i < n; i++) {
217
+ memcpy (x + i * d, xt + i * nbits,
218
+ nbits * sizeof(xt[0]));
219
+ }
220
+ }
221
+ }
222
+
223
+
224
+
225
+ } // namespace faiss
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef INDEX_LSH_H
11
+ #define INDEX_LSH_H
12
+
13
+ #include <vector>
14
+
15
+ #include <faiss/Index.h>
16
+ #include <faiss/VectorTransform.h>
17
+
18
+ namespace faiss {
19
+
20
+
21
+ /** The sign of each vector component is put in a binary signature */
22
+ struct IndexLSH:Index {
23
+ typedef unsigned char uint8_t;
24
+
25
+ int nbits; ///< nb of bits per vector
26
+ int bytes_per_vec; ///< nb of 8-bits per encoded vector
27
+ bool rotate_data; ///< whether to apply a random rotation to input
28
+ bool train_thresholds; ///< whether we train thresholds or use 0
29
+
30
+ RandomRotationMatrix rrot; ///< optional random rotation
31
+
32
+ std::vector <float> thresholds; ///< thresholds to compare with
33
+
34
+ /// encoded dataset
35
+ std::vector<uint8_t> codes;
36
+
37
+ IndexLSH (
38
+ idx_t d, int nbits,
39
+ bool rotate_data = true,
40
+ bool train_thresholds = false);
41
+
42
+ /** Preprocesses and resizes the input to the size required to
43
+ * binarize the data
44
+ *
45
+ * @param x input vectors, size n * d
46
+ * @return output vectors, size n * bits. May be the same pointer
47
+ * as x, otherwise it should be deleted by the caller
48
+ */
49
+ const float *apply_preprocess (idx_t n, const float *x) const;
50
+
51
+ void train(idx_t n, const float* x) override;
52
+
53
+ void add(idx_t n, const float* x) override;
54
+
55
+ void search(
56
+ idx_t n,
57
+ const float* x,
58
+ idx_t k,
59
+ float* distances,
60
+ idx_t* labels) const override;
61
+
62
+ void reset() override;
63
+
64
+ /// transfer the thresholds to a pre-processing stage (and unset
65
+ /// train_thresholds)
66
+ void transfer_thresholds (LinearTransform * vt);
67
+
68
+ ~IndexLSH() override {}
69
+
70
+ IndexLSH ();
71
+
72
+ /* standalone codec interface */
73
+ size_t sa_code_size () const override;
74
+
75
+ void sa_encode (idx_t n, const float *x,
76
+ uint8_t *bytes) const override;
77
+
78
+ void sa_decode (idx_t n, const uint8_t *bytes,
79
+ float *x) const override;
80
+
81
+ };
82
+
83
+
84
+ }
85
+
86
+
87
+ #endif
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+
11
+ #include <faiss/IndexLattice.h>
12
+ #include <faiss/utils/hamming.h> // for the bitstring routines
13
+ #include <faiss/impl/FaissAssert.h>
14
+ #include <faiss/utils/distances.h>
15
+
16
+ namespace faiss {
17
+
18
+
19
+ IndexLattice::IndexLattice (idx_t d, int nsq, int scale_nbit, int r2):
20
+ Index (d),
21
+ nsq (nsq),
22
+ dsq (d / nsq),
23
+ zn_sphere_codec (dsq, r2),
24
+ scale_nbit (scale_nbit)
25
+ {
26
+ FAISS_THROW_IF_NOT (d % nsq == 0);
27
+
28
+ lattice_nbit = 0;
29
+ while (!( ((uint64_t)1 << lattice_nbit) >= zn_sphere_codec.nv)) {
30
+ lattice_nbit++;
31
+ }
32
+
33
+ int total_nbit = (lattice_nbit + scale_nbit) * nsq;
34
+
35
+ code_size = (total_nbit + 7) / 8;
36
+
37
+ is_trained = false;
38
+ }
39
+
40
+ void IndexLattice::train(idx_t n, const float* x)
41
+ {
42
+ // compute ranges per sub-block
43
+ trained.resize (nsq * 2);
44
+ float * mins = trained.data();
45
+ float * maxs = trained.data() + nsq;
46
+ for (int sq = 0; sq < nsq; sq++) {
47
+ mins[sq] = HUGE_VAL;
48
+ maxs[sq] = -1;
49
+ }
50
+
51
+ for (idx_t i = 0; i < n; i++) {
52
+ for (int sq = 0; sq < nsq; sq++) {
53
+ float norm2 = fvec_norm_L2sqr (x + i * d + sq * dsq, dsq);
54
+ if (norm2 > maxs[sq]) maxs[sq] = norm2;
55
+ if (norm2 < mins[sq]) mins[sq] = norm2;
56
+ }
57
+ }
58
+
59
+ for (int sq = 0; sq < nsq; sq++) {
60
+ mins[sq] = sqrtf (mins[sq]);
61
+ maxs[sq] = sqrtf (maxs[sq]);
62
+ }
63
+
64
+ is_trained = true;
65
+ }
66
+
67
+ /* The standalone codec interface */
68
+ size_t IndexLattice::sa_code_size () const
69
+ {
70
+ return code_size;
71
+ }
72
+
73
+
74
+
75
+ void IndexLattice::sa_encode (idx_t n, const float *x, uint8_t *codes) const
76
+ {
77
+
78
+ const float * mins = trained.data();
79
+ const float * maxs = mins + nsq;
80
+ int64_t sc = int64_t(1) << scale_nbit;
81
+
82
+ #pragma omp parallel for
83
+ for (idx_t i = 0; i < n; i++) {
84
+ BitstringWriter wr(codes + i * code_size, code_size);
85
+ const float *xi = x + i * d;
86
+ for (int j = 0; j < nsq; j++) {
87
+ float nj =
88
+ (sqrtf(fvec_norm_L2sqr(xi, dsq)) - mins[j])
89
+ * sc / (maxs[j] - mins[j]);
90
+ if (nj < 0) nj = 0;
91
+ if (nj >= sc) nj = sc - 1;
92
+ wr.write((int64_t)nj, scale_nbit);
93
+ wr.write(zn_sphere_codec.encode(xi), lattice_nbit);
94
+ xi += dsq;
95
+ }
96
+ }
97
+ }
98
+
99
+ void IndexLattice::sa_decode (idx_t n, const uint8_t *codes, float *x) const
100
+ {
101
+ const float * mins = trained.data();
102
+ const float * maxs = mins + nsq;
103
+ float sc = int64_t(1) << scale_nbit;
104
+ float r = sqrtf(zn_sphere_codec.r2);
105
+
106
+ #pragma omp parallel for
107
+ for (idx_t i = 0; i < n; i++) {
108
+ BitstringReader rd(codes + i * code_size, code_size);
109
+ float *xi = x + i * d;
110
+ for (int j = 0; j < nsq; j++) {
111
+ float norm =
112
+ (rd.read (scale_nbit) + 0.5) *
113
+ (maxs[j] - mins[j]) / sc + mins[j];
114
+ norm /= r;
115
+ zn_sphere_codec.decode (rd.read (lattice_nbit), xi);
116
+ for (int l = 0; l < dsq; l++) {
117
+ xi[l] *= norm;
118
+ }
119
+ xi += dsq;
120
+ }
121
+ }
122
+ }
123
+
124
+ void IndexLattice::add(idx_t , const float* )
125
+ {
126
+ FAISS_THROW_MSG("not implemented");
127
+ }
128
+
129
+
130
+ void IndexLattice::search(idx_t , const float* , idx_t ,
131
+ float* , idx_t* ) const
132
+ {
133
+ FAISS_THROW_MSG("not implemented");
134
+ }
135
+
136
+
137
+ void IndexLattice::reset()
138
+ {
139
+ FAISS_THROW_MSG("not implemented");
140
+ }
141
+
142
+
143
+ } // namespace faiss