faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -10,28 +10,25 @@
10
10
  #ifndef FAISS_ON_DISK_INVERTED_LISTS_H
11
11
  #define FAISS_ON_DISK_INVERTED_LISTS_H
12
12
 
13
- #include <vector>
14
13
  #include <list>
15
14
  #include <typeinfo>
15
+ #include <vector>
16
16
 
17
17
  #include <faiss/IndexIVF.h>
18
- #include <faiss/invlists/InvertedListsIOHook.h>
19
18
  #include <faiss/index_io.h>
19
+ #include <faiss/invlists/InvertedListsIOHook.h>
20
20
 
21
21
  namespace faiss {
22
22
 
23
-
24
23
  struct LockLevels;
25
24
 
26
-
27
25
  struct OnDiskOneList {
28
26
  size_t size; // size of inverted list (entries)
29
27
  size_t capacity; // allocated size (entries)
30
28
  size_t offset; // offset in buffer (bytes)
31
- OnDiskOneList ();
29
+ OnDiskOneList();
32
30
  };
33
31
 
34
-
35
32
  /** On-disk storage of inverted lists.
36
33
  *
37
34
  * The data is stored in a mmapped chunk of memory (base pointer ptr,
@@ -60,17 +57,17 @@ struct OnDiskOneList {
60
57
  * to call prefetch_lists, that launches a set of threads to read the
61
58
  * lists in parallel.
62
59
  */
63
- struct OnDiskInvertedLists: InvertedLists {
60
+ struct OnDiskInvertedLists : InvertedLists {
64
61
  using List = OnDiskOneList;
65
62
 
66
63
  // size nlist
67
64
  std::vector<List> lists;
68
65
 
69
66
  struct Slot {
70
- size_t offset; // bytes
71
- size_t capacity; // bytes
72
- Slot (size_t offset, size_t capacity);
73
- Slot ();
67
+ size_t offset; // bytes
68
+ size_t capacity; // bytes
69
+ Slot(size_t offset, size_t capacity);
70
+ Slot();
74
71
  };
75
72
 
76
73
  // size whatever space remains
@@ -78,73 +75,81 @@ struct OnDiskInvertedLists: InvertedLists {
78
75
 
79
76
  std::string filename;
80
77
  size_t totsize;
81
- uint8_t *ptr; // mmap base pointer
82
- bool read_only; /// are inverted lists mapped read-only
78
+ uint8_t* ptr; // mmap base pointer
79
+ bool read_only; /// are inverted lists mapped read-only
83
80
 
84
- OnDiskInvertedLists (size_t nlist, size_t code_size,
85
- const char *filename);
81
+ OnDiskInvertedLists(size_t nlist, size_t code_size, const char* filename);
86
82
 
87
83
  size_t list_size(size_t list_no) const override;
88
- const uint8_t * get_codes (size_t list_no) const override;
89
- const idx_t * get_ids (size_t list_no) const override;
84
+ const uint8_t* get_codes(size_t list_no) const override;
85
+ const idx_t* get_ids(size_t list_no) const override;
90
86
 
91
- size_t add_entries (
92
- size_t list_no, size_t n_entry,
93
- const idx_t* ids, const uint8_t *code) override;
87
+ size_t add_entries(
88
+ size_t list_no,
89
+ size_t n_entry,
90
+ const idx_t* ids,
91
+ const uint8_t* code) override;
94
92
 
95
- void update_entries (size_t list_no, size_t offset, size_t n_entry,
96
- const idx_t *ids, const uint8_t *code) override;
93
+ void update_entries(
94
+ size_t list_no,
95
+ size_t offset,
96
+ size_t n_entry,
97
+ const idx_t* ids,
98
+ const uint8_t* code) override;
97
99
 
98
- void resize (size_t list_no, size_t new_size) override;
100
+ void resize(size_t list_no, size_t new_size) override;
99
101
 
100
102
  // copy all inverted lists into *this, in compact form (without
101
103
  // allocating slots)
102
- size_t merge_from (const InvertedLists **ils, int n_il, bool verbose=false);
104
+ size_t merge_from(
105
+ const InvertedLists** ils,
106
+ int n_il,
107
+ bool verbose = false);
103
108
 
104
109
  /// same as merge_from for a single invlist
105
- size_t merge_from_1 (const InvertedLists *il, bool verbose=false);
110
+ size_t merge_from_1(const InvertedLists* il, bool verbose = false);
106
111
 
107
112
  /// restrict the inverted lists to l0:l1 without touching the mmapped region
108
113
  void crop_invlists(size_t l0, size_t l1);
109
114
 
110
- void prefetch_lists (const idx_t *list_nos, int nlist) const override;
115
+ void prefetch_lists(const idx_t* list_nos, int nlist) const override;
111
116
 
112
- ~OnDiskInvertedLists () override;
117
+ ~OnDiskInvertedLists() override;
113
118
 
114
119
  // private
115
120
 
116
- LockLevels * locks;
121
+ LockLevels* locks;
117
122
 
118
123
  // encapsulates the threads that are busy prefetching
119
124
  struct OngoingPrefetch;
120
- OngoingPrefetch *pf;
125
+ OngoingPrefetch* pf;
121
126
  int prefetch_nthread;
122
127
 
123
- void do_mmap ();
124
- void update_totsize (size_t new_totsize);
125
- void resize_locked (size_t list_no, size_t new_size);
126
- size_t allocate_slot (size_t capacity);
127
- void free_slot (size_t offset, size_t capacity);
128
+ void do_mmap();
129
+ void update_totsize(size_t new_totsize);
130
+ void resize_locked(size_t list_no, size_t new_size);
131
+ size_t allocate_slot(size_t capacity);
132
+ void free_slot(size_t offset, size_t capacity);
128
133
 
129
134
  /// override all list sizes and make a packed storage
130
- void set_all_lists_sizes(const size_t *sizes);
135
+ void set_all_lists_sizes(const size_t* sizes);
131
136
 
132
137
  // empty constructor for the I/O functions
133
- OnDiskInvertedLists ();
138
+ OnDiskInvertedLists();
134
139
  };
135
140
 
136
- struct OnDiskInvertedListsIOHook: InvertedListsIOHook {
141
+ struct OnDiskInvertedListsIOHook : InvertedListsIOHook {
137
142
  OnDiskInvertedListsIOHook();
138
- void write(const InvertedLists *ils, IOWriter *f) const override;
139
- InvertedLists * read(IOReader *f, int io_flags) const override;
140
- InvertedLists * read_ArrayInvertedLists(
141
- IOReader *f, int io_flags,
142
- size_t nlist, size_t code_size,
143
- const std::vector<size_t> &sizes) const override;
143
+ void write(const InvertedLists* ils, IOWriter* f) const override;
144
+ InvertedLists* read(IOReader* f, int io_flags) const override;
145
+ InvertedLists* read_ArrayInvertedLists(
146
+ IOReader* f,
147
+ int io_flags,
148
+ size_t nlist,
149
+ size_t code_size,
150
+ const std::vector<size_t>& sizes) const override;
144
151
  };
145
152
 
146
-
147
-
148
153
  } // namespace faiss
149
154
 
150
155
  #endif
@@ -5,47 +5,46 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- #include <faiss/python/python_callbacks.h>
9
8
  #include <faiss/impl/FaissAssert.h>
9
+ #include <faiss/python/python_callbacks.h>
10
10
 
11
11
  namespace {
12
12
 
13
- struct PyThreadLock {
13
+ struct PyThreadLock {
14
14
  PyGILState_STATE gstate;
15
15
 
16
- PyThreadLock () {
16
+ PyThreadLock() {
17
17
  gstate = PyGILState_Ensure();
18
18
  }
19
19
 
20
- ~PyThreadLock () {
20
+ ~PyThreadLock() {
21
21
  PyGILState_Release(gstate);
22
22
  }
23
23
  };
24
24
 
25
-
26
- };
27
-
25
+ }; // namespace
28
26
 
29
27
  /***********************************************************
30
28
  * Callbacks for IO reader and writer
31
29
  ***********************************************************/
32
30
 
33
- PyCallbackIOWriter::PyCallbackIOWriter(PyObject *callback, size_t bs):
34
- callback(callback), bs(bs) {
31
+ PyCallbackIOWriter::PyCallbackIOWriter(PyObject* callback, size_t bs)
32
+ : callback(callback), bs(bs) {
35
33
  PyThreadLock gil;
36
34
  Py_INCREF(callback);
37
35
  name = "PyCallbackIOWriter";
38
36
  }
39
37
 
40
-
41
-
42
- size_t PyCallbackIOWriter::operator()(const void *ptrv, size_t size, size_t nitems) {
38
+ size_t PyCallbackIOWriter::operator()(
39
+ const void* ptrv,
40
+ size_t size,
41
+ size_t nitems) {
43
42
  size_t ws = size * nitems;
44
- const char *ptr = (const char*)ptrv;
43
+ const char* ptr = (const char*)ptrv;
45
44
  PyThreadLock gil;
46
- while(ws > 0) {
45
+ while (ws > 0) {
47
46
  size_t wi = ws > bs ? bs : ws;
48
- PyObject * result = PyObject_CallFunction(
47
+ PyObject* result = PyObject_CallFunction(
49
48
  callback, "(N)", PyBytes_FromStringAndSize(ptr, wi));
50
49
  if (result == NULL) {
51
50
  FAISS_THROW_MSG("py err");
@@ -63,27 +62,25 @@ PyCallbackIOWriter::~PyCallbackIOWriter() {
63
62
  Py_DECREF(callback);
64
63
  }
65
64
 
66
-
67
- PyCallbackIOReader::PyCallbackIOReader(PyObject *callback, size_t bs):
68
- callback(callback), bs(bs) {
65
+ PyCallbackIOReader::PyCallbackIOReader(PyObject* callback, size_t bs)
66
+ : callback(callback), bs(bs) {
69
67
  PyThreadLock gil;
70
68
  Py_INCREF(callback);
71
69
  name = "PyCallbackIOReader";
72
70
  }
73
71
 
74
- size_t PyCallbackIOReader::operator()(void *ptrv, size_t size, size_t nitems)
75
- {
72
+ size_t PyCallbackIOReader::operator()(void* ptrv, size_t size, size_t nitems) {
76
73
  size_t rs = size * nitems;
77
74
  size_t nb = 0;
78
- char *ptr = (char*)ptrv;
75
+ char* ptr = (char*)ptrv;
79
76
  PyThreadLock gil;
80
- while(rs > 0) {
77
+ while (rs > 0) {
81
78
  size_t ri = rs > bs ? bs : rs;
82
- PyObject * result = PyObject_CallFunction(callback, "(n)", ri);
79
+ PyObject* result = PyObject_CallFunction(callback, "(n)", ri);
83
80
  if (result == NULL) {
84
81
  FAISS_THROW_MSG("propagate py error");
85
82
  }
86
- if(!PyBytes_Check(result)) {
83
+ if (!PyBytes_Check(result)) {
87
84
  Py_DECREF(result);
88
85
  FAISS_THROW_MSG("read callback did not return a bytes object");
89
86
  }
@@ -95,8 +92,8 @@ size_t PyCallbackIOReader::operator()(void *ptrv, size_t size, size_t nitems)
95
92
  nb += sz;
96
93
  if (sz > rs) {
97
94
  Py_DECREF(result);
98
- FAISS_THROW_FMT("read callback returned %zd bytes (asked %zd)",
99
- sz, rs);
95
+ FAISS_THROW_FMT(
96
+ "read callback returned %zd bytes (asked %zd)", sz, rs);
100
97
  }
101
98
  memcpy(ptr, PyBytes_AsString(result), sz);
102
99
  Py_DECREF(result);
@@ -7,46 +7,39 @@
7
7
 
8
8
  #pragma once
9
9
 
10
- #include "Python.h"
11
10
  #include <faiss/impl/io.h>
12
11
  #include <faiss/invlists/InvertedLists.h>
12
+ #include "Python.h"
13
13
 
14
14
  // all callbacks have to acquire the GIL on input
15
15
 
16
-
17
16
  /***********************************************************
18
17
  * Callbacks for IO reader and writer
19
18
  ***********************************************************/
20
19
 
21
- struct PyCallbackIOWriter: faiss::IOWriter {
22
-
23
- PyObject * callback;
20
+ struct PyCallbackIOWriter : faiss::IOWriter {
21
+ PyObject* callback;
24
22
  size_t bs; // maximum write size
25
23
 
26
24
  /** Callback: Python function that takes a bytes object and
27
25
  * returns the number of bytes successfully written.
28
26
  */
29
- explicit PyCallbackIOWriter(PyObject *callback,
30
- size_t bs = 1024 * 1024);
27
+ explicit PyCallbackIOWriter(PyObject* callback, size_t bs = 1024 * 1024);
31
28
 
32
- size_t operator()(const void *ptrv, size_t size, size_t nitems) override;
29
+ size_t operator()(const void* ptrv, size_t size, size_t nitems) override;
33
30
 
34
31
  ~PyCallbackIOWriter() override;
35
-
36
32
  };
37
33
 
38
-
39
- struct PyCallbackIOReader: faiss::IOReader {
40
- PyObject * callback;
34
+ struct PyCallbackIOReader : faiss::IOReader {
35
+ PyObject* callback;
41
36
  size_t bs; // maximum buffer size
42
37
 
43
38
  /** Callback: Python function that takes a size and returns a
44
39
  * bytes object with the resulting read */
45
- explicit PyCallbackIOReader(PyObject *callback,
46
- size_t bs = 1024 * 1024);
40
+ explicit PyCallbackIOReader(PyObject* callback, size_t bs = 1024 * 1024);
47
41
 
48
- size_t operator()(void *ptrv, size_t size, size_t nitems) override;
42
+ size_t operator()(void* ptrv, size_t size, size_t nitems) override;
49
43
 
50
44
  ~PyCallbackIOReader() override;
51
-
52
45
  };
@@ -5,12 +5,11 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
10
+ #include <cassert>
11
11
  #include <cstdint>
12
12
  #include <cstdlib>
13
- #include <cassert>
14
13
  #include <cstring>
15
14
 
16
15
  #include <algorithm>
@@ -19,33 +18,34 @@
19
18
 
20
19
  namespace faiss {
21
20
 
22
- template<int A=32>
23
- inline bool is_aligned_pointer(const void* x)
24
- {
21
+ template <int A = 32>
22
+ inline bool is_aligned_pointer(const void* x) {
25
23
  size_t xi = (size_t)x;
26
24
  return xi % A == 0;
27
25
  }
28
26
 
29
27
  // class that manages suitably aligned arrays for SIMD
30
28
  // T should be a POD type. The default alignment is 32 for AVX
31
- template<class T, int A=32>
29
+ template <class T, int A = 32>
32
30
  struct AlignedTableTightAlloc {
33
- T * ptr;
31
+ T* ptr;
34
32
  size_t numel;
35
33
 
36
- AlignedTableTightAlloc(): ptr(nullptr), numel(0)
37
- { }
34
+ AlignedTableTightAlloc() : ptr(nullptr), numel(0) {}
38
35
 
39
- explicit AlignedTableTightAlloc(size_t n): ptr(nullptr), numel(0)
40
- { resize(n); }
36
+ explicit AlignedTableTightAlloc(size_t n) : ptr(nullptr), numel(0) {
37
+ resize(n);
38
+ }
41
39
 
42
- size_t itemsize() const {return sizeof(T); }
40
+ size_t itemsize() const {
41
+ return sizeof(T);
42
+ }
43
43
 
44
44
  void resize(size_t n) {
45
45
  if (numel == n) {
46
46
  return;
47
47
  }
48
- T * new_ptr;
48
+ T* new_ptr;
49
49
  if (n > 0) {
50
50
  int ret = posix_memalign((void**)&new_ptr, A, n * sizeof(T));
51
51
  if (ret != 0) {
@@ -62,34 +62,54 @@ struct AlignedTableTightAlloc {
62
62
  ptr = new_ptr;
63
63
  }
64
64
 
65
- void clear() {memset(ptr, 0, nbytes()); }
66
- size_t size() const {return numel; }
67
- size_t nbytes() const {return numel * sizeof(T); }
65
+ void clear() {
66
+ memset(ptr, 0, nbytes());
67
+ }
68
+ size_t size() const {
69
+ return numel;
70
+ }
71
+ size_t nbytes() const {
72
+ return numel * sizeof(T);
73
+ }
68
74
 
69
- T * get() {return ptr; }
70
- const T * get() const {return ptr; }
71
- T * data() {return ptr; }
72
- const T * data() const {return ptr; }
73
- T & operator [] (size_t i) {return ptr[i]; }
74
- T operator [] (size_t i) const {return ptr[i]; }
75
+ T* get() {
76
+ return ptr;
77
+ }
78
+ const T* get() const {
79
+ return ptr;
80
+ }
81
+ T* data() {
82
+ return ptr;
83
+ }
84
+ const T* data() const {
85
+ return ptr;
86
+ }
87
+ T& operator[](size_t i) {
88
+ return ptr[i];
89
+ }
90
+ T operator[](size_t i) const {
91
+ return ptr[i];
92
+ }
75
93
 
76
- ~AlignedTableTightAlloc() {posix_memalign_free(ptr); }
94
+ ~AlignedTableTightAlloc() {
95
+ posix_memalign_free(ptr);
96
+ }
77
97
 
78
- AlignedTableTightAlloc<T, A> & operator =
79
- (const AlignedTableTightAlloc<T, A> & other) {
98
+ AlignedTableTightAlloc<T, A>& operator=(
99
+ const AlignedTableTightAlloc<T, A>& other) {
80
100
  resize(other.numel);
81
101
  memcpy(ptr, other.ptr, sizeof(T) * numel);
82
102
  return *this;
83
103
  }
84
104
 
85
- AlignedTableTightAlloc(const AlignedTableTightAlloc<T, A> & other) {
105
+ AlignedTableTightAlloc(const AlignedTableTightAlloc<T, A>& other)
106
+ : ptr(nullptr), numel(0) {
86
107
  *this = other;
87
108
  }
88
-
89
109
  };
90
110
 
91
111
  // same as AlignedTableTightAlloc, but with geometric re-allocation
92
- template<class T, int A=32>
112
+ template <class T, int A = 32>
93
113
  struct AlignedTable {
94
114
  AlignedTableTightAlloc<T, A> tab;
95
115
  size_t numel = 0;
@@ -110,32 +130,47 @@ struct AlignedTable {
110
130
 
111
131
  AlignedTable() {}
112
132
 
113
- explicit AlignedTable(size_t n):
114
- tab(round_capacity(n)),
115
- numel(n)
116
- { }
133
+ explicit AlignedTable(size_t n) : tab(round_capacity(n)), numel(n) {}
117
134
 
118
- size_t itemsize() const {return sizeof(T); }
135
+ size_t itemsize() const {
136
+ return sizeof(T);
137
+ }
119
138
 
120
139
  void resize(size_t n) {
121
140
  tab.resize(round_capacity(n));
122
141
  numel = n;
123
142
  }
124
143
 
125
- void clear() { tab.clear(); }
126
- size_t size() const {return numel; }
127
- size_t nbytes() const {return numel * sizeof(T); }
144
+ void clear() {
145
+ tab.clear();
146
+ }
147
+ size_t size() const {
148
+ return numel;
149
+ }
150
+ size_t nbytes() const {
151
+ return numel * sizeof(T);
152
+ }
128
153
 
129
- T * get() {return tab.get(); }
130
- const T * get() const {return tab.get(); }
131
- T * data() {return tab.get(); }
132
- const T * data() const {return tab.get(); }
133
- T & operator [] (size_t i) {return tab.ptr[i]; }
134
- T operator [] (size_t i) const {return tab.ptr[i]; }
154
+ T* get() {
155
+ return tab.get();
156
+ }
157
+ const T* get() const {
158
+ return tab.get();
159
+ }
160
+ T* data() {
161
+ return tab.get();
162
+ }
163
+ const T* data() const {
164
+ return tab.get();
165
+ }
166
+ T& operator[](size_t i) {
167
+ return tab.ptr[i];
168
+ }
169
+ T operator[](size_t i) const {
170
+ return tab.ptr[i];
171
+ }
135
172
 
136
173
  // assign and copy constructor should work as expected
137
-
138
174
  };
139
175
 
140
-
141
176
  } // namespace faiss
@@ -11,86 +11,82 @@
11
11
 
12
12
  #include <faiss/utils/Heap.h>
13
13
 
14
-
15
14
  namespace faiss {
16
15
 
17
-
18
16
  template <typename C>
19
- void HeapArray<C>::heapify ()
20
- {
17
+ void HeapArray<C>::heapify() {
21
18
  #pragma omp parallel for
22
19
  for (int64_t j = 0; j < nh; j++)
23
- heap_heapify<C> (k, val + j * k, ids + j * k);
20
+ heap_heapify<C>(k, val + j * k, ids + j * k);
24
21
  }
25
22
 
26
23
  template <typename C>
27
- void HeapArray<C>::reorder ()
28
- {
24
+ void HeapArray<C>::reorder() {
29
25
  #pragma omp parallel for
30
26
  for (int64_t j = 0; j < nh; j++)
31
- heap_reorder<C> (k, val + j * k, ids + j * k);
27
+ heap_reorder<C>(k, val + j * k, ids + j * k);
32
28
  }
33
29
 
34
30
  template <typename C>
35
- void HeapArray<C>::addn (size_t nj, const T *vin, TI j0,
36
- size_t i0, int64_t ni)
37
- {
38
- if (ni == -1) ni = nh;
39
- assert (i0 >= 0 && i0 + ni <= nh);
31
+ void HeapArray<C>::addn(size_t nj, const T* vin, TI j0, size_t i0, int64_t ni) {
32
+ if (ni == -1)
33
+ ni = nh;
34
+ assert(i0 >= 0 && i0 + ni <= nh);
40
35
  #pragma omp parallel for
41
36
  for (int64_t i = i0; i < i0 + ni; i++) {
42
- T * __restrict simi = get_val(i);
43
- TI * __restrict idxi = get_ids (i);
44
- const T *ip_line = vin + (i - i0) * nj;
37
+ T* __restrict simi = get_val(i);
38
+ TI* __restrict idxi = get_ids(i);
39
+ const T* ip_line = vin + (i - i0) * nj;
45
40
 
46
41
  for (size_t j = 0; j < nj; j++) {
47
- T ip = ip_line [j];
42
+ T ip = ip_line[j];
48
43
  if (C::cmp(simi[0], ip)) {
49
- heap_replace_top<C> (k, simi, idxi, ip, j + j0);
44
+ heap_replace_top<C>(k, simi, idxi, ip, j + j0);
50
45
  }
51
46
  }
52
47
  }
53
48
  }
54
49
 
55
50
  template <typename C>
56
- void HeapArray<C>::addn_with_ids (
57
- size_t nj, const T *vin, const TI *id_in,
58
- int64_t id_stride, size_t i0, int64_t ni)
59
- {
51
+ void HeapArray<C>::addn_with_ids(
52
+ size_t nj,
53
+ const T* vin,
54
+ const TI* id_in,
55
+ int64_t id_stride,
56
+ size_t i0,
57
+ int64_t ni) {
60
58
  if (id_in == nullptr) {
61
- addn (nj, vin, 0, i0, ni);
59
+ addn(nj, vin, 0, i0, ni);
62
60
  return;
63
61
  }
64
- if (ni == -1) ni = nh;
65
- assert (i0 >= 0 && i0 + ni <= nh);
62
+ if (ni == -1)
63
+ ni = nh;
64
+ assert(i0 >= 0 && i0 + ni <= nh);
66
65
  #pragma omp parallel for
67
66
  for (int64_t i = i0; i < i0 + ni; i++) {
68
- T * __restrict simi = get_val(i);
69
- TI * __restrict idxi = get_ids (i);
70
- const T *ip_line = vin + (i - i0) * nj;
71
- const TI *id_line = id_in + (i - i0) * id_stride;
67
+ T* __restrict simi = get_val(i);
68
+ TI* __restrict idxi = get_ids(i);
69
+ const T* ip_line = vin + (i - i0) * nj;
70
+ const TI* id_line = id_in + (i - i0) * id_stride;
72
71
 
73
72
  for (size_t j = 0; j < nj; j++) {
74
- T ip = ip_line [j];
73
+ T ip = ip_line[j];
75
74
  if (C::cmp(simi[0], ip)) {
76
- heap_replace_top<C> (k, simi, idxi, ip, id_line [j]);
75
+ heap_replace_top<C>(k, simi, idxi, ip, id_line[j]);
77
76
  }
78
77
  }
79
78
  }
80
79
  }
81
80
 
82
81
  template <typename C>
83
- void HeapArray<C>::per_line_extrema (
84
- T * out_val,
85
- TI * out_ids) const
86
- {
82
+ void HeapArray<C>::per_line_extrema(T* out_val, TI* out_ids) const {
87
83
  #pragma omp parallel for
88
84
  for (int64_t j = 0; j < nh; j++) {
89
85
  int64_t imin = -1;
90
- typename C::T xval = C::Crev::neutral ();
91
- const typename C::T * x_ = val + j * k;
86
+ typename C::T xval = C::Crev::neutral();
87
+ const typename C::T* x_ = val + j * k;
92
88
  for (size_t i = 0; i < k; i++)
93
- if (C::cmp (x_[i], xval)) {
89
+ if (C::cmp(x_[i], xval)) {
94
90
  xval = x_[i];
95
91
  imin = i;
96
92
  }
@@ -99,22 +95,18 @@ void HeapArray<C>::per_line_extrema (
99
95
 
100
96
  if (out_ids) {
101
97
  if (ids && imin != -1)
102
- out_ids[j] = ids [j * k + imin];
98
+ out_ids[j] = ids[j * k + imin];
103
99
  else
104
100
  out_ids[j] = imin;
105
101
  }
106
102
  }
107
103
  }
108
104
 
109
-
110
-
111
-
112
105
  // explicit instantiations
113
106
 
114
- template struct HeapArray<CMin <float, int64_t> >;
115
- template struct HeapArray<CMax <float, int64_t> >;
116
- template struct HeapArray<CMin <int, int64_t> >;
117
- template struct HeapArray<CMax <int, int64_t> >;
118
-
107
+ template struct HeapArray<CMin<float, int64_t>>;
108
+ template struct HeapArray<CMax<float, int64_t>>;
109
+ template struct HeapArray<CMin<int, int64_t>>;
110
+ template struct HeapArray<CMax<int, int64_t>>;
119
111
 
120
- } // END namespace fasis
112
+ } // namespace faiss