hnswlib 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/README.md +14 -7
- data/ext/hnswlib/hnswlibext.cpp +1 -3
- data/ext/hnswlib/hnswlibext.hpp +326 -93
- data/ext/hnswlib/src/bruteforce.h +142 -131
- data/ext/hnswlib/src/hnswalg.h +1028 -964
- data/ext/hnswlib/src/hnswlib.h +74 -66
- data/ext/hnswlib/src/space_ip.h +299 -299
- data/ext/hnswlib/src/space_l2.h +268 -273
- data/ext/hnswlib/src/visited_list_pool.h +54 -55
- data/lib/hnswlib/version.rb +2 -2
- data/lib/hnswlib.rb +21 -18
- data/sig/hnswlib.rbs +9 -7
- metadata +3 -3
data/ext/hnswlib/src/hnswlib.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#pragma once
|
2
2
|
#ifndef NO_MANUAL_VECTORIZATION
|
3
|
-
#
|
3
|
+
#if (defined(__SSE__) || _M_IX86_FP > 0 || defined(_M_AMD64) || defined(_M_X64))
|
4
4
|
#define USE_SSE
|
5
5
|
#ifdef __AVX__
|
6
6
|
#define USE_AVX
|
@@ -15,21 +15,20 @@
|
|
15
15
|
#ifdef _MSC_VER
|
16
16
|
#include <intrin.h>
|
17
17
|
#include <stdexcept>
|
18
|
-
|
19
|
-
void cpu_x86::cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
|
18
|
+
void cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
|
20
19
|
__cpuidex(out, eax, ecx);
|
21
20
|
}
|
22
|
-
__int64 xgetbv(unsigned int x) {
|
21
|
+
static __int64 xgetbv(unsigned int x) {
|
23
22
|
return _xgetbv(x);
|
24
23
|
}
|
25
24
|
#else
|
26
25
|
#include <x86intrin.h>
|
27
26
|
#include <cpuid.h>
|
28
27
|
#include <stdint.h>
|
29
|
-
void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) {
|
28
|
+
static void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) {
|
30
29
|
__cpuid_count(eax, ecx, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
|
31
30
|
}
|
32
|
-
uint64_t xgetbv(unsigned int index) {
|
31
|
+
static uint64_t xgetbv(unsigned int index) {
|
33
32
|
uint32_t eax, edx;
|
34
33
|
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
35
34
|
return ((uint64_t)edx << 32) | eax;
|
@@ -51,7 +50,7 @@ uint64_t xgetbv(unsigned int index) {
|
|
51
50
|
// Adapted from https://github.com/Mysticial/FeatureDetector
|
52
51
|
#define _XCR_XFEATURE_ENABLED_MASK 0
|
53
52
|
|
54
|
-
bool AVXCapable() {
|
53
|
+
static bool AVXCapable() {
|
55
54
|
int cpuInfo[4];
|
56
55
|
|
57
56
|
// CPU support
|
@@ -78,7 +77,7 @@ bool AVXCapable() {
|
|
78
77
|
return HW_AVX && avxSupported;
|
79
78
|
}
|
80
79
|
|
81
|
-
bool AVX512Capable() {
|
80
|
+
static bool AVX512Capable() {
|
82
81
|
if (!AVXCapable()) return false;
|
83
82
|
|
84
83
|
int cpuInfo[4];
|
@@ -88,7 +87,7 @@ bool AVX512Capable() {
|
|
88
87
|
int nIds = cpuInfo[0];
|
89
88
|
|
90
89
|
bool HW_AVX512F = false;
|
91
|
-
if (nIds >= 0x00000007) {
|
90
|
+
if (nIds >= 0x00000007) { // AVX512 Foundation
|
92
91
|
cpuid(cpuInfo, 0x00000007, 0);
|
93
92
|
HW_AVX512F = (cpuInfo[1] & ((int)1 << 16)) != 0;
|
94
93
|
}
|
@@ -114,77 +113,86 @@ bool AVX512Capable() {
|
|
114
113
|
#include <string.h>
|
115
114
|
|
116
115
|
namespace hnswlib {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
116
|
+
typedef size_t labeltype;
|
117
|
+
|
118
|
+
// This can be extended to store state for filtering (e.g. from a std::set)
|
119
|
+
class BaseFilterFunctor {
|
120
|
+
public:
|
121
|
+
virtual bool operator()(hnswlib::labeltype id) { return true; }
|
122
|
+
virtual ~BaseFilterFunctor() {};
|
123
|
+
};
|
124
|
+
|
125
|
+
template <typename T>
|
126
|
+
class pairGreater {
|
127
|
+
public:
|
128
|
+
bool operator()(const T& p1, const T& p2) {
|
129
|
+
return p1.first > p2.first;
|
130
130
|
}
|
131
|
+
};
|
131
132
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
133
|
+
template<typename T>
|
134
|
+
static void writeBinaryPOD(std::ostream &out, const T &podRef) {
|
135
|
+
out.write((char *) &podRef, sizeof(T));
|
136
|
+
}
|
136
137
|
|
137
|
-
|
138
|
-
|
138
|
+
template<typename T>
|
139
|
+
static void readBinaryPOD(std::istream &in, T &podRef) {
|
140
|
+
in.read((char *) &podRef, sizeof(T));
|
141
|
+
}
|
139
142
|
|
143
|
+
template<typename MTYPE>
|
144
|
+
using DISTFUNC = MTYPE(*)(const void *, const void *, const void *);
|
140
145
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
+
template<typename MTYPE>
|
147
|
+
class SpaceInterface {
|
148
|
+
public:
|
149
|
+
// virtual void search(void *);
|
150
|
+
virtual size_t get_data_size() = 0;
|
146
151
|
|
147
|
-
|
152
|
+
virtual DISTFUNC<MTYPE> get_dist_func() = 0;
|
148
153
|
|
149
|
-
|
154
|
+
virtual void *get_dist_func_param() = 0;
|
150
155
|
|
151
|
-
|
152
|
-
|
156
|
+
virtual ~SpaceInterface() {}
|
157
|
+
};
|
153
158
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
virtual std::priority_queue<std::pair<dist_t, labeltype >> searchKnn(const void *, size_t) const = 0;
|
159
|
+
template<typename dist_t>
|
160
|
+
class AlgorithmInterface {
|
161
|
+
public:
|
162
|
+
virtual void addPoint(const void *datapoint, labeltype label, bool replace_deleted = false) = 0;
|
159
163
|
|
160
|
-
|
161
|
-
|
162
|
-
searchKnnCloserFirst(const void* query_data, size_t k) const;
|
164
|
+
virtual std::priority_queue<std::pair<dist_t, labeltype>>
|
165
|
+
searchKnn(const void*, size_t, BaseFilterFunctor* isIdAllowed = nullptr) const = 0;
|
163
166
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
};
|
168
|
-
|
169
|
-
template<typename dist_t>
|
170
|
-
std::vector<std::pair<dist_t, labeltype>>
|
171
|
-
AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k) const {
|
172
|
-
std::vector<std::pair<dist_t, labeltype>> result;
|
173
|
-
|
174
|
-
// here searchKnn returns the result in the order of further first
|
175
|
-
auto ret = searchKnn(query_data, k);
|
176
|
-
{
|
177
|
-
size_t sz = ret.size();
|
178
|
-
result.resize(sz);
|
179
|
-
while (!ret.empty()) {
|
180
|
-
result[--sz] = ret.top();
|
181
|
-
ret.pop();
|
182
|
-
}
|
183
|
-
}
|
167
|
+
// Return k nearest neighbor in the order of closer fist
|
168
|
+
virtual std::vector<std::pair<dist_t, labeltype>>
|
169
|
+
searchKnnCloserFirst(const void* query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const;
|
184
170
|
|
185
|
-
|
171
|
+
virtual void saveIndex(const std::string &location) = 0;
|
172
|
+
virtual ~AlgorithmInterface(){
|
186
173
|
}
|
174
|
+
};
|
175
|
+
|
176
|
+
template<typename dist_t>
|
177
|
+
std::vector<std::pair<dist_t, labeltype>>
|
178
|
+
AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k,
|
179
|
+
BaseFilterFunctor* isIdAllowed) const {
|
180
|
+
std::vector<std::pair<dist_t, labeltype>> result;
|
181
|
+
|
182
|
+
// here searchKnn returns the result in the order of further first
|
183
|
+
auto ret = searchKnn(query_data, k, isIdAllowed);
|
184
|
+
{
|
185
|
+
size_t sz = ret.size();
|
186
|
+
result.resize(sz);
|
187
|
+
while (!ret.empty()) {
|
188
|
+
result[--sz] = ret.top();
|
189
|
+
ret.pop();
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
return result;
|
187
194
|
}
|
195
|
+
} // namespace hnswlib
|
188
196
|
|
189
197
|
#include "space_l2.h"
|
190
198
|
#include "space_ip.h"
|