umappp 0.1.6 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,12 @@
1
1
  #pragma once
2
2
  #ifndef NO_MANUAL_VECTORIZATION
3
- #ifdef __SSE__
3
+ #if (defined(__SSE__) || _M_IX86_FP > 0 || defined(_M_AMD64) || defined(_M_X64))
4
4
  #define USE_SSE
5
5
  #ifdef __AVX__
6
6
  #define USE_AVX
7
+ #ifdef __AVX512F__
8
+ #define USE_AVX512
9
+ #endif
7
10
  #endif
8
11
  #endif
9
12
  #endif
@@ -12,15 +15,96 @@
12
15
  #ifdef _MSC_VER
13
16
  #include <intrin.h>
14
17
  #include <stdexcept>
18
// MSVC flavour of cpuid(): forwards to the __cpuidex intrinsic, which fills
// out[0..3] for the requested leaf (eax) and sub-leaf (ecx).
// Declared static, like the non-MSVC definition further down, because this
// function is defined in a header: with external linkage every translation
// unit that includes the header would emit its own definition and the link
// would fail with duplicate symbols.
+ static void cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
19
+ __cpuidex(out, eax, ecx);
20
+ }
21
// MSVC flavour of xgetbv(): forwards x to the _xgetbv intrinsic and returns
// its result unchanged.
+ static __int64 xgetbv(unsigned int x) {
22
+ return _xgetbv(x);
23
+ }
15
24
  #else
16
25
  #include <x86intrin.h>
26
+ #include <cpuid.h>
27
+ #include <stdint.h>
28
// Non-MSVC flavour of cpuid(): invokes the __cpuid_count macro from <cpuid.h>
// with leaf `eax` and sub-leaf `ecx`, storing the macro's four outputs into
// cpuInfo[0], cpuInfo[1], cpuInfo[2] and cpuInfo[3], in that order.
+ static void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) {
29
+ __cpuid_count(eax, ecx, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
30
+ }
31
// Non-MSVC flavour of xgetbv(): executes the XGETBV instruction via inline
// assembly. `index` is passed in ECX (the "c" constraint); the two 32-bit
// outputs are read from EAX ("=a") and EDX ("=d") and combined into one
// 64-bit value with EDX as the upper half.
+ static uint64_t xgetbv(unsigned int index) {
32
+ uint32_t eax, edx;
33
+ __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
34
+ return ((uint64_t)edx << 32) | eax;
35
+ }
36
+ #endif
37
+
38
+ #if defined(USE_AVX512)
39
+ #include <immintrin.h>
17
40
  #endif
18
41
 
19
42
  #if defined(__GNUC__)
20
43
  #define PORTABLE_ALIGN32 __attribute__((aligned(32)))
44
+ #define PORTABLE_ALIGN64 __attribute__((aligned(64)))
21
45
  #else
22
46
  #define PORTABLE_ALIGN32 __declspec(align(32))
47
+ #define PORTABLE_ALIGN64 __declspec(align(64))
23
48
  #endif
49
+
50
+ // Adapted from https://github.com/Mysticial/FeatureDetector
51
+ #define _XCR_XFEATURE_ENABLED_MASK 0
52
+
53
// Runtime check for AVX usability. Returns true only when all of these hold:
//   * CPUID leaf 0 reports that leaf 1 exists and leaf 1 has bit 28 of
//     cpuInfo[2] set (stored in HW_AVX);
//   * leaf 1 also has bit 27 of cpuInfo[2] set (osUsesXSAVE_XRSTORE);
//   * the register read by xgetbv(_XCR_XFEATURE_ENABLED_MASK) has both bits
//     of mask 0x6 (bits 1 and 2) set.
// NOTE(review): bits 1 and 2 are presumably the XMM and YMM state-enable bits
// of XCR0 - confirm against the Intel SDM.
+ static bool AVXCapable() {
54
+ int cpuInfo[4];
55
+
56
+ // CPU support
// cpuInfo[0] of leaf 0 is used as the highest leaf number that may be queried.
57
+ cpuid(cpuInfo, 0, 0);
58
+ int nIds = cpuInfo[0];
59
+
60
+ bool HW_AVX = false;
61
+ if (nIds >= 0x00000001) {
62
+ cpuid(cpuInfo, 0x00000001, 0);
63
+ HW_AVX = (cpuInfo[2] & ((int)1 << 28)) != 0;
64
+ }
65
+
66
+ // OS support
// Leaf 1 is queried again here, this time without the nIds guard; bit 28 is
// re-read into cpuAVXSuport, duplicating the HW_AVX test above.
67
+ cpuid(cpuInfo, 1, 0);
68
+
69
+ bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
70
+ bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
71
+
72
+ bool avxSupported = false;
// xgetbv is only executed once the XSAVE/XRSTORE flag has been seen.
73
+ if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
74
+ uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
75
+ avxSupported = (xcrFeatureMask & 0x6) == 0x6;
76
+ }
77
+ return HW_AVX && avxSupported;
78
+ }
79
+
80
// Runtime check for AVX-512 Foundation usability. Returns false immediately
// when AVXCapable() fails; otherwise returns true only when:
//   * CPUID leaf 0 reports that leaf 7 exists and leaf 7 / sub-leaf 0 has
//     bit 16 of cpuInfo[1] set (HW_AVX512F);
//   * leaf 1 has bits 27 and 28 of cpuInfo[2] set;
//   * the register read by xgetbv(_XCR_XFEATURE_ENABLED_MASK) has every bit
//     of mask 0xe6 (bits 1, 2, 5, 6 and 7) set.
// NOTE(review): bits 5-7 are presumably the opmask / upper-ZMM state-enable
// bits of XCR0 - confirm against the Intel SDM.
+ static bool AVX512Capable() {
81
+ if (!AVXCapable()) return false;
82
+
83
+ int cpuInfo[4];
84
+
85
+ // CPU support
// cpuInfo[0] of leaf 0 is used as the highest leaf number that may be queried.
86
+ cpuid(cpuInfo, 0, 0);
87
+ int nIds = cpuInfo[0];
88
+
89
+ bool HW_AVX512F = false;
90
+ if (nIds >= 0x00000007) { // AVX512 Foundation
91
+ cpuid(cpuInfo, 0x00000007, 0);
92
+ HW_AVX512F = (cpuInfo[1] & ((int)1 << 16)) != 0;
93
+ }
94
+
95
+ // OS support
96
+ cpuid(cpuInfo, 1, 0);
97
+
98
+ bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
99
+ bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
100
+
101
+ bool avx512Supported = false;
// 0xe6 is the AVX mask 0x6 plus 0xe0; all five bits must be set together.
102
+ if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
103
+ uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
104
+ avx512Supported = (xcrFeatureMask & 0xe6) == 0xe6;
105
+ }
106
+ return HW_AVX512F && avx512Supported;
107
+ }
24
108
  #endif
25
109
 
26
110
  #include <queue>
@@ -29,78 +113,85 @@
29
113
  #include <string.h>
30
114
 
31
115
  namespace hnswlib {
32
- typedef size_t labeltype;
33
-
34
- template <typename T>
35
- class pairGreater {
36
- public:
37
- bool operator()(const T& p1, const T& p2) {
38
- return p1.first > p2.first;
39
- }
40
- };
41
-
42
- template<typename T>
43
- static void writeBinaryPOD(std::ostream &out, const T &podRef) {
44
- out.write((char *) &podRef, sizeof(T));
116
+ typedef size_t labeltype;
117
+
118
+ // This can be extended to store state for filtering (e.g. from a std::set)
// Base class for label filters handed to searchKnn()/searchKnnCloserFirst()
// through a BaseFilterFunctor pointer. operator() receives a label and
// returns whether it is accepted; this default implementation accepts every
// label by returning true.
// The destructor is virtual because subclasses are expected to carry state:
// without it, destroying a derived filter through a BaseFilterFunctor pointer
// is undefined behaviour.
119
+ class BaseFilterFunctor {
120
+ public:
121
+ virtual bool operator()(hnswlib::labeltype id) { return true; }
+ virtual ~BaseFilterFunctor() {}
122
+ };
123
+
124
// Comparison functor for pair-like values: returns true when p1.first is
// strictly greater than p2.first. Only the `first` member is inspected.
+ template <typename T>
125
+ class pairGreater {
126
+ public:
127
+ bool operator()(const T& p1, const T& p2) {
128
+ return p1.first > p2.first;
45
129
  }
130
+ };
46
131
 
47
- template<typename T>
48
- static void readBinaryPOD(std::istream &in, T &podRef) {
49
- in.read((char *) &podRef, sizeof(T));
50
- }
132
// Writes the raw object representation of podRef - exactly sizeof(T) bytes
// starting at its address - to `out`. No conversion is applied, and the
// stream state is not checked here.
+ template<typename T>
133
+ static void writeBinaryPOD(std::ostream &out, const T &podRef) {
134
+ out.write((char *) &podRef, sizeof(T));
135
+ }
51
136
 
52
- template<typename MTYPE>
53
- using DISTFUNC = MTYPE(*)(const void *, const void *, const void *);
137
// Counterpart of writeBinaryPOD: reads sizeof(T) bytes from `in` directly
// over the storage of podRef. The stream state is not checked here, so a
// short read is not reported by this function.
+ template<typename T>
138
+ static void readBinaryPOD(std::istream &in, T &podRef) {
139
+ in.read((char *) &podRef, sizeof(T));
140
+ }
54
141
 
142
// Alias for a pointer to a function returning MTYPE and taking three
// const void* arguments.
// NOTE(review): presumably (first vector, second vector, space parameter as
// returned by get_dist_func_param) - confirm against the space implementations.
+ template<typename MTYPE>
143
+ using DISTFUNC = MTYPE(*)(const void *, const void *, const void *);
55
144
 
56
- template<typename MTYPE>
57
- class SpaceInterface {
58
- public:
59
- //virtual void search(void *);
60
- virtual size_t get_data_size() = 0;
145
// Abstract interface with three pure-virtual accessors: a data size, a
// DISTFUNC<MTYPE> distance function pointer, and an untyped parameter
// pointer. The virtual destructor allows deletion through a base pointer.
// NOTE(review): get_data_size presumably reports bytes per stored element,
// and get_dist_func_param the third argument for the distance function -
// confirm against the implementations.
+ template<typename MTYPE>
146
+ class SpaceInterface {
147
+ public:
148
+ // virtual void search(void *);
149
+ virtual size_t get_data_size() = 0;
61
150
 
62
- virtual DISTFUNC<MTYPE> get_dist_func() = 0;
151
+ virtual DISTFUNC<MTYPE> get_dist_func() = 0;
63
152
 
64
- virtual void *get_dist_func_param() = 0;
153
+ virtual void *get_dist_func_param() = 0;
65
154
 
66
- virtual ~SpaceInterface() {}
67
- };
155
+ virtual ~SpaceInterface() {}
156
+ };
68
157
 
69
- template<typename dist_t>
70
- class AlgorithmInterface {
71
- public:
72
- virtual void addPoint(const void *datapoint, labeltype label)=0;
73
- virtual std::priority_queue<std::pair<dist_t, labeltype >> searchKnn(const void *, size_t) const = 0;
158
// Abstract interface of an index over dist_t distances.
//   * addPoint, searchKnn and saveIndex are pure virtual.
//   * searchKnn returns a std::priority_queue of (distance, label) pairs.
//   * searchKnnCloserFirst is virtual but not pure; its default definition
//     follows the class.
//   * isIdAllowed is an optional filter and defaults to nullptr.
// The `-` lines interleaved below are the removed 0.1.6 text of this class
// and of the old out-of-class searchKnnCloserFirst definition.
+ template<typename dist_t>
159
+ class AlgorithmInterface {
160
+ public:
161
+ virtual void addPoint(const void *datapoint, labeltype label, bool replace_deleted = false) = 0;
74
162
 
75
- // Return k nearest neighbor in the order of closer fist
76
- virtual std::vector<std::pair<dist_t, labeltype>>
77
- searchKnnCloserFirst(const void* query_data, size_t k) const;
163
+ virtual std::priority_queue<std::pair<dist_t, labeltype>>
164
+ searchKnn(const void*, size_t, BaseFilterFunctor* isIdAllowed = nullptr) const = 0;
78
165
 
79
- virtual void saveIndex(const std::string &location)=0;
80
- virtual ~AlgorithmInterface(){
81
- }
82
- };
83
-
84
- template<typename dist_t>
85
- std::vector<std::pair<dist_t, labeltype>>
86
- AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k) const {
87
- std::vector<std::pair<dist_t, labeltype>> result;
88
-
89
- // here searchKnn returns the result in the order of further first
90
- auto ret = searchKnn(query_data, k);
91
- {
92
- size_t sz = ret.size();
93
- result.resize(sz);
94
- while (!ret.empty()) {
95
- result[--sz] = ret.top();
96
- ret.pop();
97
- }
98
- }
166
+ // Return the k nearest neighbors, closest first
167
+ virtual std::vector<std::pair<dist_t, labeltype>>
168
+ searchKnnCloserFirst(const void* query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const;
99
169
 
100
- return result;
170
+ virtual void saveIndex(const std::string &location) = 0;
171
+ virtual ~AlgorithmInterface(){
172
+ }
173
+ };
174
+
175
// Default implementation of AlgorithmInterface::searchKnnCloserFirst.
// Calls searchKnn(query_data, k, isIdAllowed) and copies its priority queue
// into a vector in reverse pop order, so the element popped first ends up
// last. The vector has as many entries as the queue held.
+ template<typename dist_t>
176
+ std::vector<std::pair<dist_t, labeltype>>
177
+ AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k,
178
+ BaseFilterFunctor* isIdAllowed) const {
179
+ std::vector<std::pair<dist_t, labeltype>> result;
180
+
181
+ // here searchKnn returns the result in the order of further first
182
+ auto ret = searchKnn(query_data, k, isIdAllowed);
183
+ {
184
+ size_t sz = ret.size();
185
+ result.resize(sz);
// Fill from the back: each top() goes into the highest free slot.
186
+ while (!ret.empty()) {
187
+ result[--sz] = ret.top();
188
+ ret.pop();
189
+ }
101
190
  }
102
191
 
192
+ return result;
103
193
  }
194
+ } // namespace hnswlib
104
195
 
105
196
  #include "space_l2.h"
106
197
  #include "space_ip.h"