umappp 0.1.6 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -16
- data/ext/umappp/numo.hpp +957 -833
- data/ext/umappp/umappp.cpp +39 -45
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +7 -10
data/vendor/hnswlib/hnswlib.h
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
#pragma once
|
2
2
|
#ifndef NO_MANUAL_VECTORIZATION
|
3
|
-
#
|
3
|
+
#if (defined(__SSE__) || _M_IX86_FP > 0 || defined(_M_AMD64) || defined(_M_X64))
|
4
4
|
#define USE_SSE
|
5
5
|
#ifdef __AVX__
|
6
6
|
#define USE_AVX
|
7
|
+
#ifdef __AVX512F__
|
8
|
+
#define USE_AVX512
|
9
|
+
#endif
|
7
10
|
#endif
|
8
11
|
#endif
|
9
12
|
#endif
|
@@ -12,15 +15,96 @@
|
|
12
15
|
#ifdef _MSC_VER
|
13
16
|
#include <intrin.h>
|
14
17
|
#include <stdexcept>
|
18
|
+
void cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
|
19
|
+
__cpuidex(out, eax, ecx);
|
20
|
+
}
|
21
|
+
static __int64 xgetbv(unsigned int x) {
|
22
|
+
return _xgetbv(x);
|
23
|
+
}
|
15
24
|
#else
|
16
25
|
#include <x86intrin.h>
|
26
|
+
#include <cpuid.h>
|
27
|
+
#include <stdint.h>
|
28
|
+
static void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) {
|
29
|
+
__cpuid_count(eax, ecx, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
|
30
|
+
}
|
31
|
+
static uint64_t xgetbv(unsigned int index) {
|
32
|
+
uint32_t eax, edx;
|
33
|
+
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
34
|
+
return ((uint64_t)edx << 32) | eax;
|
35
|
+
}
|
36
|
+
#endif
|
37
|
+
|
38
|
+
#if defined(USE_AVX512)
|
39
|
+
#include <immintrin.h>
|
17
40
|
#endif
|
18
41
|
|
19
42
|
#if defined(__GNUC__)
|
20
43
|
#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
|
44
|
+
#define PORTABLE_ALIGN64 __attribute__((aligned(64)))
|
21
45
|
#else
|
22
46
|
#define PORTABLE_ALIGN32 __declspec(align(32))
|
47
|
+
#define PORTABLE_ALIGN64 __declspec(align(64))
|
23
48
|
#endif
|
49
|
+
|
50
|
+
// Adapted from https://github.com/Mysticial/FeatureDetector
|
51
|
+
#define _XCR_XFEATURE_ENABLED_MASK 0
|
52
|
+
|
53
|
+
static bool AVXCapable() {
|
54
|
+
int cpuInfo[4];
|
55
|
+
|
56
|
+
// CPU support
|
57
|
+
cpuid(cpuInfo, 0, 0);
|
58
|
+
int nIds = cpuInfo[0];
|
59
|
+
|
60
|
+
bool HW_AVX = false;
|
61
|
+
if (nIds >= 0x00000001) {
|
62
|
+
cpuid(cpuInfo, 0x00000001, 0);
|
63
|
+
HW_AVX = (cpuInfo[2] & ((int)1 << 28)) != 0;
|
64
|
+
}
|
65
|
+
|
66
|
+
// OS support
|
67
|
+
cpuid(cpuInfo, 1, 0);
|
68
|
+
|
69
|
+
bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
|
70
|
+
bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
|
71
|
+
|
72
|
+
bool avxSupported = false;
|
73
|
+
if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
|
74
|
+
uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
|
75
|
+
avxSupported = (xcrFeatureMask & 0x6) == 0x6;
|
76
|
+
}
|
77
|
+
return HW_AVX && avxSupported;
|
78
|
+
}
|
79
|
+
|
80
|
+
static bool AVX512Capable() {
|
81
|
+
if (!AVXCapable()) return false;
|
82
|
+
|
83
|
+
int cpuInfo[4];
|
84
|
+
|
85
|
+
// CPU support
|
86
|
+
cpuid(cpuInfo, 0, 0);
|
87
|
+
int nIds = cpuInfo[0];
|
88
|
+
|
89
|
+
bool HW_AVX512F = false;
|
90
|
+
if (nIds >= 0x00000007) { // AVX512 Foundation
|
91
|
+
cpuid(cpuInfo, 0x00000007, 0);
|
92
|
+
HW_AVX512F = (cpuInfo[1] & ((int)1 << 16)) != 0;
|
93
|
+
}
|
94
|
+
|
95
|
+
// OS support
|
96
|
+
cpuid(cpuInfo, 1, 0);
|
97
|
+
|
98
|
+
bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
|
99
|
+
bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
|
100
|
+
|
101
|
+
bool avx512Supported = false;
|
102
|
+
if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
|
103
|
+
uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
|
104
|
+
avx512Supported = (xcrFeatureMask & 0xe6) == 0xe6;
|
105
|
+
}
|
106
|
+
return HW_AVX512F && avx512Supported;
|
107
|
+
}
|
24
108
|
#endif
|
25
109
|
|
26
110
|
#include <queue>
|
@@ -29,78 +113,85 @@
|
|
29
113
|
#include <string.h>
|
30
114
|
|
31
115
|
namespace hnswlib {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
116
|
+
typedef size_t labeltype;
|
117
|
+
|
118
|
+
// This can be extended to store state for filtering (e.g. from a std::set)
|
119
|
+
class BaseFilterFunctor {
|
120
|
+
public:
|
121
|
+
virtual bool operator()(hnswlib::labeltype id) { return true; }
|
122
|
+
};
|
123
|
+
|
124
|
+
template <typename T>
|
125
|
+
class pairGreater {
|
126
|
+
public:
|
127
|
+
bool operator()(const T& p1, const T& p2) {
|
128
|
+
return p1.first > p2.first;
|
45
129
|
}
|
130
|
+
};
|
46
131
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
132
|
+
template<typename T>
|
133
|
+
static void writeBinaryPOD(std::ostream &out, const T &podRef) {
|
134
|
+
out.write((char *) &podRef, sizeof(T));
|
135
|
+
}
|
51
136
|
|
52
|
-
|
53
|
-
|
137
|
+
template<typename T>
|
138
|
+
static void readBinaryPOD(std::istream &in, T &podRef) {
|
139
|
+
in.read((char *) &podRef, sizeof(T));
|
140
|
+
}
|
54
141
|
|
142
|
+
template<typename MTYPE>
|
143
|
+
using DISTFUNC = MTYPE(*)(const void *, const void *, const void *);
|
55
144
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
145
|
+
template<typename MTYPE>
|
146
|
+
class SpaceInterface {
|
147
|
+
public:
|
148
|
+
// virtual void search(void *);
|
149
|
+
virtual size_t get_data_size() = 0;
|
61
150
|
|
62
|
-
|
151
|
+
virtual DISTFUNC<MTYPE> get_dist_func() = 0;
|
63
152
|
|
64
|
-
|
153
|
+
virtual void *get_dist_func_param() = 0;
|
65
154
|
|
66
|
-
|
67
|
-
|
155
|
+
virtual ~SpaceInterface() {}
|
156
|
+
};
|
68
157
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
virtual std::priority_queue<std::pair<dist_t, labeltype >> searchKnn(const void *, size_t) const = 0;
|
158
|
+
template<typename dist_t>
|
159
|
+
class AlgorithmInterface {
|
160
|
+
public:
|
161
|
+
virtual void addPoint(const void *datapoint, labeltype label, bool replace_deleted = false) = 0;
|
74
162
|
|
75
|
-
|
76
|
-
|
77
|
-
searchKnnCloserFirst(const void* query_data, size_t k) const;
|
163
|
+
virtual std::priority_queue<std::pair<dist_t, labeltype>>
|
164
|
+
searchKnn(const void*, size_t, BaseFilterFunctor* isIdAllowed = nullptr) const = 0;
|
78
165
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
};
|
83
|
-
|
84
|
-
template<typename dist_t>
|
85
|
-
std::vector<std::pair<dist_t, labeltype>>
|
86
|
-
AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k) const {
|
87
|
-
std::vector<std::pair<dist_t, labeltype>> result;
|
88
|
-
|
89
|
-
// here searchKnn returns the result in the order of further first
|
90
|
-
auto ret = searchKnn(query_data, k);
|
91
|
-
{
|
92
|
-
size_t sz = ret.size();
|
93
|
-
result.resize(sz);
|
94
|
-
while (!ret.empty()) {
|
95
|
-
result[--sz] = ret.top();
|
96
|
-
ret.pop();
|
97
|
-
}
|
98
|
-
}
|
166
|
+
// Return the k nearest neighbors, ordered closest first
|
167
|
+
virtual std::vector<std::pair<dist_t, labeltype>>
|
168
|
+
searchKnnCloserFirst(const void* query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const;
|
99
169
|
|
100
|
-
|
170
|
+
virtual void saveIndex(const std::string &location) = 0;
|
171
|
+
virtual ~AlgorithmInterface(){
|
172
|
+
}
|
173
|
+
};
|
174
|
+
|
175
|
+
template<typename dist_t>
|
176
|
+
std::vector<std::pair<dist_t, labeltype>>
|
177
|
+
AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k,
|
178
|
+
BaseFilterFunctor* isIdAllowed) const {
|
179
|
+
std::vector<std::pair<dist_t, labeltype>> result;
|
180
|
+
|
181
|
+
// searchKnn returns its results ordered farthest first
|
182
|
+
auto ret = searchKnn(query_data, k, isIdAllowed);
|
183
|
+
{
|
184
|
+
size_t sz = ret.size();
|
185
|
+
result.resize(sz);
|
186
|
+
while (!ret.empty()) {
|
187
|
+
result[--sz] = ret.top();
|
188
|
+
ret.pop();
|
189
|
+
}
|
101
190
|
}
|
102
191
|
|
192
|
+
return result;
|
103
193
|
}
|
194
|
+
} // namespace hnswlib
|
104
195
|
|
105
196
|
#include "space_l2.h"
|
106
197
|
#include "space_ip.h"
|