umappp 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -4
- data/ext/umappp/umappp.cpp +41 -43
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +6 -6
data/vendor/hnswlib/hnswlib.h
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
#pragma once
|
2
2
|
#ifndef NO_MANUAL_VECTORIZATION
|
3
|
-
#
|
3
|
+
#if (defined(__SSE__) || _M_IX86_FP > 0 || defined(_M_AMD64) || defined(_M_X64))
|
4
4
|
#define USE_SSE
|
5
5
|
#ifdef __AVX__
|
6
6
|
#define USE_AVX
|
7
|
+
#ifdef __AVX512F__
|
8
|
+
#define USE_AVX512
|
9
|
+
#endif
|
7
10
|
#endif
|
8
11
|
#endif
|
9
12
|
#endif
|
@@ -12,15 +15,96 @@
|
|
12
15
|
#ifdef _MSC_VER
|
13
16
|
#include <intrin.h>
|
14
17
|
#include <stdexcept>
|
18
|
+
// MSVC flavour of the CPUID helper: runs CPUID with leaf `eax` and sub-leaf
// `ecx` through the __cpuidex intrinsic and stores the four result registers
// in `out`.
// Declared `static` (like the other helpers in this header) so that including
// the header from several translation units does not produce duplicate
// external definitions at link time.
static void cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
    __cpuidex(out, eax, ecx);
}
|
21
|
+
// MSVC flavour of the XGETBV helper: reads the extended control register
// selected by `x` via the _xgetbv intrinsic and returns its value.
static __int64 xgetbv(unsigned int x) {
    const __int64 xcr_value = _xgetbv(x);
    return xcr_value;
}
|
15
24
|
#else
|
16
25
|
#include <x86intrin.h>
|
26
|
+
#include <cpuid.h>
|
27
|
+
#include <stdint.h>
|
28
|
+
// GCC/Clang flavour of the CPUID helper: runs CPUID with leaf `eax` and
// sub-leaf `ecx` and writes the four result registers into cpuInfo[0..3]
// in the order the __cpuid_count macro takes them.
static void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) {
    int32_t &out_a = cpuInfo[0];
    int32_t &out_b = cpuInfo[1];
    int32_t &out_c = cpuInfo[2];
    int32_t &out_d = cpuInfo[3];
    __cpuid_count(eax, ecx, out_a, out_b, out_c, out_d);
}
|
31
|
+
// GCC/Clang flavour of the XGETBV helper: executes the XGETBV instruction with
// `index` in ECX and returns the 64-bit result assembled from EDX (high half)
// and EAX (low half).
static uint64_t xgetbv(unsigned int index) {
    uint32_t low_half, high_half;
    __asm__ __volatile__("xgetbv" : "=a"(low_half), "=d"(high_half) : "c"(index));
    const uint64_t upper = static_cast<uint64_t>(high_half) << 32;
    return upper | low_half;
}
|
36
|
+
#endif
|
37
|
+
|
38
|
+
#if defined(USE_AVX512)
|
39
|
+
#include <immintrin.h>
|
17
40
|
#endif
|
18
41
|
|
19
42
|
#if defined(__GNUC__)
|
20
43
|
#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
|
44
|
+
#define PORTABLE_ALIGN64 __attribute__((aligned(64)))
|
21
45
|
#else
|
22
46
|
#define PORTABLE_ALIGN32 __declspec(align(32))
|
47
|
+
#define PORTABLE_ALIGN64 __declspec(align(64))
|
23
48
|
#endif
|
49
|
+
|
50
|
+
// Adapted from https://github.com/Mysticial/FeatureDetector
|
51
|
+
#define _XCR_XFEATURE_ENABLED_MASK 0
|
52
|
+
|
53
|
+
static bool AVXCapable() {
|
54
|
+
int cpuInfo[4];
|
55
|
+
|
56
|
+
// CPU support
|
57
|
+
cpuid(cpuInfo, 0, 0);
|
58
|
+
int nIds = cpuInfo[0];
|
59
|
+
|
60
|
+
bool HW_AVX = false;
|
61
|
+
if (nIds >= 0x00000001) {
|
62
|
+
cpuid(cpuInfo, 0x00000001, 0);
|
63
|
+
HW_AVX = (cpuInfo[2] & ((int)1 << 28)) != 0;
|
64
|
+
}
|
65
|
+
|
66
|
+
// OS support
|
67
|
+
cpuid(cpuInfo, 1, 0);
|
68
|
+
|
69
|
+
bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
|
70
|
+
bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
|
71
|
+
|
72
|
+
bool avxSupported = false;
|
73
|
+
if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
|
74
|
+
uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
|
75
|
+
avxSupported = (xcrFeatureMask & 0x6) == 0x6;
|
76
|
+
}
|
77
|
+
return HW_AVX && avxSupported;
|
78
|
+
}
|
79
|
+
|
80
|
+
static bool AVX512Capable() {
|
81
|
+
if (!AVXCapable()) return false;
|
82
|
+
|
83
|
+
int cpuInfo[4];
|
84
|
+
|
85
|
+
// CPU support
|
86
|
+
cpuid(cpuInfo, 0, 0);
|
87
|
+
int nIds = cpuInfo[0];
|
88
|
+
|
89
|
+
bool HW_AVX512F = false;
|
90
|
+
if (nIds >= 0x00000007) { // AVX512 Foundation
|
91
|
+
cpuid(cpuInfo, 0x00000007, 0);
|
92
|
+
HW_AVX512F = (cpuInfo[1] & ((int)1 << 16)) != 0;
|
93
|
+
}
|
94
|
+
|
95
|
+
// OS support
|
96
|
+
cpuid(cpuInfo, 1, 0);
|
97
|
+
|
98
|
+
bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
|
99
|
+
bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
|
100
|
+
|
101
|
+
bool avx512Supported = false;
|
102
|
+
if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
|
103
|
+
uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
|
104
|
+
avx512Supported = (xcrFeatureMask & 0xe6) == 0xe6;
|
105
|
+
}
|
106
|
+
return HW_AVX512F && avx512Supported;
|
107
|
+
}
|
24
108
|
#endif
|
25
109
|
|
26
110
|
#include <queue>
|
@@ -29,78 +113,85 @@
|
|
29
113
|
#include <string.h>
|
30
114
|
|
31
115
|
namespace hnswlib {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
116
|
+
typedef size_t labeltype;
|
117
|
+
|
118
|
+
// This can be extended to store state for filtering (e.g. from a std::set)
|
119
|
+
class BaseFilterFunctor {
|
120
|
+
public:
|
121
|
+
virtual bool operator()(hnswlib::labeltype id) { return true; }
|
122
|
+
};
|
123
|
+
|
124
|
+
// Comparator ordering pair-like values by descending `.first`.
// With std::priority_queue it turns the default max-heap into a min-heap on
// `.first`. The call operator is const so the comparator can also be invoked
// through a const object or used by ordered containers.
template <typename T>
class pairGreater {
 public:
    bool operator()(const T& p1, const T& p2) const {
        return p1.first > p2.first;
    }
};
|
46
131
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
132
|
+
// Writes the raw object representation of `podRef` (sizeof(T) bytes) to `out`.
// Intended for plain-old-data values; no byte-order conversion is performed.
// Stream errors are left in `out`'s state for the caller to inspect.
template<typename T>
static void writeBinaryPOD(std::ostream &out, const T &podRef) {
    // Named cast that keeps the const qualifier instead of silently dropping it.
    out.write(reinterpret_cast<const char *>(&podRef), sizeof(T));
}
|
51
136
|
|
52
|
-
|
53
|
-
|
137
|
+
// Reads sizeof(T) raw bytes from `in` directly into `podRef`.
// Intended for plain-old-data values; no byte-order conversion is performed.
// A short read is reported only through `in`'s stream state.
template<typename T>
static void readBinaryPOD(std::istream &in, T &podRef) {
    in.read(reinterpret_cast<char *>(&podRef), sizeof(T));
}
|
54
141
|
|
142
|
+
template<typename MTYPE>
|
143
|
+
using DISTFUNC = MTYPE(*)(const void *, const void *, const void *);
|
55
144
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
145
|
+
template<typename MTYPE>
|
146
|
+
class SpaceInterface {
|
147
|
+
public:
|
148
|
+
// virtual void search(void *);
|
149
|
+
virtual size_t get_data_size() = 0;
|
61
150
|
|
62
|
-
|
151
|
+
virtual DISTFUNC<MTYPE> get_dist_func() = 0;
|
63
152
|
|
64
|
-
|
153
|
+
virtual void *get_dist_func_param() = 0;
|
65
154
|
|
66
|
-
|
67
|
-
|
155
|
+
virtual ~SpaceInterface() {}
|
156
|
+
};
|
68
157
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
virtual std::priority_queue<std::pair<dist_t, labeltype >> searchKnn(const void *, size_t) const = 0;
|
158
|
+
template<typename dist_t>
|
159
|
+
class AlgorithmInterface {
|
160
|
+
public:
|
161
|
+
virtual void addPoint(const void *datapoint, labeltype label, bool replace_deleted = false) = 0;
|
74
162
|
|
75
|
-
|
76
|
-
|
77
|
-
searchKnnCloserFirst(const void* query_data, size_t k) const;
|
163
|
+
virtual std::priority_queue<std::pair<dist_t, labeltype>>
|
164
|
+
searchKnn(const void*, size_t, BaseFilterFunctor* isIdAllowed = nullptr) const = 0;
|
78
165
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
};
|
83
|
-
|
84
|
-
template<typename dist_t>
|
85
|
-
std::vector<std::pair<dist_t, labeltype>>
|
86
|
-
AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k) const {
|
87
|
-
std::vector<std::pair<dist_t, labeltype>> result;
|
88
|
-
|
89
|
-
// here searchKnn returns the result in the order of further first
|
90
|
-
auto ret = searchKnn(query_data, k);
|
91
|
-
{
|
92
|
-
size_t sz = ret.size();
|
93
|
-
result.resize(sz);
|
94
|
-
while (!ret.empty()) {
|
95
|
-
result[--sz] = ret.top();
|
96
|
-
ret.pop();
|
97
|
-
}
|
98
|
-
}
|
166
|
+
// Return the k nearest neighbors, ordered closest first
|
167
|
+
virtual std::vector<std::pair<dist_t, labeltype>>
|
168
|
+
searchKnnCloserFirst(const void* query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const;
|
99
169
|
|
100
|
-
|
170
|
+
virtual void saveIndex(const std::string &location) = 0;
|
171
|
+
virtual ~AlgorithmInterface(){
|
172
|
+
}
|
173
|
+
};
|
174
|
+
|
175
|
+
template<typename dist_t>
|
176
|
+
std::vector<std::pair<dist_t, labeltype>>
|
177
|
+
AlgorithmInterface<dist_t>::searchKnnCloserFirst(const void* query_data, size_t k,
|
178
|
+
BaseFilterFunctor* isIdAllowed) const {
|
179
|
+
std::vector<std::pair<dist_t, labeltype>> result;
|
180
|
+
|
181
|
+
// here searchKnn returns the result in the order of further first
|
182
|
+
auto ret = searchKnn(query_data, k, isIdAllowed);
|
183
|
+
{
|
184
|
+
size_t sz = ret.size();
|
185
|
+
result.resize(sz);
|
186
|
+
while (!ret.empty()) {
|
187
|
+
result[--sz] = ret.top();
|
188
|
+
ret.pop();
|
189
|
+
}
|
101
190
|
}
|
102
191
|
|
192
|
+
return result;
|
103
193
|
}
|
194
|
+
} // namespace hnswlib
|
104
195
|
|
105
196
|
#include "space_l2.h"
|
106
197
|
#include "space_ip.h"
|