annoy-rb 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +34 -9
- data/ext/annoy/annoyext.cpp +12 -9
- data/ext/annoy/annoyext.hpp +290 -284
- data/ext/annoy/src/LICENSE +1 -1
- data/ext/annoy/src/annoylib.h +75 -59
- data/ext/annoy/src/kissrandom.h +19 -5
- data/lib/annoy/version.rb +2 -2
- data/lib/annoy-rb.rb +3 -0
- data/lib/annoy.rb +16 -5
- data/sig/annoy.rbs +75 -1
- metadata +10 -16
- data/.github/workflows/build.yml +0 -20
- data/.gitignore +0 -21
- data/.rspec +0 -3
- data/CODE_OF_CONDUCT.md +0 -74
- data/Gemfile +0 -10
- data/Rakefile +0 -15
- data/Steepfile +0 -20
- data/annoy-rb.gemspec +0 -28
data/ext/annoy/src/annoylib.h
CHANGED
@@ -13,8 +13,8 @@
|
|
13
13
|
// the License.
|
14
14
|
|
15
15
|
|
16
|
-
#ifndef
|
17
|
-
#define
|
16
|
+
#ifndef ANNOY_ANNOYLIB_H
|
17
|
+
#define ANNOY_ANNOYLIB_H
|
18
18
|
|
19
19
|
#include <stdio.h>
|
20
20
|
#include <sys/stat.h>
|
@@ -58,6 +58,10 @@ typedef signed __int64 int64_t;
|
|
58
58
|
#include <queue>
|
59
59
|
#include <limits>
|
60
60
|
|
61
|
+
#if __cplusplus >= 201103L
|
62
|
+
#include <type_traits>
|
63
|
+
#endif
|
64
|
+
|
61
65
|
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
62
66
|
#include <thread>
|
63
67
|
#include <mutex>
|
@@ -72,9 +76,9 @@ typedef signed __int64 int64_t;
|
|
72
76
|
// This allows others to supply their own logger / error printer without
|
73
77
|
// requiring Annoy to import their headers. See RcppAnnoy for a use case.
|
74
78
|
#ifndef __ERROR_PRINTER_OVERRIDE__
|
75
|
-
#define
|
79
|
+
#define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
|
76
80
|
#else
|
77
|
-
#define
|
81
|
+
#define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
|
78
82
|
#endif
|
79
83
|
|
80
84
|
// Portable alloc definition, cf Writing R Extensions, Section 1.6.4
|
@@ -87,40 +91,24 @@ typedef signed __int64 int64_t;
|
|
87
91
|
# include <alloca.h>
|
88
92
|
#endif
|
89
93
|
|
90
|
-
inline void set_error_from_errno(char **error, const char* msg) {
|
91
|
-
showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
92
|
-
if (error) {
|
93
|
-
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
94
|
-
sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
|
95
|
-
}
|
96
|
-
}
|
97
|
-
|
98
|
-
inline void set_error_from_string(char **error, const char* msg) {
|
99
|
-
showUpdate("%s\n", msg);
|
100
|
-
if (error) {
|
101
|
-
*error = (char *)malloc(strlen(msg) + 1);
|
102
|
-
strcpy(*error, msg);
|
103
|
-
}
|
104
|
-
}
|
105
|
-
|
106
94
|
// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number.
|
107
95
|
// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low.
|
108
|
-
#define
|
96
|
+
#define ANNOYLIB_V_ARRAY_SIZE 65536
|
109
97
|
|
110
98
|
#ifndef _MSC_VER
|
111
|
-
#define
|
99
|
+
#define annoylib_popcount __builtin_popcountll
|
112
100
|
#else // See #293, #358
|
113
|
-
#define
|
101
|
+
#define annoylib_popcount cole_popcount
|
114
102
|
#endif
|
115
103
|
|
116
104
|
#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402
|
117
|
-
#define
|
105
|
+
#define ANNOYLIB_USE_AVX512
|
118
106
|
#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
|
119
|
-
#define
|
107
|
+
#define ANNOYLIB_USE_AVX
|
120
108
|
#else
|
121
109
|
#endif
|
122
110
|
|
123
|
-
#if defined(
|
111
|
+
#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512)
|
124
112
|
#if defined(_MSC_VER)
|
125
113
|
#include <intrin.h>
|
126
114
|
#elif defined(__GNUC__)
|
@@ -129,11 +117,30 @@ inline void set_error_from_string(char **error, const char* msg) {
|
|
129
117
|
#endif
|
130
118
|
|
131
119
|
#if !defined(__MINGW32__)
|
132
|
-
#define
|
120
|
+
#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast<int64_t>(x)
|
133
121
|
#else
|
134
|
-
#define
|
122
|
+
#define ANNOYLIB_FTRUNCATE_SIZE(x) (x)
|
135
123
|
#endif
|
136
124
|
|
125
|
+
namespace Annoy {
|
126
|
+
|
127
|
+
inline void set_error_from_errno(char **error, const char* msg) {
|
128
|
+
annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
129
|
+
if (error) {
|
130
|
+
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
131
|
+
sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
inline void set_error_from_string(char **error, const char* msg) {
|
136
|
+
annoylib_showUpdate("%s\n", msg);
|
137
|
+
if (error) {
|
138
|
+
*error = (char *)malloc(strlen(msg) + 1);
|
139
|
+
strcpy(*error, msg);
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
143
|
+
|
137
144
|
using std::vector;
|
138
145
|
using std::pair;
|
139
146
|
using std::numeric_limits;
|
@@ -145,7 +152,7 @@ inline bool remap_memory_and_truncate(void** _ptr, int _fd, size_t old_size, siz
|
|
145
152
|
bool ok = ftruncate(_fd, new_size) != -1;
|
146
153
|
#else
|
147
154
|
munmap(*_ptr, old_size);
|
148
|
-
bool ok = ftruncate(_fd,
|
155
|
+
bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1;
|
149
156
|
#ifdef MAP_POPULATE
|
150
157
|
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
|
151
158
|
#else
|
@@ -194,7 +201,7 @@ inline T euclidean_distance(const T* x, const T* y, int f) {
|
|
194
201
|
return d;
|
195
202
|
}
|
196
203
|
|
197
|
-
#ifdef
|
204
|
+
#ifdef ANNOYLIB_USE_AVX
|
198
205
|
// Horizontal single sum of 256bit vector.
|
199
206
|
inline float hsum256_ps_avx(__m256 v) {
|
200
207
|
const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v));
|
@@ -277,7 +284,7 @@ inline float euclidean_distance<float>(const float* x, const float* y, int f) {
|
|
277
284
|
|
278
285
|
#endif
|
279
286
|
|
280
|
-
#ifdef
|
287
|
+
#ifdef ANNOYLIB_USE_AVX512
|
281
288
|
template<>
|
282
289
|
inline float dot<float>(const float* x, const float *y, int f) {
|
283
290
|
float result = 0;
|
@@ -452,7 +459,7 @@ struct Angular : Base {
|
|
452
459
|
S children[2]; // Will possibly store more than 2
|
453
460
|
T norm;
|
454
461
|
};
|
455
|
-
T v[
|
462
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
456
463
|
};
|
457
464
|
template<typename S, typename T>
|
458
465
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
@@ -523,7 +530,7 @@ struct DotProduct : Angular {
|
|
523
530
|
S n_descendants;
|
524
531
|
S children[2]; // Will possibly store more than 2
|
525
532
|
T dot_factor;
|
526
|
-
T v[
|
533
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
527
534
|
};
|
528
535
|
|
529
536
|
static const char* name() {
|
@@ -630,7 +637,7 @@ struct Hamming : Base {
|
|
630
637
|
struct Node {
|
631
638
|
S n_descendants;
|
632
639
|
S children[2];
|
633
|
-
T v[
|
640
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
634
641
|
};
|
635
642
|
|
636
643
|
static const size_t max_iterations = 20;
|
@@ -659,7 +666,7 @@ struct Hamming : Base {
|
|
659
666
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
660
667
|
size_t dist = 0;
|
661
668
|
for (int i = 0; i < f; i++) {
|
662
|
-
dist +=
|
669
|
+
dist += annoylib_popcount(x->v[i] ^ y->v[i]);
|
663
670
|
}
|
664
671
|
return dist;
|
665
672
|
}
|
@@ -727,7 +734,7 @@ struct Minkowski : Base {
|
|
727
734
|
S n_descendants;
|
728
735
|
T a; // need an extra constant term to determine the offset of the plane
|
729
736
|
S children[2];
|
730
|
-
T v[
|
737
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
731
738
|
};
|
732
739
|
template<typename S, typename T>
|
733
740
|
static inline T margin(const Node<S, T>* n, const T* y, int f) {
|
@@ -815,7 +822,7 @@ struct Manhattan : Minkowski {
|
|
815
822
|
}
|
816
823
|
};
|
817
824
|
|
818
|
-
template<typename S, typename T>
|
825
|
+
template<typename S, typename T, typename R = uint64_t>
|
819
826
|
class AnnoyIndexInterface {
|
820
827
|
public:
|
821
828
|
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
|
@@ -833,12 +840,18 @@ class AnnoyIndexInterface {
|
|
833
840
|
virtual S get_n_trees() const = 0;
|
834
841
|
virtual void verbose(bool v) = 0;
|
835
842
|
virtual void get_item(S item, T* v) const = 0;
|
836
|
-
virtual void set_seed(
|
843
|
+
virtual void set_seed(R q) = 0;
|
837
844
|
virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
|
838
845
|
};
|
839
846
|
|
840
847
|
template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy>
|
841
|
-
class AnnoyIndex : public AnnoyIndexInterface<S, T
|
848
|
+
class AnnoyIndex : public AnnoyIndexInterface<S, T,
|
849
|
+
#if __cplusplus >= 201103L
|
850
|
+
typename std::remove_const<decltype(Random::default_seed)>::type
|
851
|
+
#else
|
852
|
+
typename Random::seed_type
|
853
|
+
#endif
|
854
|
+
> {
|
842
855
|
/*
|
843
856
|
* We use random projection to build a forest of binary trees of all items.
|
844
857
|
* Basically just split the hyperspace into two sides by a hyperplane,
|
@@ -849,6 +862,11 @@ template<typename S, typename T, typename Distance, typename Random, class Threa
|
|
849
862
|
public:
|
850
863
|
typedef Distance D;
|
851
864
|
typedef typename D::template Node<S, T> Node;
|
865
|
+
#if __cplusplus >= 201103L
|
866
|
+
typedef typename std::remove_const<decltype(Random::default_seed)>::type R;
|
867
|
+
#else
|
868
|
+
typedef typename Random::seed_type R;
|
869
|
+
#endif
|
852
870
|
|
853
871
|
protected:
|
854
872
|
const int _f;
|
@@ -859,8 +877,7 @@ protected:
|
|
859
877
|
S _nodes_size;
|
860
878
|
vector<S> _roots;
|
861
879
|
S _K;
|
862
|
-
|
863
|
-
int _seed;
|
880
|
+
R _seed;
|
864
881
|
bool _loaded;
|
865
882
|
bool _verbose;
|
866
883
|
int _fd;
|
@@ -869,8 +886,8 @@ protected:
|
|
869
886
|
public:
|
870
887
|
|
871
888
|
AnnoyIndex() : _f(0), _fd(0), _nodes(NULL), _n_items(0), _n_nodes(0), _nodes_size(0),
|
872
|
-
|
873
|
-
AnnoyIndex(int f) : _f(f) {
|
889
|
+
_loaded(false), _verbose(false), _on_disk(false), _built(false) { }
|
890
|
+
AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) {
|
874
891
|
_s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
|
875
892
|
_verbose = false;
|
876
893
|
_built = false;
|
@@ -924,7 +941,7 @@ public:
|
|
924
941
|
return false;
|
925
942
|
}
|
926
943
|
_nodes_size = 1;
|
927
|
-
if (ftruncate(_fd,
|
944
|
+
if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) {
|
928
945
|
set_error_from_errno(error, "Unable to truncate");
|
929
946
|
return false;
|
930
947
|
}
|
@@ -960,7 +977,7 @@ public:
|
|
960
977
|
memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s);
|
961
978
|
_n_nodes += _roots.size();
|
962
979
|
|
963
|
-
if (_verbose)
|
980
|
+
if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes);
|
964
981
|
|
965
982
|
if (_on_disk) {
|
966
983
|
if (!remap_memory_and_truncate(&_nodes, _fd,
|
@@ -1029,7 +1046,7 @@ public:
|
|
1029
1046
|
_n_nodes = 0;
|
1030
1047
|
_nodes_size = 0;
|
1031
1048
|
_on_disk = false;
|
1032
|
-
|
1049
|
+
_seed = Random::default_seed;
|
1033
1050
|
_roots.clear();
|
1034
1051
|
}
|
1035
1052
|
|
@@ -1048,7 +1065,7 @@ public:
|
|
1048
1065
|
}
|
1049
1066
|
}
|
1050
1067
|
reinitialize();
|
1051
|
-
if (_verbose)
|
1068
|
+
if (_verbose) annoylib_showUpdate("unloaded\n");
|
1052
1069
|
}
|
1053
1070
|
|
1054
1071
|
bool load(const char* filename, bool prefault=false, char** error=NULL) {
|
@@ -1076,7 +1093,7 @@ public:
|
|
1076
1093
|
#ifdef MAP_POPULATE
|
1077
1094
|
flags |= MAP_POPULATE;
|
1078
1095
|
#else
|
1079
|
-
|
1096
|
+
annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform");
|
1080
1097
|
#endif
|
1081
1098
|
}
|
1082
1099
|
_nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0);
|
@@ -1100,7 +1117,7 @@ public:
|
|
1100
1117
|
_loaded = true;
|
1101
1118
|
_built = true;
|
1102
1119
|
_n_items = m;
|
1103
|
-
if (_verbose)
|
1120
|
+
if (_verbose) annoylib_showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
|
1104
1121
|
return true;
|
1105
1122
|
}
|
1106
1123
|
|
@@ -1136,16 +1153,13 @@ public:
|
|
1136
1153
|
memcpy(v, m->v, (_f) * sizeof(T));
|
1137
1154
|
}
|
1138
1155
|
|
1139
|
-
void set_seed(
|
1140
|
-
_is_seeded = true;
|
1156
|
+
void set_seed(R seed) {
|
1141
1157
|
_seed = seed;
|
1142
1158
|
}
|
1143
1159
|
|
1144
1160
|
void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
|
1145
|
-
Random _random;
|
1146
1161
|
// Each thread needs its own seed, otherwise each thread would be building the same tree(s)
|
1147
|
-
|
1148
|
-
_random.set_seed(seed);
|
1162
|
+
Random _random(_seed + thread_idx);
|
1149
1163
|
|
1150
1164
|
vector<S> thread_roots;
|
1151
1165
|
while (1) {
|
@@ -1162,7 +1176,7 @@ public:
|
|
1162
1176
|
}
|
1163
1177
|
}
|
1164
1178
|
|
1165
|
-
if (_verbose)
|
1179
|
+
if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size());
|
1166
1180
|
|
1167
1181
|
vector<S> indices;
|
1168
1182
|
threaded_build_policy.lock_shared_nodes();
|
@@ -1192,14 +1206,14 @@ protected:
|
|
1192
1206
|
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
|
1193
1207
|
static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
|
1194
1208
|
_verbose)
|
1195
|
-
|
1209
|
+
annoylib_showUpdate("File truncation error\n");
|
1196
1210
|
} else {
|
1197
1211
|
_nodes = realloc(_nodes, _s * new_nodes_size);
|
1198
1212
|
memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
|
1199
1213
|
}
|
1200
1214
|
|
1201
1215
|
_nodes_size = new_nodes_size;
|
1202
|
-
if (_verbose)
|
1216
|
+
if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
|
1203
1217
|
}
|
1204
1218
|
|
1205
1219
|
void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
|
@@ -1281,7 +1295,7 @@ protected:
|
|
1281
1295
|
bool side = D::side(m, n->v, _f, _random);
|
1282
1296
|
children_indices[side].push_back(j);
|
1283
1297
|
} else {
|
1284
|
-
|
1298
|
+
annoylib_showUpdate("No node for index %d?\n", j);
|
1285
1299
|
}
|
1286
1300
|
}
|
1287
1301
|
|
@@ -1293,7 +1307,7 @@ protected:
|
|
1293
1307
|
// If we didn't find a hyperplane, just randomize sides as a last option
|
1294
1308
|
while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
|
1295
1309
|
if (_verbose)
|
1296
|
-
|
1310
|
+
annoylib_showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n",
|
1297
1311
|
children_indices[0].size(), children_indices[1].size());
|
1298
1312
|
|
1299
1313
|
children_indices[0].clear();
|
@@ -1477,5 +1491,7 @@ public:
|
|
1477
1491
|
};
|
1478
1492
|
#endif
|
1479
1493
|
|
1494
|
+
}
|
1495
|
+
|
1480
1496
|
#endif
|
1481
1497
|
// vim: tabstop=2 shiftwidth=2
|
data/ext/annoy/src/kissrandom.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef ANNOY_KISSRANDOM_H
|
2
|
+
#define ANNOY_KISSRANDOM_H
|
3
3
|
|
4
4
|
#if defined(_MSC_VER) && _MSC_VER == 1500
|
5
5
|
typedef unsigned __int32 uint32_t;
|
@@ -8,6 +8,8 @@ typedef unsigned __int64 uint64_t;
|
|
8
8
|
#include <stdint.h>
|
9
9
|
#endif
|
10
10
|
|
11
|
+
namespace Annoy {
|
12
|
+
|
11
13
|
// KISS = "keep it simple, stupid", but high quality random number generator
|
12
14
|
// http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code"
|
13
15
|
// http://mathforum.org/kb/message.jspa?messageID=6627731
|
@@ -20,8 +22,13 @@ struct Kiss32Random {
|
|
20
22
|
uint32_t z;
|
21
23
|
uint32_t c;
|
22
24
|
|
25
|
+
static const uint32_t default_seed = 123456789;
|
26
|
+
#if __cplusplus < 201103L
|
27
|
+
typedef uint32_t seed_type;
|
28
|
+
#endif
|
29
|
+
|
23
30
|
// seed must be != 0
|
24
|
-
Kiss32Random(uint32_t seed =
|
31
|
+
Kiss32Random(uint32_t seed = default_seed) {
|
25
32
|
x = seed;
|
26
33
|
y = 362436000;
|
27
34
|
z = 521288629;
|
@@ -64,8 +71,13 @@ struct Kiss64Random {
|
|
64
71
|
uint64_t z;
|
65
72
|
uint64_t c;
|
66
73
|
|
74
|
+
static const uint64_t default_seed = 1234567890987654321ULL;
|
75
|
+
#if __cplusplus < 201103L
|
76
|
+
typedef uint64_t seed_type;
|
77
|
+
#endif
|
78
|
+
|
67
79
|
// seed must be != 0
|
68
|
-
Kiss64Random(uint64_t seed =
|
80
|
+
Kiss64Random(uint64_t seed = default_seed) {
|
69
81
|
x = seed;
|
70
82
|
y = 362436362436362436ULL;
|
71
83
|
z = 1066149217761810ULL;
|
@@ -97,10 +109,12 @@ struct Kiss64Random {
|
|
97
109
|
// Draw random integer between 0 and n-1 where n is at most the number of data points you have
|
98
110
|
return kiss() % n;
|
99
111
|
}
|
100
|
-
inline void set_seed(
|
112
|
+
inline void set_seed(uint64_t seed) {
|
101
113
|
x = seed;
|
102
114
|
}
|
103
115
|
};
|
104
116
|
|
117
|
+
}
|
118
|
+
|
105
119
|
#endif
|
106
120
|
// vim: tabstop=2 shiftwidth=2
|
data/lib/annoy/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# Annoy.rb is a Ruby wrapper for Annoy (Approximate Nearest Neighbors Oh Yeah).
|
4
4
|
module Annoy
|
5
5
|
# The version of Annoy.rb you are using.
|
6
|
-
VERSION = '0.
|
6
|
+
VERSION = '0.7.0'
|
7
7
|
|
8
8
|
# The version of Annoy included with gem.
|
9
|
-
ANNOY_VERSION = '1.17.
|
9
|
+
ANNOY_VERSION = '1.17.1'
|
10
10
|
end
|
data/lib/annoy-rb.rb
ADDED
data/lib/annoy.rb
CHANGED
@@ -30,30 +30,40 @@ module Annoy
|
|
30
30
|
# @return [String]
|
31
31
|
attr_reader :metric
|
32
32
|
|
33
|
+
# Returns the data type of feature.
|
34
|
+
# @return [String]
|
35
|
+
attr_reader :dtype
|
36
|
+
|
33
37
|
# Create a new search index.
|
34
38
|
#
|
35
39
|
# @param n_features [Integer] The number of features (dimensions) of stored vector.
|
36
40
|
# @param metric [String] The distance metric between vectors ('angular', 'dot', 'hamming', 'euclidean', or 'manhattan').
|
37
|
-
|
41
|
+
# @param dtype [String] The data type of features ('float64' and 'float32').
|
42
|
+
# If metric is given 'hamming', 'uint64' is automatically assigned to this argument.
|
43
|
+
def initialize(n_features:, metric: 'angular', dtype: 'float64') # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
38
44
|
raise ArgumentError, 'Expect n_features to be Integer.' unless n_features.is_a?(Numeric)
|
39
45
|
|
40
46
|
@n_features = n_features.to_i
|
41
47
|
@metric = metric
|
48
|
+
@dtype = dtype
|
42
49
|
|
50
|
+
# rubocop:disable Layout/LineLength
|
43
51
|
@index = case @metric
|
44
52
|
when 'angular'
|
45
|
-
AnnoyIndexAngular.new(@n_features)
|
53
|
+
@dtype == 'float64' ? AnnoyIndexAngular.new(@n_features) : AnnoyIndexAngularFloat32.new(@n_features)
|
46
54
|
when 'dot'
|
47
|
-
AnnoyIndexDotProduct.new(@n_features)
|
55
|
+
@dtype == 'float64' ? AnnoyIndexDotProduct.new(@n_features) : AnnoyIndexDotProductFloat32.new(@n_features)
|
48
56
|
when 'hamming'
|
57
|
+
@dtype = 'uint64'
|
49
58
|
AnnoyIndexHamming.new(@n_features)
|
50
59
|
when 'euclidean'
|
51
|
-
AnnoyIndexEuclidean.new(@n_features)
|
60
|
+
@dtype == 'float64' ? AnnoyIndexEuclidean.new(@n_features) : AnnoyIndexEuclideanFloat32.new(@n_features)
|
52
61
|
when 'manhattan'
|
53
|
-
AnnoyIndexManhattan.new(@n_features)
|
62
|
+
@dtype == 'float64' ? AnnoyIndexManhattan.new(@n_features) : AnnoyIndexManhattanFloat32.new(@n_features)
|
54
63
|
else
|
55
64
|
raise ArgumentError, "No such metric: #{@metric}."
|
56
65
|
end
|
66
|
+
# rubocop:enable Layout/LineLength
|
57
67
|
end
|
58
68
|
|
59
69
|
# Add item to be indexed.
|
@@ -69,6 +79,7 @@ module Annoy
|
|
69
79
|
#
|
70
80
|
# @param n_trees [Integer] The number of trees. More trees gives higher search precision.
|
71
81
|
# @param n_jobs [Integer] The number of threads used to build the trees. If -1 is given, uses all available CPU cores.
|
82
|
+
# This parameter is enabled only if "-DANNOYLIB_MULTITHREADED_BUILD" is specified on gem installation.
|
72
83
|
# @return [Boolean]
|
73
84
|
def build(n_trees, n_jobs: -1)
|
74
85
|
@index.build(n_trees, n_jobs)
|
data/sig/annoy.rbs
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Annoy
|
2
2
|
VERSION: String
|
3
|
+
ANNOY_VERSION: String
|
3
4
|
|
4
5
|
class AnnoyIndex
|
5
6
|
attr_reader n_features: Integer
|
6
7
|
attr_reader metric: String
|
8
|
+
attr_reader dtype: String
|
7
9
|
|
8
|
-
def initialize: (n_features: Integer n_features, ?metric: String metric) -> void
|
10
|
+
def initialize: (n_features: Integer n_features, ?metric: String metric, ?dtype: String dtype) -> void
|
9
11
|
def add_item: (Integer i, Array[Float | Integer] v) -> bool
|
10
12
|
def build: (Integer n_trees, ?n_jobs: Integer n_jobs) -> bool
|
11
13
|
def save: (String filename, ?prefault: bool prefault) -> bool
|
@@ -40,6 +42,24 @@ module Annoy
|
|
40
42
|
def seed: (Integer s) -> nil
|
41
43
|
end
|
42
44
|
|
45
|
+
class AnnoyIndexAngularFloat32
|
46
|
+
def initialize: (Integer n_features) -> void
|
47
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
48
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
49
|
+
def save: (String filename, bool prefault) -> bool
|
50
|
+
def load: (String filename, bool prefault) -> bool
|
51
|
+
def unload: () -> bool
|
52
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
53
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
54
|
+
def get_item: (Integer i) -> Array[Float]
|
55
|
+
def get_distance: (Integer i, Integer j) -> Float
|
56
|
+
def n_items: () -> Integer
|
57
|
+
def n_trees: () -> Integer
|
58
|
+
def on_disk_build: (String filename) -> bool
|
59
|
+
def verbose: (bool flag) -> nil
|
60
|
+
def seed: (Integer s) -> nil
|
61
|
+
end
|
62
|
+
|
43
63
|
class AnnoyIndexDotProduct
|
44
64
|
def initialize: (Integer n_features) -> void
|
45
65
|
def add_item: (Integer i, Array[Float] v) -> bool
|
@@ -58,6 +78,24 @@ module Annoy
|
|
58
78
|
def seed: (Integer s) -> nil
|
59
79
|
end
|
60
80
|
|
81
|
+
class AnnoyIndexDotProductFloat32
|
82
|
+
def initialize: (Integer n_features) -> void
|
83
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
84
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
85
|
+
def save: (String filename, bool prefault) -> bool
|
86
|
+
def load: (String filename, bool prefault) -> bool
|
87
|
+
def unload: () -> bool
|
88
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
89
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
90
|
+
def get_item: (Integer i) -> Array[Float]
|
91
|
+
def get_distance: (Integer i, Integer j) -> Float
|
92
|
+
def n_items: () -> Integer
|
93
|
+
def n_trees: () -> Integer
|
94
|
+
def on_disk_build: (String filename) -> bool
|
95
|
+
def verbose: (bool flag) -> nil
|
96
|
+
def seed: (Integer s) -> nil
|
97
|
+
end
|
98
|
+
|
61
99
|
class AnnoyIndexHamming
|
62
100
|
def initialize: (Integer n_features) -> void
|
63
101
|
def add_item: (Integer i, Array[Integer] v) -> bool
|
@@ -94,6 +132,24 @@ module Annoy
|
|
94
132
|
def seed: (Integer s) -> nil
|
95
133
|
end
|
96
134
|
|
135
|
+
class AnnoyIndexEuclideanFloat32
|
136
|
+
def initialize: (Integer n_features) -> void
|
137
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
138
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
139
|
+
def save: (String filename, bool prefault) -> bool
|
140
|
+
def load: (String filename, bool prefault) -> bool
|
141
|
+
def unload: () -> bool
|
142
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
143
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
144
|
+
def get_item: (Integer i) -> Array[Float]
|
145
|
+
def get_distance: (Integer i, Integer j) -> Float
|
146
|
+
def n_items: () -> Integer
|
147
|
+
def n_trees: () -> Integer
|
148
|
+
def on_disk_build: (String filename) -> bool
|
149
|
+
def verbose: (bool flag) -> nil
|
150
|
+
def seed: (Integer s) -> nil
|
151
|
+
end
|
152
|
+
|
97
153
|
class AnnoyIndexManhattan
|
98
154
|
def initialize: (Integer n_features) -> void
|
99
155
|
def add_item: (Integer i, Array[Float] v) -> bool
|
@@ -111,4 +167,22 @@ module Annoy
|
|
111
167
|
def verbose: (bool flag) -> nil
|
112
168
|
def seed: (Integer s) -> nil
|
113
169
|
end
|
170
|
+
|
171
|
+
class AnnoyIndexManhattanFloat32
|
172
|
+
def initialize: (Integer n_features) -> void
|
173
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
174
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
175
|
+
def save: (String filename, bool prefault) -> bool
|
176
|
+
def load: (String filename, bool prefault) -> bool
|
177
|
+
def unload: () -> bool
|
178
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
179
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
180
|
+
def get_item: (Integer i) -> Array[Float]
|
181
|
+
def get_distance: (Integer i, Integer j) -> Float
|
182
|
+
def n_items: () -> Integer
|
183
|
+
def n_trees: () -> Integer
|
184
|
+
def on_disk_build: (String filename) -> bool
|
185
|
+
def verbose: (bool flag) -> nil
|
186
|
+
def seed: (Integer s) -> nil
|
187
|
+
end
|
114
188
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: annoy-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Annoy.rb provides Ruby bindings for the Annoy (Approximate Nearest Neighbors
|
14
14
|
Oh Yeah).
|
@@ -19,17 +19,9 @@ extensions:
|
|
19
19
|
- ext/annoy/extconf.rb
|
20
20
|
extra_rdoc_files: []
|
21
21
|
files:
|
22
|
-
- ".github/workflows/build.yml"
|
23
|
-
- ".gitignore"
|
24
|
-
- ".rspec"
|
25
22
|
- CHANGELOG.md
|
26
|
-
- CODE_OF_CONDUCT.md
|
27
|
-
- Gemfile
|
28
23
|
- LICENSE.txt
|
29
24
|
- README.md
|
30
|
-
- Rakefile
|
31
|
-
- Steepfile
|
32
|
-
- annoy-rb.gemspec
|
33
25
|
- ext/annoy/annoyext.cpp
|
34
26
|
- ext/annoy/annoyext.hpp
|
35
27
|
- ext/annoy/extconf.rb
|
@@ -37,17 +29,19 @@ files:
|
|
37
29
|
- ext/annoy/src/annoylib.h
|
38
30
|
- ext/annoy/src/kissrandom.h
|
39
31
|
- ext/annoy/src/mman.h
|
32
|
+
- lib/annoy-rb.rb
|
40
33
|
- lib/annoy.rb
|
41
34
|
- lib/annoy/version.rb
|
42
35
|
- sig/annoy.rbs
|
43
|
-
homepage: https://github.com/yoshoku/annoy
|
36
|
+
homepage: https://github.com/yoshoku/annoy-rb
|
44
37
|
licenses:
|
45
38
|
- Apache-2.0
|
46
39
|
metadata:
|
47
|
-
homepage_uri: https://github.com/yoshoku/annoy
|
48
|
-
source_code_uri: https://github.com/yoshoku/annoy
|
49
|
-
changelog_uri: https://github.com/yoshoku/annoy
|
50
|
-
documentation_uri: https://yoshoku.github.io/annoy
|
40
|
+
homepage_uri: https://github.com/yoshoku/annoy-rb
|
41
|
+
source_code_uri: https://github.com/yoshoku/annoy-rb
|
42
|
+
changelog_uri: https://github.com/yoshoku/annoy-rb/blob/main/CHANGELOG.md
|
43
|
+
documentation_uri: https://yoshoku.github.io/annoy-rb/doc/
|
44
|
+
rubygems_mfa_required: 'true'
|
51
45
|
post_install_message:
|
52
46
|
rdoc_options: []
|
53
47
|
require_paths:
|
@@ -63,7 +57,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
57
|
- !ruby/object:Gem::Version
|
64
58
|
version: '0'
|
65
59
|
requirements: []
|
66
|
-
rubygems_version: 3.2.
|
60
|
+
rubygems_version: 3.2.33
|
67
61
|
signing_key:
|
68
62
|
specification_version: 4
|
69
63
|
summary: Ruby bindings for the Annoy (Approximate Nearest Neighbors Oh Yeah).
|