annoy-rb 0.5.0 → 0.7.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +34 -9
- data/ext/annoy/annoyext.cpp +12 -9
- data/ext/annoy/annoyext.hpp +290 -284
- data/ext/annoy/src/LICENSE +1 -1
- data/ext/annoy/src/annoylib.h +75 -59
- data/ext/annoy/src/kissrandom.h +19 -5
- data/lib/annoy/version.rb +2 -2
- data/lib/annoy-rb.rb +3 -0
- data/lib/annoy.rb +16 -5
- data/sig/annoy.rbs +75 -1
- metadata +10 -16
- data/.github/workflows/build.yml +0 -20
- data/.gitignore +0 -21
- data/.rspec +0 -3
- data/CODE_OF_CONDUCT.md +0 -74
- data/Gemfile +0 -10
- data/Rakefile +0 -15
- data/Steepfile +0 -20
- data/annoy-rb.gemspec +0 -28
data/ext/annoy/src/annoylib.h
CHANGED
@@ -13,8 +13,8 @@
|
|
13
13
|
// the License.
|
14
14
|
|
15
15
|
|
16
|
-
#ifndef
|
17
|
-
#define
|
16
|
+
#ifndef ANNOY_ANNOYLIB_H
|
17
|
+
#define ANNOY_ANNOYLIB_H
|
18
18
|
|
19
19
|
#include <stdio.h>
|
20
20
|
#include <sys/stat.h>
|
@@ -58,6 +58,10 @@ typedef signed __int64 int64_t;
|
|
58
58
|
#include <queue>
|
59
59
|
#include <limits>
|
60
60
|
|
61
|
+
#if __cplusplus >= 201103L
|
62
|
+
#include <type_traits>
|
63
|
+
#endif
|
64
|
+
|
61
65
|
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
62
66
|
#include <thread>
|
63
67
|
#include <mutex>
|
@@ -72,9 +76,9 @@ typedef signed __int64 int64_t;
|
|
72
76
|
// This allows others to supply their own logger / error printer without
|
73
77
|
// requiring Annoy to import their headers. See RcppAnnoy for a use case.
|
74
78
|
#ifndef __ERROR_PRINTER_OVERRIDE__
|
75
|
-
#define
|
79
|
+
#define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
|
76
80
|
#else
|
77
|
-
#define
|
81
|
+
#define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
|
78
82
|
#endif
|
79
83
|
|
80
84
|
// Portable alloc definition, cf Writing R Extensions, Section 1.6.4
|
@@ -87,40 +91,24 @@ typedef signed __int64 int64_t;
|
|
87
91
|
# include <alloca.h>
|
88
92
|
#endif
|
89
93
|
|
90
|
-
inline void set_error_from_errno(char **error, const char* msg) {
|
91
|
-
showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
92
|
-
if (error) {
|
93
|
-
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
94
|
-
sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
|
95
|
-
}
|
96
|
-
}
|
97
|
-
|
98
|
-
inline void set_error_from_string(char **error, const char* msg) {
|
99
|
-
showUpdate("%s\n", msg);
|
100
|
-
if (error) {
|
101
|
-
*error = (char *)malloc(strlen(msg) + 1);
|
102
|
-
strcpy(*error, msg);
|
103
|
-
}
|
104
|
-
}
|
105
|
-
|
106
94
|
// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number.
|
107
95
|
// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low.
|
108
|
-
#define
|
96
|
+
#define ANNOYLIB_V_ARRAY_SIZE 65536
|
109
97
|
|
110
98
|
#ifndef _MSC_VER
|
111
|
-
#define
|
99
|
+
#define annoylib_popcount __builtin_popcountll
|
112
100
|
#else // See #293, #358
|
113
|
-
#define
|
101
|
+
#define annoylib_popcount cole_popcount
|
114
102
|
#endif
|
115
103
|
|
116
104
|
#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402
|
117
|
-
#define
|
105
|
+
#define ANNOYLIB_USE_AVX512
|
118
106
|
#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
|
119
|
-
#define
|
107
|
+
#define ANNOYLIB_USE_AVX
|
120
108
|
#else
|
121
109
|
#endif
|
122
110
|
|
123
|
-
#if defined(
|
111
|
+
#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512)
|
124
112
|
#if defined(_MSC_VER)
|
125
113
|
#include <intrin.h>
|
126
114
|
#elif defined(__GNUC__)
|
@@ -129,11 +117,30 @@ inline void set_error_from_string(char **error, const char* msg) {
|
|
129
117
|
#endif
|
130
118
|
|
131
119
|
#if !defined(__MINGW32__)
|
132
|
-
#define
|
120
|
+
#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast<int64_t>(x)
|
133
121
|
#else
|
134
|
-
#define
|
122
|
+
#define ANNOYLIB_FTRUNCATE_SIZE(x) (x)
|
135
123
|
#endif
|
136
124
|
|
125
|
+
namespace Annoy {
|
126
|
+
|
127
|
+
inline void set_error_from_errno(char **error, const char* msg) {
|
128
|
+
annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
129
|
+
if (error) {
|
130
|
+
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
131
|
+
sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
inline void set_error_from_string(char **error, const char* msg) {
|
136
|
+
annoylib_showUpdate("%s\n", msg);
|
137
|
+
if (error) {
|
138
|
+
*error = (char *)malloc(strlen(msg) + 1);
|
139
|
+
strcpy(*error, msg);
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
143
|
+
|
137
144
|
using std::vector;
|
138
145
|
using std::pair;
|
139
146
|
using std::numeric_limits;
|
@@ -145,7 +152,7 @@ inline bool remap_memory_and_truncate(void** _ptr, int _fd, size_t old_size, siz
|
|
145
152
|
bool ok = ftruncate(_fd, new_size) != -1;
|
146
153
|
#else
|
147
154
|
munmap(*_ptr, old_size);
|
148
|
-
bool ok = ftruncate(_fd,
|
155
|
+
bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1;
|
149
156
|
#ifdef MAP_POPULATE
|
150
157
|
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
|
151
158
|
#else
|
@@ -194,7 +201,7 @@ inline T euclidean_distance(const T* x, const T* y, int f) {
|
|
194
201
|
return d;
|
195
202
|
}
|
196
203
|
|
197
|
-
#ifdef
|
204
|
+
#ifdef ANNOYLIB_USE_AVX
|
198
205
|
// Horizontal single sum of 256bit vector.
|
199
206
|
inline float hsum256_ps_avx(__m256 v) {
|
200
207
|
const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v));
|
@@ -277,7 +284,7 @@ inline float euclidean_distance<float>(const float* x, const float* y, int f) {
|
|
277
284
|
|
278
285
|
#endif
|
279
286
|
|
280
|
-
#ifdef
|
287
|
+
#ifdef ANNOYLIB_USE_AVX512
|
281
288
|
template<>
|
282
289
|
inline float dot<float>(const float* x, const float *y, int f) {
|
283
290
|
float result = 0;
|
@@ -452,7 +459,7 @@ struct Angular : Base {
|
|
452
459
|
S children[2]; // Will possibly store more than 2
|
453
460
|
T norm;
|
454
461
|
};
|
455
|
-
T v[
|
462
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
456
463
|
};
|
457
464
|
template<typename S, typename T>
|
458
465
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
@@ -523,7 +530,7 @@ struct DotProduct : Angular {
|
|
523
530
|
S n_descendants;
|
524
531
|
S children[2]; // Will possibly store more than 2
|
525
532
|
T dot_factor;
|
526
|
-
T v[
|
533
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
527
534
|
};
|
528
535
|
|
529
536
|
static const char* name() {
|
@@ -630,7 +637,7 @@ struct Hamming : Base {
|
|
630
637
|
struct Node {
|
631
638
|
S n_descendants;
|
632
639
|
S children[2];
|
633
|
-
T v[
|
640
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
634
641
|
};
|
635
642
|
|
636
643
|
static const size_t max_iterations = 20;
|
@@ -659,7 +666,7 @@ struct Hamming : Base {
|
|
659
666
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
660
667
|
size_t dist = 0;
|
661
668
|
for (int i = 0; i < f; i++) {
|
662
|
-
dist +=
|
669
|
+
dist += annoylib_popcount(x->v[i] ^ y->v[i]);
|
663
670
|
}
|
664
671
|
return dist;
|
665
672
|
}
|
@@ -727,7 +734,7 @@ struct Minkowski : Base {
|
|
727
734
|
S n_descendants;
|
728
735
|
T a; // need an extra constant term to determine the offset of the plane
|
729
736
|
S children[2];
|
730
|
-
T v[
|
737
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
731
738
|
};
|
732
739
|
template<typename S, typename T>
|
733
740
|
static inline T margin(const Node<S, T>* n, const T* y, int f) {
|
@@ -815,7 +822,7 @@ struct Manhattan : Minkowski {
|
|
815
822
|
}
|
816
823
|
};
|
817
824
|
|
818
|
-
template<typename S, typename T>
|
825
|
+
template<typename S, typename T, typename R = uint64_t>
|
819
826
|
class AnnoyIndexInterface {
|
820
827
|
public:
|
821
828
|
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
|
@@ -833,12 +840,18 @@ class AnnoyIndexInterface {
|
|
833
840
|
virtual S get_n_trees() const = 0;
|
834
841
|
virtual void verbose(bool v) = 0;
|
835
842
|
virtual void get_item(S item, T* v) const = 0;
|
836
|
-
virtual void set_seed(
|
843
|
+
virtual void set_seed(R q) = 0;
|
837
844
|
virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
|
838
845
|
};
|
839
846
|
|
840
847
|
template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy>
|
841
|
-
class AnnoyIndex : public AnnoyIndexInterface<S, T
|
848
|
+
class AnnoyIndex : public AnnoyIndexInterface<S, T,
|
849
|
+
#if __cplusplus >= 201103L
|
850
|
+
typename std::remove_const<decltype(Random::default_seed)>::type
|
851
|
+
#else
|
852
|
+
typename Random::seed_type
|
853
|
+
#endif
|
854
|
+
> {
|
842
855
|
/*
|
843
856
|
* We use random projection to build a forest of binary trees of all items.
|
844
857
|
* Basically just split the hyperspace into two sides by a hyperplane,
|
@@ -849,6 +862,11 @@ template<typename S, typename T, typename Distance, typename Random, class Threa
|
|
849
862
|
public:
|
850
863
|
typedef Distance D;
|
851
864
|
typedef typename D::template Node<S, T> Node;
|
865
|
+
#if __cplusplus >= 201103L
|
866
|
+
typedef typename std::remove_const<decltype(Random::default_seed)>::type R;
|
867
|
+
#else
|
868
|
+
typedef typename Random::seed_type R;
|
869
|
+
#endif
|
852
870
|
|
853
871
|
protected:
|
854
872
|
const int _f;
|
@@ -859,8 +877,7 @@ protected:
|
|
859
877
|
S _nodes_size;
|
860
878
|
vector<S> _roots;
|
861
879
|
S _K;
|
862
|
-
|
863
|
-
int _seed;
|
880
|
+
R _seed;
|
864
881
|
bool _loaded;
|
865
882
|
bool _verbose;
|
866
883
|
int _fd;
|
@@ -869,8 +886,8 @@ protected:
|
|
869
886
|
public:
|
870
887
|
|
871
888
|
AnnoyIndex() : _f(0), _fd(0), _nodes(NULL), _n_items(0), _n_nodes(0), _nodes_size(0),
|
872
|
-
|
873
|
-
AnnoyIndex(int f) : _f(f) {
|
889
|
+
_loaded(false), _verbose(false), _on_disk(false), _built(false) { }
|
890
|
+
AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) {
|
874
891
|
_s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
|
875
892
|
_verbose = false;
|
876
893
|
_built = false;
|
@@ -924,7 +941,7 @@ public:
|
|
924
941
|
return false;
|
925
942
|
}
|
926
943
|
_nodes_size = 1;
|
927
|
-
if (ftruncate(_fd,
|
944
|
+
if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) {
|
928
945
|
set_error_from_errno(error, "Unable to truncate");
|
929
946
|
return false;
|
930
947
|
}
|
@@ -960,7 +977,7 @@ public:
|
|
960
977
|
memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s);
|
961
978
|
_n_nodes += _roots.size();
|
962
979
|
|
963
|
-
if (_verbose)
|
980
|
+
if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes);
|
964
981
|
|
965
982
|
if (_on_disk) {
|
966
983
|
if (!remap_memory_and_truncate(&_nodes, _fd,
|
@@ -1029,7 +1046,7 @@ public:
|
|
1029
1046
|
_n_nodes = 0;
|
1030
1047
|
_nodes_size = 0;
|
1031
1048
|
_on_disk = false;
|
1032
|
-
|
1049
|
+
_seed = Random::default_seed;
|
1033
1050
|
_roots.clear();
|
1034
1051
|
}
|
1035
1052
|
|
@@ -1048,7 +1065,7 @@ public:
|
|
1048
1065
|
}
|
1049
1066
|
}
|
1050
1067
|
reinitialize();
|
1051
|
-
if (_verbose)
|
1068
|
+
if (_verbose) annoylib_showUpdate("unloaded\n");
|
1052
1069
|
}
|
1053
1070
|
|
1054
1071
|
bool load(const char* filename, bool prefault=false, char** error=NULL) {
|
@@ -1076,7 +1093,7 @@ public:
|
|
1076
1093
|
#ifdef MAP_POPULATE
|
1077
1094
|
flags |= MAP_POPULATE;
|
1078
1095
|
#else
|
1079
|
-
|
1096
|
+
annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform");
|
1080
1097
|
#endif
|
1081
1098
|
}
|
1082
1099
|
_nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0);
|
@@ -1100,7 +1117,7 @@ public:
|
|
1100
1117
|
_loaded = true;
|
1101
1118
|
_built = true;
|
1102
1119
|
_n_items = m;
|
1103
|
-
if (_verbose)
|
1120
|
+
if (_verbose) annoylib_showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
|
1104
1121
|
return true;
|
1105
1122
|
}
|
1106
1123
|
|
@@ -1136,16 +1153,13 @@ public:
|
|
1136
1153
|
memcpy(v, m->v, (_f) * sizeof(T));
|
1137
1154
|
}
|
1138
1155
|
|
1139
|
-
void set_seed(
|
1140
|
-
_is_seeded = true;
|
1156
|
+
void set_seed(R seed) {
|
1141
1157
|
_seed = seed;
|
1142
1158
|
}
|
1143
1159
|
|
1144
1160
|
void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
|
1145
|
-
Random _random;
|
1146
1161
|
// Each thread needs its own seed, otherwise each thread would be building the same tree(s)
|
1147
|
-
|
1148
|
-
_random.set_seed(seed);
|
1162
|
+
Random _random(_seed + thread_idx);
|
1149
1163
|
|
1150
1164
|
vector<S> thread_roots;
|
1151
1165
|
while (1) {
|
@@ -1162,7 +1176,7 @@ public:
|
|
1162
1176
|
}
|
1163
1177
|
}
|
1164
1178
|
|
1165
|
-
if (_verbose)
|
1179
|
+
if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size());
|
1166
1180
|
|
1167
1181
|
vector<S> indices;
|
1168
1182
|
threaded_build_policy.lock_shared_nodes();
|
@@ -1192,14 +1206,14 @@ protected:
|
|
1192
1206
|
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
|
1193
1207
|
static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
|
1194
1208
|
_verbose)
|
1195
|
-
|
1209
|
+
annoylib_showUpdate("File truncation error\n");
|
1196
1210
|
} else {
|
1197
1211
|
_nodes = realloc(_nodes, _s * new_nodes_size);
|
1198
1212
|
memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
|
1199
1213
|
}
|
1200
1214
|
|
1201
1215
|
_nodes_size = new_nodes_size;
|
1202
|
-
if (_verbose)
|
1216
|
+
if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
|
1203
1217
|
}
|
1204
1218
|
|
1205
1219
|
void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
|
@@ -1281,7 +1295,7 @@ protected:
|
|
1281
1295
|
bool side = D::side(m, n->v, _f, _random);
|
1282
1296
|
children_indices[side].push_back(j);
|
1283
1297
|
} else {
|
1284
|
-
|
1298
|
+
annoylib_showUpdate("No node for index %d?\n", j);
|
1285
1299
|
}
|
1286
1300
|
}
|
1287
1301
|
|
@@ -1293,7 +1307,7 @@ protected:
|
|
1293
1307
|
// If we didn't find a hyperplane, just randomize sides as a last option
|
1294
1308
|
while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
|
1295
1309
|
if (_verbose)
|
1296
|
-
|
1310
|
+
annoylib_showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n",
|
1297
1311
|
children_indices[0].size(), children_indices[1].size());
|
1298
1312
|
|
1299
1313
|
children_indices[0].clear();
|
@@ -1477,5 +1491,7 @@ public:
|
|
1477
1491
|
};
|
1478
1492
|
#endif
|
1479
1493
|
|
1494
|
+
}
|
1495
|
+
|
1480
1496
|
#endif
|
1481
1497
|
// vim: tabstop=2 shiftwidth=2
|
data/ext/annoy/src/kissrandom.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef ANNOY_KISSRANDOM_H
|
2
|
+
#define ANNOY_KISSRANDOM_H
|
3
3
|
|
4
4
|
#if defined(_MSC_VER) && _MSC_VER == 1500
|
5
5
|
typedef unsigned __int32 uint32_t;
|
@@ -8,6 +8,8 @@ typedef unsigned __int64 uint64_t;
|
|
8
8
|
#include <stdint.h>
|
9
9
|
#endif
|
10
10
|
|
11
|
+
namespace Annoy {
|
12
|
+
|
11
13
|
// KISS = "keep it simple, stupid", but high quality random number generator
|
12
14
|
// http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code"
|
13
15
|
// http://mathforum.org/kb/message.jspa?messageID=6627731
|
@@ -20,8 +22,13 @@ struct Kiss32Random {
|
|
20
22
|
uint32_t z;
|
21
23
|
uint32_t c;
|
22
24
|
|
25
|
+
static const uint32_t default_seed = 123456789;
|
26
|
+
#if __cplusplus < 201103L
|
27
|
+
typedef uint32_t seed_type;
|
28
|
+
#endif
|
29
|
+
|
23
30
|
// seed must be != 0
|
24
|
-
Kiss32Random(uint32_t seed =
|
31
|
+
Kiss32Random(uint32_t seed = default_seed) {
|
25
32
|
x = seed;
|
26
33
|
y = 362436000;
|
27
34
|
z = 521288629;
|
@@ -64,8 +71,13 @@ struct Kiss64Random {
|
|
64
71
|
uint64_t z;
|
65
72
|
uint64_t c;
|
66
73
|
|
74
|
+
static const uint64_t default_seed = 1234567890987654321ULL;
|
75
|
+
#if __cplusplus < 201103L
|
76
|
+
typedef uint64_t seed_type;
|
77
|
+
#endif
|
78
|
+
|
67
79
|
// seed must be != 0
|
68
|
-
Kiss64Random(uint64_t seed =
|
80
|
+
Kiss64Random(uint64_t seed = default_seed) {
|
69
81
|
x = seed;
|
70
82
|
y = 362436362436362436ULL;
|
71
83
|
z = 1066149217761810ULL;
|
@@ -97,10 +109,12 @@ struct Kiss64Random {
|
|
97
109
|
// Draw random integer between 0 and n-1 where n is at most the number of data points you have
|
98
110
|
return kiss() % n;
|
99
111
|
}
|
100
|
-
inline void set_seed(
|
112
|
+
inline void set_seed(uint64_t seed) {
|
101
113
|
x = seed;
|
102
114
|
}
|
103
115
|
};
|
104
116
|
|
117
|
+
}
|
118
|
+
|
105
119
|
#endif
|
106
120
|
// vim: tabstop=2 shiftwidth=2
|
data/lib/annoy/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# Annoy.rb is a Ruby wrapper for Annoy (Approximate Nearest Neighbors Oh Yeah).
|
4
4
|
module Annoy
|
5
5
|
# The version of Annoy.rb you are using.
|
6
|
-
VERSION = '0.
|
6
|
+
VERSION = '0.7.0'
|
7
7
|
|
8
8
|
# The version of Annoy included with gem.
|
9
|
-
ANNOY_VERSION = '1.17.
|
9
|
+
ANNOY_VERSION = '1.17.1'
|
10
10
|
end
|
data/lib/annoy-rb.rb
ADDED
data/lib/annoy.rb
CHANGED
@@ -30,30 +30,40 @@ module Annoy
|
|
30
30
|
# @return [String]
|
31
31
|
attr_reader :metric
|
32
32
|
|
33
|
+
# Returns the data type of feature.
|
34
|
+
# @return [String]
|
35
|
+
attr_reader :dtype
|
36
|
+
|
33
37
|
# Create a new search index.
|
34
38
|
#
|
35
39
|
# @param n_features [Integer] The number of features (dimensions) of stored vector.
|
36
40
|
# @param metric [String] The distance metric between vectors ('angular', 'dot', 'hamming', 'euclidean', or 'manhattan').
|
37
|
-
|
41
|
+
# @param dtype [String] The data type of features ('float64' and 'float32').
|
42
|
+
# If metric is given 'hamming', 'uint64' is automatically assigned to this argument.
|
43
|
+
def initialize(n_features:, metric: 'angular', dtype: 'float64') # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
38
44
|
raise ArgumentError, 'Expect n_features to be Integer.' unless n_features.is_a?(Numeric)
|
39
45
|
|
40
46
|
@n_features = n_features.to_i
|
41
47
|
@metric = metric
|
48
|
+
@dtype = dtype
|
42
49
|
|
50
|
+
# rubocop:disable Layout/LineLength
|
43
51
|
@index = case @metric
|
44
52
|
when 'angular'
|
45
|
-
AnnoyIndexAngular.new(@n_features)
|
53
|
+
@dtype == 'float64' ? AnnoyIndexAngular.new(@n_features) : AnnoyIndexAngularFloat32.new(@n_features)
|
46
54
|
when 'dot'
|
47
|
-
AnnoyIndexDotProduct.new(@n_features)
|
55
|
+
@dtype == 'float64' ? AnnoyIndexDotProduct.new(@n_features) : AnnoyIndexDotProductFloat32.new(@n_features)
|
48
56
|
when 'hamming'
|
57
|
+
@dtype = 'uint64'
|
49
58
|
AnnoyIndexHamming.new(@n_features)
|
50
59
|
when 'euclidean'
|
51
|
-
AnnoyIndexEuclidean.new(@n_features)
|
60
|
+
@dtype == 'float64' ? AnnoyIndexEuclidean.new(@n_features) : AnnoyIndexEuclideanFloat32.new(@n_features)
|
52
61
|
when 'manhattan'
|
53
|
-
AnnoyIndexManhattan.new(@n_features)
|
62
|
+
@dtype == 'float64' ? AnnoyIndexManhattan.new(@n_features) : AnnoyIndexManhattanFloat32.new(@n_features)
|
54
63
|
else
|
55
64
|
raise ArgumentError, "No such metric: #{@metric}."
|
56
65
|
end
|
66
|
+
# rubocop:enable Layout/LineLength
|
57
67
|
end
|
58
68
|
|
59
69
|
# Add item to be indexed.
|
@@ -69,6 +79,7 @@ module Annoy
|
|
69
79
|
#
|
70
80
|
# @param n_trees [Integer] The number of trees. More trees gives higher search precision.
|
71
81
|
# @param n_jobs [Integer] The number of threads used to build the trees. If -1 is given, uses all available CPU cores.
|
82
|
+
# This parameter is enabled only if "-DANNOYLIB_MULTITHREADED_BUILD" is specified on gem installation.
|
72
83
|
# @return [Boolean]
|
73
84
|
def build(n_trees, n_jobs: -1)
|
74
85
|
@index.build(n_trees, n_jobs)
|
data/sig/annoy.rbs
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Annoy
|
2
2
|
VERSION: String
|
3
|
+
ANNOY_VERSION: String
|
3
4
|
|
4
5
|
class AnnoyIndex
|
5
6
|
attr_reader n_features: Integer
|
6
7
|
attr_reader metric: String
|
8
|
+
attr_reader dtype: String
|
7
9
|
|
8
|
-
def initialize: (n_features: Integer n_features, ?metric: String metric) -> void
|
10
|
+
def initialize: (n_features: Integer n_features, ?metric: String metric, ?dtype: String dtype) -> void
|
9
11
|
def add_item: (Integer i, Array[Float | Integer] v) -> bool
|
10
12
|
def build: (Integer n_trees, ?n_jobs: Integer n_jobs) -> bool
|
11
13
|
def save: (String filename, ?prefault: bool prefault) -> bool
|
@@ -40,6 +42,24 @@ module Annoy
|
|
40
42
|
def seed: (Integer s) -> nil
|
41
43
|
end
|
42
44
|
|
45
|
+
class AnnoyIndexAngularFloat32
|
46
|
+
def initialize: (Integer n_features) -> void
|
47
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
48
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
49
|
+
def save: (String filename, bool prefault) -> bool
|
50
|
+
def load: (String filename, bool prefault) -> bool
|
51
|
+
def unload: () -> bool
|
52
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
53
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
54
|
+
def get_item: (Integer i) -> Array[Float]
|
55
|
+
def get_distance: (Integer i, Integer j) -> Float
|
56
|
+
def n_items: () -> Integer
|
57
|
+
def n_trees: () -> Integer
|
58
|
+
def on_disk_build: (String filename) -> bool
|
59
|
+
def verbose: (bool flag) -> nil
|
60
|
+
def seed: (Integer s) -> nil
|
61
|
+
end
|
62
|
+
|
43
63
|
class AnnoyIndexDotProduct
|
44
64
|
def initialize: (Integer n_features) -> void
|
45
65
|
def add_item: (Integer i, Array[Float] v) -> bool
|
@@ -58,6 +78,24 @@ module Annoy
|
|
58
78
|
def seed: (Integer s) -> nil
|
59
79
|
end
|
60
80
|
|
81
|
+
class AnnoyIndexDotProductFloat32
|
82
|
+
def initialize: (Integer n_features) -> void
|
83
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
84
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
85
|
+
def save: (String filename, bool prefault) -> bool
|
86
|
+
def load: (String filename, bool prefault) -> bool
|
87
|
+
def unload: () -> bool
|
88
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
89
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
90
|
+
def get_item: (Integer i) -> Array[Float]
|
91
|
+
def get_distance: (Integer i, Integer j) -> Float
|
92
|
+
def n_items: () -> Integer
|
93
|
+
def n_trees: () -> Integer
|
94
|
+
def on_disk_build: (String filename) -> bool
|
95
|
+
def verbose: (bool flag) -> nil
|
96
|
+
def seed: (Integer s) -> nil
|
97
|
+
end
|
98
|
+
|
61
99
|
class AnnoyIndexHamming
|
62
100
|
def initialize: (Integer n_features) -> void
|
63
101
|
def add_item: (Integer i, Array[Integer] v) -> bool
|
@@ -94,6 +132,24 @@ module Annoy
|
|
94
132
|
def seed: (Integer s) -> nil
|
95
133
|
end
|
96
134
|
|
135
|
+
class AnnoyIndexEuclideanFloat32
|
136
|
+
def initialize: (Integer n_features) -> void
|
137
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
138
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
139
|
+
def save: (String filename, bool prefault) -> bool
|
140
|
+
def load: (String filename, bool prefault) -> bool
|
141
|
+
def unload: () -> bool
|
142
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
143
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
144
|
+
def get_item: (Integer i) -> Array[Float]
|
145
|
+
def get_distance: (Integer i, Integer j) -> Float
|
146
|
+
def n_items: () -> Integer
|
147
|
+
def n_trees: () -> Integer
|
148
|
+
def on_disk_build: (String filename) -> bool
|
149
|
+
def verbose: (bool flag) -> nil
|
150
|
+
def seed: (Integer s) -> nil
|
151
|
+
end
|
152
|
+
|
97
153
|
class AnnoyIndexManhattan
|
98
154
|
def initialize: (Integer n_features) -> void
|
99
155
|
def add_item: (Integer i, Array[Float] v) -> bool
|
@@ -111,4 +167,22 @@ module Annoy
|
|
111
167
|
def verbose: (bool flag) -> nil
|
112
168
|
def seed: (Integer s) -> nil
|
113
169
|
end
|
170
|
+
|
171
|
+
class AnnoyIndexManhattanFloat32
|
172
|
+
def initialize: (Integer n_features) -> void
|
173
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
174
|
+
def build: (Integer n_trees, Integer n_jobs) -> bool
|
175
|
+
def save: (String filename, bool prefault) -> bool
|
176
|
+
def load: (String filename, bool prefault) -> bool
|
177
|
+
def unload: () -> bool
|
178
|
+
def get_nns_by_item: (Integer i, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
179
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, Integer search_k, (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
180
|
+
def get_item: (Integer i) -> Array[Float]
|
181
|
+
def get_distance: (Integer i, Integer j) -> Float
|
182
|
+
def n_items: () -> Integer
|
183
|
+
def n_trees: () -> Integer
|
184
|
+
def on_disk_build: (String filename) -> bool
|
185
|
+
def verbose: (bool flag) -> nil
|
186
|
+
def seed: (Integer s) -> nil
|
187
|
+
end
|
114
188
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: annoy-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Annoy.rb provides Ruby bindings for the Annoy (Approximate Nearest Neighbors
|
14
14
|
Oh Yeah).
|
@@ -19,17 +19,9 @@ extensions:
|
|
19
19
|
- ext/annoy/extconf.rb
|
20
20
|
extra_rdoc_files: []
|
21
21
|
files:
|
22
|
-
- ".github/workflows/build.yml"
|
23
|
-
- ".gitignore"
|
24
|
-
- ".rspec"
|
25
22
|
- CHANGELOG.md
|
26
|
-
- CODE_OF_CONDUCT.md
|
27
|
-
- Gemfile
|
28
23
|
- LICENSE.txt
|
29
24
|
- README.md
|
30
|
-
- Rakefile
|
31
|
-
- Steepfile
|
32
|
-
- annoy-rb.gemspec
|
33
25
|
- ext/annoy/annoyext.cpp
|
34
26
|
- ext/annoy/annoyext.hpp
|
35
27
|
- ext/annoy/extconf.rb
|
@@ -37,17 +29,19 @@ files:
|
|
37
29
|
- ext/annoy/src/annoylib.h
|
38
30
|
- ext/annoy/src/kissrandom.h
|
39
31
|
- ext/annoy/src/mman.h
|
32
|
+
- lib/annoy-rb.rb
|
40
33
|
- lib/annoy.rb
|
41
34
|
- lib/annoy/version.rb
|
42
35
|
- sig/annoy.rbs
|
43
|
-
homepage: https://github.com/yoshoku/annoy
|
36
|
+
homepage: https://github.com/yoshoku/annoy-rb
|
44
37
|
licenses:
|
45
38
|
- Apache-2.0
|
46
39
|
metadata:
|
47
|
-
homepage_uri: https://github.com/yoshoku/annoy
|
48
|
-
source_code_uri: https://github.com/yoshoku/annoy
|
49
|
-
changelog_uri: https://github.com/yoshoku/annoy
|
50
|
-
documentation_uri: https://yoshoku.github.io/annoy
|
40
|
+
homepage_uri: https://github.com/yoshoku/annoy-rb
|
41
|
+
source_code_uri: https://github.com/yoshoku/annoy-rb
|
42
|
+
changelog_uri: https://github.com/yoshoku/annoy-rb/blob/main/CHANGELOG.md
|
43
|
+
documentation_uri: https://yoshoku.github.io/annoy-rb/doc/
|
44
|
+
rubygems_mfa_required: 'true'
|
51
45
|
post_install_message:
|
52
46
|
rdoc_options: []
|
53
47
|
require_paths:
|
@@ -63,7 +57,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
57
|
- !ruby/object:Gem::Version
|
64
58
|
version: '0'
|
65
59
|
requirements: []
|
66
|
-
rubygems_version: 3.2.
|
60
|
+
rubygems_version: 3.2.33
|
67
61
|
signing_key:
|
68
62
|
specification_version: 4
|
69
63
|
summary: Ruby bindings for the Annoy (Approximate Nearest Neighbors Oh Yeah).
|