annoy-rb 0.6.1 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +5 -5
- data/ext/annoy/annoyext.hpp +8 -6
- data/ext/annoy/src/LICENSE +1 -1
- data/ext/annoy/src/annoylib.h +75 -59
- data/ext/annoy/src/kissrandom.h +19 -5
- data/lib/annoy/version.rb +2 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1f1b7326f096ef01be709b819e6fcac709776e5561d931b8b8a5b022aed1dc2
|
4
|
+
data.tar.gz: e767849b0970773b1c01307b830d6bbf15c250d86a4716d809867dc0cf8f211d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da4627d3414ed6ba062a3fb5d9ba546a32dced3e11d42ab4756badc219c29813b9d5389cbcb2159c027ad9a3b43c58f23f0f19660836c557d12c2c1b2a37330f
|
7
|
+
data.tar.gz: f195b96f9010a67ccc01088006a09e1ec5f857e7226f3d6b7c58a9ab5205757365061bbd1eb571715bfcd24a8f47c00e6aea7fcc1d7024f8cb6e511b7cb37255
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.7.1
|
2
|
+
|
3
|
+
- Fix a bug where item elements were converted to unsigned integers when dtype is set to float32 ([#3](https://github.com/yoshoku/annoy-rb/issues/3)).
|
4
|
+
|
5
|
+
## 0.7.0
|
6
|
+
|
7
|
+
- Update bundled Annoy version to 1.17.1.
|
8
|
+
- Refactor config files.
|
9
|
+
|
1
10
|
## 0.6.1
|
2
11
|
|
3
12
|
- Refactor code and configs with RuboCop and clang-format.
|
data/README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# annoy-rb
|
2
2
|
|
3
|
-
[![Build Status](https://github.com/yoshoku/annoy
|
3
|
+
[![Build Status](https://github.com/yoshoku/annoy-rb/workflows/build/badge.svg)](https://github.com/yoshoku/annoy-rb/actions?query=workflow%3Abuild)
|
4
4
|
[![Gem Version](https://badge.fury.io/rb/annoy-rb.svg)](https://badge.fury.io/rb/annoy-rb)
|
5
|
-
[![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/annoy
|
6
|
-
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/annoy
|
5
|
+
[![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/annoy-rb/blob/main/LICENSE.txt)
|
6
|
+
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/annoy-rb/doc/)
|
7
7
|
|
8
8
|
annoy-rb provides Ruby bindings for the [Annoy (Approximate Nearest Neighbors Oh Yeah)](https://github.com/spotify/annoy).
|
9
9
|
|
@@ -40,7 +40,7 @@ $ gem install annoy-rb -- --with-cxxflags=-march=native
|
|
40
40
|
|
41
41
|
## Documentation
|
42
42
|
|
43
|
-
* [annoy-rb API Documentation](https://yoshoku.github.io/annoy
|
43
|
+
* [annoy-rb API Documentation](https://yoshoku.github.io/annoy-rb/doc/)
|
44
44
|
|
45
45
|
## Usage
|
46
46
|
|
@@ -81,6 +81,6 @@ The gem is available as open source under the terms of the [Apache-2.0 License](
|
|
81
81
|
|
82
82
|
## Contributing
|
83
83
|
|
84
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/annoy
|
84
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/annoy-rb.
|
85
85
|
This project is intended to be a safe, welcoming space for collaboration,
|
86
86
|
and contributors are expected to adhere to the [Contributor Covenant](https://contributor-covenant.org) code of conduct.
|
data/ext/annoy/annoyext.hpp
CHANGED
@@ -26,6 +26,8 @@
|
|
26
26
|
#include <annoylib.h>
|
27
27
|
#include <kissrandom.h>
|
28
28
|
|
29
|
+
using namespace Annoy;
|
30
|
+
|
29
31
|
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
30
32
|
typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
|
31
33
|
#else
|
@@ -109,7 +111,7 @@ private:
|
|
109
111
|
|
110
112
|
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
111
113
|
for (int i = 0; i < n_dims; i++) {
|
112
|
-
vec[i] = typeid(F) == typeid(
|
114
|
+
vec[i] = (F)(typeid(F) == typeid(uint64_t) ? NUM2UINT(rb_ary_entry(arr, i)) : NUM2DBL(rb_ary_entry(arr, i)));
|
113
115
|
}
|
114
116
|
|
115
117
|
char* error;
|
@@ -193,7 +195,7 @@ private:
|
|
193
195
|
const int sz_distances = distances.size();
|
194
196
|
VALUE distances_arr = rb_ary_new2(sz_distances);
|
195
197
|
for (int i = 0; i < sz_distances; i++) {
|
196
|
-
rb_ary_store(distances_arr, i, typeid(F) == typeid(
|
198
|
+
rb_ary_store(distances_arr, i, typeid(F) == typeid(uint64_t) ? UINT2NUM(distances[i]) : DBL2NUM(distances[i]));
|
197
199
|
}
|
198
200
|
VALUE res = rb_ary_new2(2);
|
199
201
|
rb_ary_store(res, 0, neighbors_arr);
|
@@ -220,7 +222,7 @@ private:
|
|
220
222
|
|
221
223
|
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
222
224
|
for (int i = 0; i < n_dims; i++) {
|
223
|
-
vec[i] = typeid(F) == typeid(
|
225
|
+
vec[i] = (F)(typeid(F) == typeid(uint64_t) ? NUM2UINT(rb_ary_entry(_vec, i)) : NUM2DBL(rb_ary_entry(_vec, i)));
|
224
226
|
}
|
225
227
|
|
226
228
|
const int n_neighbors = NUM2INT(_n_neighbors);
|
@@ -244,7 +246,7 @@ private:
|
|
244
246
|
const int sz_distances = distances.size();
|
245
247
|
VALUE distances_arr = rb_ary_new2(sz_distances);
|
246
248
|
for (int i = 0; i < sz_distances; i++) {
|
247
|
-
rb_ary_store(distances_arr, i, typeid(F) == typeid(
|
249
|
+
rb_ary_store(distances_arr, i, typeid(F) == typeid(uint64_t) ? UINT2NUM(distances[i]) : DBL2NUM(distances[i]));
|
248
250
|
}
|
249
251
|
VALUE res = rb_ary_new2(2);
|
250
252
|
rb_ary_store(res, 0, neighbors_arr);
|
@@ -264,7 +266,7 @@ private:
|
|
264
266
|
get_annoy_index(self)->get_item(idx, vec);
|
265
267
|
|
266
268
|
for (int i = 0; i < n_dims; i++) {
|
267
|
-
rb_ary_store(arr, i, typeid(F) == typeid(
|
269
|
+
rb_ary_store(arr, i, typeid(F) == typeid(uint64_t) ? UINT2NUM(vec[i]) : DBL2NUM(vec[i]));
|
268
270
|
}
|
269
271
|
|
270
272
|
ruby_xfree(vec);
|
@@ -275,7 +277,7 @@ private:
|
|
275
277
|
const int32_t i = (int32_t)NUM2INT(_i);
|
276
278
|
const int32_t j = (int32_t)NUM2INT(_j);
|
277
279
|
const F dist = get_annoy_index(self)->get_distance(i, j);
|
278
|
-
return typeid(F) == typeid(
|
280
|
+
return typeid(F) == typeid(uint64_t) ? UINT2NUM(dist) : DBL2NUM(dist);
|
279
281
|
};
|
280
282
|
|
281
283
|
static VALUE _annoy_index_get_n_items(VALUE self) {
|
data/ext/annoy/src/LICENSE
CHANGED
@@ -187,7 +187,7 @@
|
|
187
187
|
same "printed page" as the copyright notice for easier
|
188
188
|
identification within third-party archives.
|
189
189
|
|
190
|
-
Copyright
|
190
|
+
Copyright 2021 (c) Spotify and its affiliates.
|
191
191
|
|
192
192
|
Licensed under the Apache License, Version 2.0 (the "License");
|
193
193
|
you may not use this file except in compliance with the License.
|
data/ext/annoy/src/annoylib.h
CHANGED
@@ -13,8 +13,8 @@
|
|
13
13
|
// the License.
|
14
14
|
|
15
15
|
|
16
|
-
#ifndef
|
17
|
-
#define
|
16
|
+
#ifndef ANNOY_ANNOYLIB_H
|
17
|
+
#define ANNOY_ANNOYLIB_H
|
18
18
|
|
19
19
|
#include <stdio.h>
|
20
20
|
#include <sys/stat.h>
|
@@ -58,6 +58,10 @@ typedef signed __int64 int64_t;
|
|
58
58
|
#include <queue>
|
59
59
|
#include <limits>
|
60
60
|
|
61
|
+
#if __cplusplus >= 201103L
|
62
|
+
#include <type_traits>
|
63
|
+
#endif
|
64
|
+
|
61
65
|
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
62
66
|
#include <thread>
|
63
67
|
#include <mutex>
|
@@ -72,9 +76,9 @@ typedef signed __int64 int64_t;
|
|
72
76
|
// This allows others to supply their own logger / error printer without
|
73
77
|
// requiring Annoy to import their headers. See RcppAnnoy for a use case.
|
74
78
|
#ifndef __ERROR_PRINTER_OVERRIDE__
|
75
|
-
#define
|
79
|
+
#define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
|
76
80
|
#else
|
77
|
-
#define
|
81
|
+
#define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
|
78
82
|
#endif
|
79
83
|
|
80
84
|
// Portable alloc definition, cf Writing R Extensions, Section 1.6.4
|
@@ -87,40 +91,24 @@ typedef signed __int64 int64_t;
|
|
87
91
|
# include <alloca.h>
|
88
92
|
#endif
|
89
93
|
|
90
|
-
inline void set_error_from_errno(char **error, const char* msg) {
|
91
|
-
showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
92
|
-
if (error) {
|
93
|
-
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
94
|
-
sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
|
95
|
-
}
|
96
|
-
}
|
97
|
-
|
98
|
-
inline void set_error_from_string(char **error, const char* msg) {
|
99
|
-
showUpdate("%s\n", msg);
|
100
|
-
if (error) {
|
101
|
-
*error = (char *)malloc(strlen(msg) + 1);
|
102
|
-
strcpy(*error, msg);
|
103
|
-
}
|
104
|
-
}
|
105
|
-
|
106
94
|
// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number.
|
107
95
|
// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low.
|
108
|
-
#define
|
96
|
+
#define ANNOYLIB_V_ARRAY_SIZE 65536
|
109
97
|
|
110
98
|
#ifndef _MSC_VER
|
111
|
-
#define
|
99
|
+
#define annoylib_popcount __builtin_popcountll
|
112
100
|
#else // See #293, #358
|
113
|
-
#define
|
101
|
+
#define annoylib_popcount cole_popcount
|
114
102
|
#endif
|
115
103
|
|
116
104
|
#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402
|
117
|
-
#define
|
105
|
+
#define ANNOYLIB_USE_AVX512
|
118
106
|
#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
|
119
|
-
#define
|
107
|
+
#define ANNOYLIB_USE_AVX
|
120
108
|
#else
|
121
109
|
#endif
|
122
110
|
|
123
|
-
#if defined(
|
111
|
+
#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512)
|
124
112
|
#if defined(_MSC_VER)
|
125
113
|
#include <intrin.h>
|
126
114
|
#elif defined(__GNUC__)
|
@@ -129,11 +117,30 @@ inline void set_error_from_string(char **error, const char* msg) {
|
|
129
117
|
#endif
|
130
118
|
|
131
119
|
#if !defined(__MINGW32__)
|
132
|
-
#define
|
120
|
+
#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast<int64_t>(x)
|
133
121
|
#else
|
134
|
-
#define
|
122
|
+
#define ANNOYLIB_FTRUNCATE_SIZE(x) (x)
|
135
123
|
#endif
|
136
124
|
|
125
|
+
namespace Annoy {
|
126
|
+
|
127
|
+
inline void set_error_from_errno(char **error, const char* msg) {
|
128
|
+
annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
|
129
|
+
if (error) {
|
130
|
+
*error = (char *)malloc(256); // TODO: win doesn't support snprintf
|
131
|
+
sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
inline void set_error_from_string(char **error, const char* msg) {
|
136
|
+
annoylib_showUpdate("%s\n", msg);
|
137
|
+
if (error) {
|
138
|
+
*error = (char *)malloc(strlen(msg) + 1);
|
139
|
+
strcpy(*error, msg);
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
143
|
+
|
137
144
|
using std::vector;
|
138
145
|
using std::pair;
|
139
146
|
using std::numeric_limits;
|
@@ -145,7 +152,7 @@ inline bool remap_memory_and_truncate(void** _ptr, int _fd, size_t old_size, siz
|
|
145
152
|
bool ok = ftruncate(_fd, new_size) != -1;
|
146
153
|
#else
|
147
154
|
munmap(*_ptr, old_size);
|
148
|
-
bool ok = ftruncate(_fd,
|
155
|
+
bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1;
|
149
156
|
#ifdef MAP_POPULATE
|
150
157
|
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
|
151
158
|
#else
|
@@ -194,7 +201,7 @@ inline T euclidean_distance(const T* x, const T* y, int f) {
|
|
194
201
|
return d;
|
195
202
|
}
|
196
203
|
|
197
|
-
#ifdef
|
204
|
+
#ifdef ANNOYLIB_USE_AVX
|
198
205
|
// Horizontal single sum of 256bit vector.
|
199
206
|
inline float hsum256_ps_avx(__m256 v) {
|
200
207
|
const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v));
|
@@ -277,7 +284,7 @@ inline float euclidean_distance<float>(const float* x, const float* y, int f) {
|
|
277
284
|
|
278
285
|
#endif
|
279
286
|
|
280
|
-
#ifdef
|
287
|
+
#ifdef ANNOYLIB_USE_AVX512
|
281
288
|
template<>
|
282
289
|
inline float dot<float>(const float* x, const float *y, int f) {
|
283
290
|
float result = 0;
|
@@ -452,7 +459,7 @@ struct Angular : Base {
|
|
452
459
|
S children[2]; // Will possibly store more than 2
|
453
460
|
T norm;
|
454
461
|
};
|
455
|
-
T v[
|
462
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
456
463
|
};
|
457
464
|
template<typename S, typename T>
|
458
465
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
@@ -523,7 +530,7 @@ struct DotProduct : Angular {
|
|
523
530
|
S n_descendants;
|
524
531
|
S children[2]; // Will possibly store more than 2
|
525
532
|
T dot_factor;
|
526
|
-
T v[
|
533
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
527
534
|
};
|
528
535
|
|
529
536
|
static const char* name() {
|
@@ -630,7 +637,7 @@ struct Hamming : Base {
|
|
630
637
|
struct Node {
|
631
638
|
S n_descendants;
|
632
639
|
S children[2];
|
633
|
-
T v[
|
640
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
634
641
|
};
|
635
642
|
|
636
643
|
static const size_t max_iterations = 20;
|
@@ -659,7 +666,7 @@ struct Hamming : Base {
|
|
659
666
|
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
|
660
667
|
size_t dist = 0;
|
661
668
|
for (int i = 0; i < f; i++) {
|
662
|
-
dist +=
|
669
|
+
dist += annoylib_popcount(x->v[i] ^ y->v[i]);
|
663
670
|
}
|
664
671
|
return dist;
|
665
672
|
}
|
@@ -727,7 +734,7 @@ struct Minkowski : Base {
|
|
727
734
|
S n_descendants;
|
728
735
|
T a; // need an extra constant term to determine the offset of the plane
|
729
736
|
S children[2];
|
730
|
-
T v[
|
737
|
+
T v[ANNOYLIB_V_ARRAY_SIZE];
|
731
738
|
};
|
732
739
|
template<typename S, typename T>
|
733
740
|
static inline T margin(const Node<S, T>* n, const T* y, int f) {
|
@@ -815,7 +822,7 @@ struct Manhattan : Minkowski {
|
|
815
822
|
}
|
816
823
|
};
|
817
824
|
|
818
|
-
template<typename S, typename T>
|
825
|
+
template<typename S, typename T, typename R = uint64_t>
|
819
826
|
class AnnoyIndexInterface {
|
820
827
|
public:
|
821
828
|
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
|
@@ -833,12 +840,18 @@ class AnnoyIndexInterface {
|
|
833
840
|
virtual S get_n_trees() const = 0;
|
834
841
|
virtual void verbose(bool v) = 0;
|
835
842
|
virtual void get_item(S item, T* v) const = 0;
|
836
|
-
virtual void set_seed(
|
843
|
+
virtual void set_seed(R q) = 0;
|
837
844
|
virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
|
838
845
|
};
|
839
846
|
|
840
847
|
template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy>
|
841
|
-
class AnnoyIndex : public AnnoyIndexInterface<S, T
|
848
|
+
class AnnoyIndex : public AnnoyIndexInterface<S, T,
|
849
|
+
#if __cplusplus >= 201103L
|
850
|
+
typename std::remove_const<decltype(Random::default_seed)>::type
|
851
|
+
#else
|
852
|
+
typename Random::seed_type
|
853
|
+
#endif
|
854
|
+
> {
|
842
855
|
/*
|
843
856
|
* We use random projection to build a forest of binary trees of all items.
|
844
857
|
* Basically just split the hyperspace into two sides by a hyperplane,
|
@@ -849,6 +862,11 @@ template<typename S, typename T, typename Distance, typename Random, class Threa
|
|
849
862
|
public:
|
850
863
|
typedef Distance D;
|
851
864
|
typedef typename D::template Node<S, T> Node;
|
865
|
+
#if __cplusplus >= 201103L
|
866
|
+
typedef typename std::remove_const<decltype(Random::default_seed)>::type R;
|
867
|
+
#else
|
868
|
+
typedef typename Random::seed_type R;
|
869
|
+
#endif
|
852
870
|
|
853
871
|
protected:
|
854
872
|
const int _f;
|
@@ -859,8 +877,7 @@ protected:
|
|
859
877
|
S _nodes_size;
|
860
878
|
vector<S> _roots;
|
861
879
|
S _K;
|
862
|
-
|
863
|
-
int _seed;
|
880
|
+
R _seed;
|
864
881
|
bool _loaded;
|
865
882
|
bool _verbose;
|
866
883
|
int _fd;
|
@@ -869,8 +886,8 @@ protected:
|
|
869
886
|
public:
|
870
887
|
|
871
888
|
AnnoyIndex() : _f(0), _fd(0), _nodes(NULL), _n_items(0), _n_nodes(0), _nodes_size(0),
|
872
|
-
|
873
|
-
AnnoyIndex(int f) : _f(f) {
|
889
|
+
_loaded(false), _verbose(false), _on_disk(false), _built(false) { }
|
890
|
+
AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) {
|
874
891
|
_s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
|
875
892
|
_verbose = false;
|
876
893
|
_built = false;
|
@@ -924,7 +941,7 @@ public:
|
|
924
941
|
return false;
|
925
942
|
}
|
926
943
|
_nodes_size = 1;
|
927
|
-
if (ftruncate(_fd,
|
944
|
+
if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) {
|
928
945
|
set_error_from_errno(error, "Unable to truncate");
|
929
946
|
return false;
|
930
947
|
}
|
@@ -960,7 +977,7 @@ public:
|
|
960
977
|
memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s);
|
961
978
|
_n_nodes += _roots.size();
|
962
979
|
|
963
|
-
if (_verbose)
|
980
|
+
if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes);
|
964
981
|
|
965
982
|
if (_on_disk) {
|
966
983
|
if (!remap_memory_and_truncate(&_nodes, _fd,
|
@@ -1029,7 +1046,7 @@ public:
|
|
1029
1046
|
_n_nodes = 0;
|
1030
1047
|
_nodes_size = 0;
|
1031
1048
|
_on_disk = false;
|
1032
|
-
|
1049
|
+
_seed = Random::default_seed;
|
1033
1050
|
_roots.clear();
|
1034
1051
|
}
|
1035
1052
|
|
@@ -1048,7 +1065,7 @@ public:
|
|
1048
1065
|
}
|
1049
1066
|
}
|
1050
1067
|
reinitialize();
|
1051
|
-
if (_verbose)
|
1068
|
+
if (_verbose) annoylib_showUpdate("unloaded\n");
|
1052
1069
|
}
|
1053
1070
|
|
1054
1071
|
bool load(const char* filename, bool prefault=false, char** error=NULL) {
|
@@ -1076,7 +1093,7 @@ public:
|
|
1076
1093
|
#ifdef MAP_POPULATE
|
1077
1094
|
flags |= MAP_POPULATE;
|
1078
1095
|
#else
|
1079
|
-
|
1096
|
+
annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform");
|
1080
1097
|
#endif
|
1081
1098
|
}
|
1082
1099
|
_nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0);
|
@@ -1100,7 +1117,7 @@ public:
|
|
1100
1117
|
_loaded = true;
|
1101
1118
|
_built = true;
|
1102
1119
|
_n_items = m;
|
1103
|
-
if (_verbose)
|
1120
|
+
if (_verbose) annoylib_showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
|
1104
1121
|
return true;
|
1105
1122
|
}
|
1106
1123
|
|
@@ -1136,16 +1153,13 @@ public:
|
|
1136
1153
|
memcpy(v, m->v, (_f) * sizeof(T));
|
1137
1154
|
}
|
1138
1155
|
|
1139
|
-
void set_seed(
|
1140
|
-
_is_seeded = true;
|
1156
|
+
void set_seed(R seed) {
|
1141
1157
|
_seed = seed;
|
1142
1158
|
}
|
1143
1159
|
|
1144
1160
|
void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
|
1145
|
-
Random _random;
|
1146
1161
|
// Each thread needs its own seed, otherwise each thread would be building the same tree(s)
|
1147
|
-
|
1148
|
-
_random.set_seed(seed);
|
1162
|
+
Random _random(_seed + thread_idx);
|
1149
1163
|
|
1150
1164
|
vector<S> thread_roots;
|
1151
1165
|
while (1) {
|
@@ -1162,7 +1176,7 @@ public:
|
|
1162
1176
|
}
|
1163
1177
|
}
|
1164
1178
|
|
1165
|
-
if (_verbose)
|
1179
|
+
if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size());
|
1166
1180
|
|
1167
1181
|
vector<S> indices;
|
1168
1182
|
threaded_build_policy.lock_shared_nodes();
|
@@ -1192,14 +1206,14 @@ protected:
|
|
1192
1206
|
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
|
1193
1207
|
static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
|
1194
1208
|
_verbose)
|
1195
|
-
|
1209
|
+
annoylib_showUpdate("File truncation error\n");
|
1196
1210
|
} else {
|
1197
1211
|
_nodes = realloc(_nodes, _s * new_nodes_size);
|
1198
1212
|
memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
|
1199
1213
|
}
|
1200
1214
|
|
1201
1215
|
_nodes_size = new_nodes_size;
|
1202
|
-
if (_verbose)
|
1216
|
+
if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
|
1203
1217
|
}
|
1204
1218
|
|
1205
1219
|
void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
|
@@ -1281,7 +1295,7 @@ protected:
|
|
1281
1295
|
bool side = D::side(m, n->v, _f, _random);
|
1282
1296
|
children_indices[side].push_back(j);
|
1283
1297
|
} else {
|
1284
|
-
|
1298
|
+
annoylib_showUpdate("No node for index %d?\n", j);
|
1285
1299
|
}
|
1286
1300
|
}
|
1287
1301
|
|
@@ -1293,7 +1307,7 @@ protected:
|
|
1293
1307
|
// If we didn't find a hyperplane, just randomize sides as a last option
|
1294
1308
|
while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
|
1295
1309
|
if (_verbose)
|
1296
|
-
|
1310
|
+
annoylib_showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n",
|
1297
1311
|
children_indices[0].size(), children_indices[1].size());
|
1298
1312
|
|
1299
1313
|
children_indices[0].clear();
|
@@ -1477,5 +1491,7 @@ public:
|
|
1477
1491
|
};
|
1478
1492
|
#endif
|
1479
1493
|
|
1494
|
+
}
|
1495
|
+
|
1480
1496
|
#endif
|
1481
1497
|
// vim: tabstop=2 shiftwidth=2
|
data/ext/annoy/src/kissrandom.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef ANNOY_KISSRANDOM_H
|
2
|
+
#define ANNOY_KISSRANDOM_H
|
3
3
|
|
4
4
|
#if defined(_MSC_VER) && _MSC_VER == 1500
|
5
5
|
typedef unsigned __int32 uint32_t;
|
@@ -8,6 +8,8 @@ typedef unsigned __int64 uint64_t;
|
|
8
8
|
#include <stdint.h>
|
9
9
|
#endif
|
10
10
|
|
11
|
+
namespace Annoy {
|
12
|
+
|
11
13
|
// KISS = "keep it simple, stupid", but high quality random number generator
|
12
14
|
// http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code"
|
13
15
|
// http://mathforum.org/kb/message.jspa?messageID=6627731
|
@@ -20,8 +22,13 @@ struct Kiss32Random {
|
|
20
22
|
uint32_t z;
|
21
23
|
uint32_t c;
|
22
24
|
|
25
|
+
static const uint32_t default_seed = 123456789;
|
26
|
+
#if __cplusplus < 201103L
|
27
|
+
typedef uint32_t seed_type;
|
28
|
+
#endif
|
29
|
+
|
23
30
|
// seed must be != 0
|
24
|
-
Kiss32Random(uint32_t seed =
|
31
|
+
Kiss32Random(uint32_t seed = default_seed) {
|
25
32
|
x = seed;
|
26
33
|
y = 362436000;
|
27
34
|
z = 521288629;
|
@@ -64,8 +71,13 @@ struct Kiss64Random {
|
|
64
71
|
uint64_t z;
|
65
72
|
uint64_t c;
|
66
73
|
|
74
|
+
static const uint64_t default_seed = 1234567890987654321ULL;
|
75
|
+
#if __cplusplus < 201103L
|
76
|
+
typedef uint64_t seed_type;
|
77
|
+
#endif
|
78
|
+
|
67
79
|
// seed must be != 0
|
68
|
-
Kiss64Random(uint64_t seed =
|
80
|
+
Kiss64Random(uint64_t seed = default_seed) {
|
69
81
|
x = seed;
|
70
82
|
y = 362436362436362436ULL;
|
71
83
|
z = 1066149217761810ULL;
|
@@ -97,10 +109,12 @@ struct Kiss64Random {
|
|
97
109
|
// Draw random integer between 0 and n-1 where n is at most the number of data points you have
|
98
110
|
return kiss() % n;
|
99
111
|
}
|
100
|
-
inline void set_seed(
|
112
|
+
inline void set_seed(uint64_t seed) {
|
101
113
|
x = seed;
|
102
114
|
}
|
103
115
|
};
|
104
116
|
|
117
|
+
}
|
118
|
+
|
105
119
|
#endif
|
106
120
|
// vim: tabstop=2 shiftwidth=2
|
data/lib/annoy/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# Annoy.rb is a Ruby wrapper for Annoy (Approximate Nearest Neighbors Oh Yeah).
|
4
4
|
module Annoy
|
5
5
|
# The version of Annoy.rb you are using.
|
6
|
-
VERSION = '0.
|
6
|
+
VERSION = '0.7.1'
|
7
7
|
|
8
8
|
# The version of Annoy included with gem.
|
9
|
-
ANNOY_VERSION = '1.17.
|
9
|
+
ANNOY_VERSION = '1.17.1'
|
10
10
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: annoy-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Annoy.rb provides Ruby bindings for the Annoy (Approximate Nearest Neighbors
|
14
14
|
Oh Yeah).
|
@@ -33,14 +33,14 @@ files:
|
|
33
33
|
- lib/annoy.rb
|
34
34
|
- lib/annoy/version.rb
|
35
35
|
- sig/annoy.rbs
|
36
|
-
homepage: https://github.com/yoshoku/annoy
|
36
|
+
homepage: https://github.com/yoshoku/annoy-rb
|
37
37
|
licenses:
|
38
38
|
- Apache-2.0
|
39
39
|
metadata:
|
40
|
-
homepage_uri: https://github.com/yoshoku/annoy
|
41
|
-
source_code_uri: https://github.com/yoshoku/annoy
|
42
|
-
changelog_uri: https://github.com/yoshoku/annoy
|
43
|
-
documentation_uri: https://yoshoku.github.io/annoy
|
40
|
+
homepage_uri: https://github.com/yoshoku/annoy-rb
|
41
|
+
source_code_uri: https://github.com/yoshoku/annoy-rb
|
42
|
+
changelog_uri: https://github.com/yoshoku/annoy-rb/blob/main/CHANGELOG.md
|
43
|
+
documentation_uri: https://yoshoku.github.io/annoy-rb/doc/
|
44
44
|
rubygems_mfa_required: 'true'
|
45
45
|
post_install_message:
|
46
46
|
rdoc_options: []
|