annoy-rb 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +20 -7
- data/ext/annoy/annoyext.cpp +1 -3
- data/ext/annoy/annoyext.hpp +282 -278
- data/lib/annoy/version.rb +1 -1
- data/lib/annoy-rb.rb +3 -0
- data/lib/annoy.rb +3 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 504bef50c398d29e30123e694558cbbb9edc5c3958f457ce3b5622e51467decf
|
4
|
+
data.tar.gz: 39965aeddd097635ccdd9e926d2a1b349c294c41a44c2cc5f729c5bc5431d99e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56dfefa366db41856aebc90c23ccbd775bbc0eaf354e80f87568049ff05b039ce07e875da2be6a4e5730415f255d8e972800326a597ce073056803508de1291c
|
7
|
+
data.tar.gz: 353473a64ffcae0beb9f0d4af7eb6fac85404f3103ad325ece10d7a59813b5eb778e98195d16801036fc167a118d408e4cfd58825c954db077c0234927faf3d2
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.6.1
|
2
|
+
|
3
|
+
- Refactor codes and configs with RuboCop and clang-format.
|
4
|
+
|
1
5
|
## 0.6.0
|
2
6
|
- Add `dtype` argument to initialize method to specify the data type of vector element.
|
3
7
|
If you want to load a search index created with the Python bindings, specify 'float32' to the dtype argument.
|
data/README.md
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
#
|
1
|
+
# annoy-rb
|
2
2
|
|
3
3
|
[](https://github.com/yoshoku/annoy.rb/actions?query=workflow%3Abuild)
|
4
4
|
[](https://badge.fury.io/rb/annoy-rb)
|
5
5
|
[](https://github.com/yoshoku/annoy.rb/blob/main/LICENSE.txt)
|
6
|
-
[](https://yoshoku.github.io/annoy.rb/doc/)
|
7
7
|
|
8
|
-
|
8
|
+
annoy-rb provides Ruby bindings for the [Annoy (Approximate Nearest Neighbors Oh Yeah)](https://github.com/spotify/annoy).
|
9
9
|
|
10
10
|
## Installation
|
11
11
|
|
@@ -23,11 +23,24 @@ Or install it yourself as:
|
|
23
23
|
|
24
24
|
$ gem install annoy-rb
|
25
25
|
|
26
|
-
Note:
|
26
|
+
Note: annoy-rb does not require the installation of another external library.
|
27
|
+
In addition, annoy-rb does not give any optimization options when building native extensions.
|
28
|
+
If necessary, add optimization options yourself during installation, as follows;
|
29
|
+
|
30
|
+
```
|
31
|
+
$ bundle config --local build.annoy-rb "--with-cxxflags=-march=native"
|
32
|
+
$ bundle install
|
33
|
+
```
|
34
|
+
|
35
|
+
Or:
|
36
|
+
|
37
|
+
```
|
38
|
+
$ gem install annoy-rb -- --with-cxxflags=-march=native
|
39
|
+
```
|
27
40
|
|
28
41
|
## Documentation
|
29
42
|
|
30
|
-
* [
|
43
|
+
* [annoy-rb API Documentation](https://yoshoku.github.io/annoy.rb/doc/)
|
31
44
|
|
32
45
|
## Usage
|
33
46
|
|
@@ -50,7 +63,7 @@ u.load('test.ann')
|
|
50
63
|
p u.get_nns_by_item(0, 100) # will find the 100 nearest neighbors.
|
51
64
|
```
|
52
65
|
|
53
|
-
With the default argument, annoy
|
66
|
+
With the default argument, annoy-rb uses double precision floating point type for the data type of vector element.
|
54
67
|
On the other hand, the [Python bindings of Annoy](https://pypi.org/project/annoy/) use single precision floating point type.
|
55
68
|
If you want to load a search index created with the Python bindings, specify 'float32' to the dtype argument.
|
56
69
|
|
@@ -70,4 +83,4 @@ The gem is available as open source under the terms of the [Apache-2.0 License](
|
|
70
83
|
|
71
84
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/annoy.rb.
|
72
85
|
This project is intended to be a safe, welcoming space for collaboration,
|
73
|
-
and contributors are expected to adhere to the [Contributor Covenant](
|
86
|
+
and contributors are expected to adhere to the [Contributor Covenant](https://contributor-covenant.org) code of conduct.
|
data/ext/annoy/annoyext.cpp
CHANGED
@@ -18,9 +18,7 @@
|
|
18
18
|
|
19
19
|
#include "annoyext.hpp"
|
20
20
|
|
21
|
-
extern "C"
|
22
|
-
void Init_annoyext(void)
|
23
|
-
{
|
21
|
+
extern "C" void Init_annoyext(void) {
|
24
22
|
VALUE rb_mAnnoy = rb_define_module("Annoy");
|
25
23
|
RbAnnoyIndex<AnnoyIndexAngular<double>, double>::define_class(rb_mAnnoy, "AnnoyIndexAngular");
|
26
24
|
RbAnnoyIndex<AnnoyIndexDotProduct<double>, double>::define_class(rb_mAnnoy, "AnnoyIndexDotProduct");
|
data/ext/annoy/annoyext.hpp
CHANGED
@@ -22,301 +22,304 @@
|
|
22
22
|
#include <typeinfo>
|
23
23
|
|
24
24
|
#include <ruby.h>
|
25
|
+
|
25
26
|
#include <annoylib.h>
|
26
27
|
#include <kissrandom.h>
|
27
28
|
|
28
29
|
#ifdef ANNOYLIB_MULTITHREADED_BUILD
|
29
|
-
|
30
|
+
typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
|
30
31
|
#else
|
31
|
-
|
32
|
+
typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
|
32
33
|
#endif
|
33
34
|
|
35
|
+
// clang-format off
|
34
36
|
template<typename F> using AnnoyIndexAngular = AnnoyIndex<int32_t, F, Angular, Kiss64Random, AnnoyIndexThreadedBuildPolicy>;
|
35
37
|
template<typename F> using AnnoyIndexDotProduct = AnnoyIndex<int32_t, F, DotProduct, Kiss64Random, AnnoyIndexThreadedBuildPolicy>;
|
36
38
|
template<typename F> using AnnoyIndexHamming = AnnoyIndex<int32_t, F, Hamming, Kiss64Random, AnnoyIndexThreadedBuildPolicy>;
|
37
39
|
template<typename F> using AnnoyIndexEuclidean = AnnoyIndex<int32_t, F, Euclidean, Kiss64Random, AnnoyIndexThreadedBuildPolicy>;
|
38
40
|
template<typename F> using AnnoyIndexManhattan = AnnoyIndex<int32_t, F, Manhattan, Kiss64Random, AnnoyIndexThreadedBuildPolicy>;
|
39
|
-
|
40
|
-
|
41
|
-
{
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
VALUE error_str = rb_str_new_cstr(error);
|
118
|
-
free(error);
|
119
|
-
ruby_xfree(vec);
|
120
|
-
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
121
|
-
return Qfalse;
|
122
|
-
}
|
123
|
-
|
41
|
+
// clang-format on
|
42
|
+
|
43
|
+
template <class T, typename F> class RbAnnoyIndex {
|
44
|
+
public:
|
45
|
+
static VALUE annoy_index_alloc(VALUE self) {
|
46
|
+
T* ptr = (T*)ruby_xmalloc(sizeof(T));
|
47
|
+
new (ptr) T();
|
48
|
+
return TypedData_Wrap_Struct(self, &annoy_index_type, ptr);
|
49
|
+
};
|
50
|
+
|
51
|
+
static void annoy_index_free(void* ptr) {
|
52
|
+
((T*)ptr)->~AnnoyIndex();
|
53
|
+
ruby_xfree(ptr);
|
54
|
+
};
|
55
|
+
|
56
|
+
static size_t annoy_index_size(const void* ptr) { return sizeof(*((T*)ptr)); };
|
57
|
+
|
58
|
+
static T* get_annoy_index(VALUE self) {
|
59
|
+
T* ptr;
|
60
|
+
TypedData_Get_Struct(self, T, &annoy_index_type, ptr);
|
61
|
+
return ptr;
|
62
|
+
};
|
63
|
+
|
64
|
+
static VALUE define_class(VALUE rb_mAnnoy, const char* class_name) {
|
65
|
+
VALUE rb_cAnnoyIndex = rb_define_class_under(rb_mAnnoy, class_name, rb_cObject);
|
66
|
+
rb_define_alloc_func(rb_cAnnoyIndex, annoy_index_alloc);
|
67
|
+
rb_define_method(rb_cAnnoyIndex, "initialize", RUBY_METHOD_FUNC(_annoy_index_init), 1);
|
68
|
+
rb_define_method(rb_cAnnoyIndex, "add_item", RUBY_METHOD_FUNC(_annoy_index_add_item), 2);
|
69
|
+
rb_define_method(rb_cAnnoyIndex, "build", RUBY_METHOD_FUNC(_annoy_index_build), 2);
|
70
|
+
rb_define_method(rb_cAnnoyIndex, "save", RUBY_METHOD_FUNC(_annoy_index_save), 2);
|
71
|
+
rb_define_method(rb_cAnnoyIndex, "load", RUBY_METHOD_FUNC(_annoy_index_load), 2);
|
72
|
+
rb_define_method(rb_cAnnoyIndex, "unload", RUBY_METHOD_FUNC(_annoy_index_unload), 0);
|
73
|
+
rb_define_method(rb_cAnnoyIndex, "get_nns_by_item", RUBY_METHOD_FUNC(_annoy_index_get_nns_by_item), 4);
|
74
|
+
rb_define_method(rb_cAnnoyIndex, "get_nns_by_vector", RUBY_METHOD_FUNC(_annoy_index_get_nns_by_vector), 4);
|
75
|
+
rb_define_method(rb_cAnnoyIndex, "get_item", RUBY_METHOD_FUNC(_annoy_index_get_item), 1);
|
76
|
+
rb_define_method(rb_cAnnoyIndex, "get_distance", RUBY_METHOD_FUNC(_annoy_index_get_distance), 2);
|
77
|
+
rb_define_method(rb_cAnnoyIndex, "get_n_items", RUBY_METHOD_FUNC(_annoy_index_get_n_items), 0);
|
78
|
+
rb_define_method(rb_cAnnoyIndex, "get_n_trees", RUBY_METHOD_FUNC(_annoy_index_get_n_trees), 0);
|
79
|
+
rb_define_method(rb_cAnnoyIndex, "on_disk_build", RUBY_METHOD_FUNC(_annoy_index_on_disk_build), 1);
|
80
|
+
rb_define_method(rb_cAnnoyIndex, "set_seed", RUBY_METHOD_FUNC(_annoy_index_set_seed), 1);
|
81
|
+
rb_define_method(rb_cAnnoyIndex, "verbose", RUBY_METHOD_FUNC(_annoy_index_verbose), 1);
|
82
|
+
rb_define_method(rb_cAnnoyIndex, "get_f", RUBY_METHOD_FUNC(_annoy_index_get_f), 0);
|
83
|
+
return rb_cAnnoyIndex;
|
84
|
+
};
|
85
|
+
|
86
|
+
private:
|
87
|
+
static const rb_data_type_t annoy_index_type;
|
88
|
+
|
89
|
+
static VALUE _annoy_index_init(VALUE self, VALUE _n_dims) {
|
90
|
+
const int n_dims = NUM2INT(_n_dims);
|
91
|
+
T* ptr = get_annoy_index(self);
|
92
|
+
new (ptr) T(n_dims);
|
93
|
+
return Qnil;
|
94
|
+
};
|
95
|
+
|
96
|
+
static VALUE _annoy_index_add_item(VALUE self, VALUE _idx, VALUE arr) {
|
97
|
+
const int32_t idx = (int32_t)NUM2INT(_idx);
|
98
|
+
const int n_dims = get_annoy_index(self)->get_f();
|
99
|
+
|
100
|
+
if (!RB_TYPE_P(arr, T_ARRAY)) {
|
101
|
+
rb_raise(rb_eArgError, "Expect item vector to be Array.");
|
102
|
+
return Qfalse;
|
103
|
+
}
|
104
|
+
|
105
|
+
if (n_dims != RARRAY_LEN(arr)) {
|
106
|
+
rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
|
107
|
+
return Qfalse;
|
108
|
+
}
|
109
|
+
|
110
|
+
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
111
|
+
for (int i = 0; i < n_dims; i++) {
|
112
|
+
vec[i] = typeid(F) == typeid(double) ? NUM2DBL(rb_ary_entry(arr, i)) : NUM2UINT(rb_ary_entry(arr, i));
|
113
|
+
}
|
114
|
+
|
115
|
+
char* error;
|
116
|
+
if (!get_annoy_index(self)->add_item(idx, vec, &error)) {
|
117
|
+
VALUE error_str = rb_str_new_cstr(error);
|
118
|
+
free(error);
|
124
119
|
ruby_xfree(vec);
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
120
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
121
|
+
return Qfalse;
|
122
|
+
}
|
123
|
+
|
124
|
+
ruby_xfree(vec);
|
125
|
+
return Qtrue;
|
126
|
+
};
|
127
|
+
|
128
|
+
static VALUE _annoy_index_build(VALUE self, VALUE _n_trees, VALUE _n_jobs) {
|
129
|
+
const int n_trees = NUM2INT(_n_trees);
|
130
|
+
const int n_jobs = NUM2INT(_n_jobs);
|
131
|
+
char* error;
|
132
|
+
if (!get_annoy_index(self)->build(n_trees, n_jobs, &error)) {
|
133
|
+
VALUE error_str = rb_str_new_cstr(error);
|
134
|
+
free(error);
|
135
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
136
|
+
return Qfalse;
|
137
|
+
}
|
138
|
+
return Qtrue;
|
139
|
+
};
|
140
|
+
|
141
|
+
static VALUE _annoy_index_save(VALUE self, VALUE _filename, VALUE _prefault) {
|
142
|
+
const char* filename = StringValuePtr(_filename);
|
143
|
+
const bool prefault = _prefault == Qtrue ? true : false;
|
144
|
+
char* error;
|
145
|
+
if (!get_annoy_index(self)->save(filename, prefault, &error)) {
|
146
|
+
VALUE error_str = rb_str_new_cstr(error);
|
147
|
+
free(error);
|
148
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
149
|
+
return Qfalse;
|
150
|
+
}
|
151
|
+
RB_GC_GUARD(_filename);
|
152
|
+
return Qtrue;
|
153
|
+
};
|
154
|
+
|
155
|
+
static VALUE _annoy_index_load(VALUE self, VALUE _filename, VALUE _prefault) {
|
156
|
+
const char* filename = StringValuePtr(_filename);
|
157
|
+
const bool prefault = _prefault == Qtrue ? true : false;
|
158
|
+
char* error;
|
159
|
+
if (!get_annoy_index(self)->load(filename, prefault, &error)) {
|
160
|
+
VALUE error_str = rb_str_new_cstr(error);
|
161
|
+
free(error);
|
162
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
163
|
+
return Qfalse;
|
164
|
+
}
|
165
|
+
RB_GC_GUARD(_filename);
|
166
|
+
return Qtrue;
|
167
|
+
};
|
168
|
+
|
169
|
+
static VALUE _annoy_index_unload(VALUE self) {
|
170
|
+
get_annoy_index(self)->unload();
|
171
|
+
return Qnil;
|
172
|
+
};
|
173
|
+
|
174
|
+
static VALUE _annoy_index_get_nns_by_item(VALUE self, VALUE _idx, VALUE _n_neighbors, VALUE _search_k,
|
175
|
+
VALUE _include_distances) {
|
176
|
+
const int32_t idx = (int32_t)NUM2INT(_idx);
|
177
|
+
const int n_neighbors = NUM2INT(_n_neighbors);
|
178
|
+
const int search_k = NUM2INT(_search_k);
|
179
|
+
const bool include_distances = _include_distances == Qtrue ? true : false;
|
180
|
+
std::vector<int32_t> neighbors;
|
181
|
+
std::vector<F> distances;
|
182
|
+
|
183
|
+
get_annoy_index(self)->get_nns_by_item(idx, n_neighbors, search_k, &neighbors, include_distances ? &distances : NULL);
|
184
|
+
|
185
|
+
const int sz_neighbors = neighbors.size();
|
186
|
+
VALUE neighbors_arr = rb_ary_new2(sz_neighbors);
|
187
|
+
|
188
|
+
for (int i = 0; i < sz_neighbors; i++) {
|
189
|
+
rb_ary_store(neighbors_arr, i, INT2NUM((int)(neighbors[i])));
|
190
|
+
}
|
191
|
+
|
192
|
+
if (include_distances) {
|
193
|
+
const int sz_distances = distances.size();
|
194
|
+
VALUE distances_arr = rb_ary_new2(sz_distances);
|
195
|
+
for (int i = 0; i < sz_distances; i++) {
|
196
|
+
rb_ary_store(distances_arr, i, typeid(F) == typeid(double) ? DBL2NUM(distances[i]) : UINT2NUM(distances[i]));
|
137
197
|
}
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
const int
|
185
|
-
VALUE
|
186
|
-
|
187
|
-
|
188
|
-
rb_ary_store(neighbors_arr, i, INT2NUM((int)(neighbors[i])));
|
189
|
-
}
|
190
|
-
|
191
|
-
if (include_distances) {
|
192
|
-
const int sz_distances = distances.size();
|
193
|
-
VALUE distances_arr = rb_ary_new2(sz_distances);
|
194
|
-
for (int i = 0; i < sz_distances; i++) {
|
195
|
-
rb_ary_store(distances_arr, i, typeid(F) == typeid(double) ? DBL2NUM(distances[i]) : UINT2NUM(distances[i]));
|
196
|
-
}
|
197
|
-
VALUE res = rb_ary_new2(2);
|
198
|
-
rb_ary_store(res, 0, neighbors_arr);
|
199
|
-
rb_ary_store(res, 1, distances_arr);
|
200
|
-
return res;
|
201
|
-
}
|
202
|
-
|
203
|
-
return neighbors_arr;
|
204
|
-
};
|
205
|
-
|
206
|
-
static VALUE _annoy_index_get_nns_by_vector(VALUE self, VALUE _vec, VALUE _n_neighbors, VALUE _search_k, VALUE _include_distances) {
|
207
|
-
const int n_dims = get_annoy_index(self)->get_f();
|
208
|
-
|
209
|
-
if (!RB_TYPE_P(_vec, T_ARRAY)) {
|
210
|
-
rb_raise(rb_eArgError, "Expect item vector to be Array.");
|
211
|
-
return Qfalse;
|
212
|
-
}
|
213
|
-
|
214
|
-
if (n_dims != RARRAY_LEN(_vec)) {
|
215
|
-
rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
|
216
|
-
return Qfalse;
|
217
|
-
}
|
218
|
-
|
219
|
-
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
220
|
-
for (int i = 0; i < n_dims; i++) {
|
221
|
-
vec[i] = typeid(F) == typeid(double) ? NUM2DBL(rb_ary_entry(_vec, i)) : NUM2UINT(rb_ary_entry(_vec, i));
|
222
|
-
}
|
223
|
-
|
224
|
-
const int n_neighbors = NUM2INT(_n_neighbors);
|
225
|
-
const int search_k = NUM2INT(_search_k);
|
226
|
-
const bool include_distances = _include_distances == Qtrue ? true : false;
|
227
|
-
std::vector<int32_t> neighbors;
|
228
|
-
std::vector<F> distances;
|
229
|
-
|
230
|
-
get_annoy_index(self)->get_nns_by_vector(vec, n_neighbors, search_k, &neighbors, include_distances ? &distances : NULL);
|
231
|
-
|
232
|
-
ruby_xfree(vec);
|
233
|
-
|
234
|
-
const int sz_neighbors = neighbors.size();
|
235
|
-
VALUE neighbors_arr = rb_ary_new2(sz_neighbors);
|
236
|
-
|
237
|
-
for (int i = 0; i < sz_neighbors; i++) {
|
238
|
-
rb_ary_store(neighbors_arr, i, INT2NUM((int)(neighbors[i])));
|
239
|
-
}
|
240
|
-
|
241
|
-
if (include_distances) {
|
242
|
-
const int sz_distances = distances.size();
|
243
|
-
VALUE distances_arr = rb_ary_new2(sz_distances);
|
244
|
-
for (int i = 0; i < sz_distances; i++) {
|
245
|
-
rb_ary_store(distances_arr, i, typeid(F) == typeid(double) ? DBL2NUM(distances[i]) : UINT2NUM(distances[i]));
|
246
|
-
}
|
247
|
-
VALUE res = rb_ary_new2(2);
|
248
|
-
rb_ary_store(res, 0, neighbors_arr);
|
249
|
-
rb_ary_store(res, 1, distances_arr);
|
250
|
-
return res;
|
251
|
-
}
|
252
|
-
|
253
|
-
return neighbors_arr;
|
254
|
-
};
|
255
|
-
|
256
|
-
static VALUE _annoy_index_get_item(VALUE self, VALUE _idx) {
|
257
|
-
const int32_t idx = (int32_t)NUM2INT(_idx);
|
258
|
-
const int n_dims = get_annoy_index(self)->get_f();
|
259
|
-
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
260
|
-
VALUE arr = rb_ary_new2(n_dims);
|
261
|
-
|
262
|
-
get_annoy_index(self)->get_item(idx, vec);
|
263
|
-
|
264
|
-
for (int i = 0; i < n_dims; i++) {
|
265
|
-
rb_ary_store(arr, i, typeid(F) == typeid(double) ? DBL2NUM(vec[i]) : UINT2NUM(vec[i]));
|
266
|
-
}
|
267
|
-
|
268
|
-
ruby_xfree(vec);
|
269
|
-
return arr;
|
270
|
-
};
|
271
|
-
|
272
|
-
static VALUE _annoy_index_get_distance(VALUE self, VALUE _i, VALUE _j) {
|
273
|
-
const int32_t i = (int32_t)NUM2INT(_i);
|
274
|
-
const int32_t j = (int32_t)NUM2INT(_j);
|
275
|
-
const F dist = get_annoy_index(self)->get_distance(i, j);
|
276
|
-
return typeid(F) == typeid(double) ? DBL2NUM(dist) : UINT2NUM(dist);
|
277
|
-
};
|
278
|
-
|
279
|
-
static VALUE _annoy_index_get_n_items(VALUE self) {
|
280
|
-
const int32_t n_items = get_annoy_index(self)->get_n_items();
|
281
|
-
return INT2NUM(n_items);
|
282
|
-
};
|
283
|
-
|
284
|
-
static VALUE _annoy_index_get_n_trees(VALUE self) {
|
285
|
-
const int32_t n_trees = get_annoy_index(self)->get_n_trees();
|
286
|
-
return INT2NUM(n_trees);
|
287
|
-
};
|
288
|
-
|
289
|
-
static VALUE _annoy_index_on_disk_build(VALUE self, VALUE _filename) {
|
290
|
-
const char* filename = StringValuePtr(_filename);
|
291
|
-
char* error;
|
292
|
-
if (!get_annoy_index(self)->on_disk_build(filename, &error)) {
|
293
|
-
VALUE error_str = rb_str_new_cstr(error);
|
294
|
-
free(error);
|
295
|
-
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
296
|
-
return Qfalse;
|
198
|
+
VALUE res = rb_ary_new2(2);
|
199
|
+
rb_ary_store(res, 0, neighbors_arr);
|
200
|
+
rb_ary_store(res, 1, distances_arr);
|
201
|
+
return res;
|
202
|
+
}
|
203
|
+
|
204
|
+
return neighbors_arr;
|
205
|
+
};
|
206
|
+
|
207
|
+
static VALUE _annoy_index_get_nns_by_vector(VALUE self, VALUE _vec, VALUE _n_neighbors, VALUE _search_k,
|
208
|
+
VALUE _include_distances) {
|
209
|
+
const int n_dims = get_annoy_index(self)->get_f();
|
210
|
+
|
211
|
+
if (!RB_TYPE_P(_vec, T_ARRAY)) {
|
212
|
+
rb_raise(rb_eArgError, "Expect item vector to be Array.");
|
213
|
+
return Qfalse;
|
214
|
+
}
|
215
|
+
|
216
|
+
if (n_dims != RARRAY_LEN(_vec)) {
|
217
|
+
rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
|
218
|
+
return Qfalse;
|
219
|
+
}
|
220
|
+
|
221
|
+
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
222
|
+
for (int i = 0; i < n_dims; i++) {
|
223
|
+
vec[i] = typeid(F) == typeid(double) ? NUM2DBL(rb_ary_entry(_vec, i)) : NUM2UINT(rb_ary_entry(_vec, i));
|
224
|
+
}
|
225
|
+
|
226
|
+
const int n_neighbors = NUM2INT(_n_neighbors);
|
227
|
+
const int search_k = NUM2INT(_search_k);
|
228
|
+
const bool include_distances = _include_distances == Qtrue ? true : false;
|
229
|
+
std::vector<int32_t> neighbors;
|
230
|
+
std::vector<F> distances;
|
231
|
+
|
232
|
+
get_annoy_index(self)->get_nns_by_vector(vec, n_neighbors, search_k, &neighbors, include_distances ? &distances : NULL);
|
233
|
+
|
234
|
+
ruby_xfree(vec);
|
235
|
+
|
236
|
+
const int sz_neighbors = neighbors.size();
|
237
|
+
VALUE neighbors_arr = rb_ary_new2(sz_neighbors);
|
238
|
+
|
239
|
+
for (int i = 0; i < sz_neighbors; i++) {
|
240
|
+
rb_ary_store(neighbors_arr, i, INT2NUM((int)(neighbors[i])));
|
241
|
+
}
|
242
|
+
|
243
|
+
if (include_distances) {
|
244
|
+
const int sz_distances = distances.size();
|
245
|
+
VALUE distances_arr = rb_ary_new2(sz_distances);
|
246
|
+
for (int i = 0; i < sz_distances; i++) {
|
247
|
+
rb_ary_store(distances_arr, i, typeid(F) == typeid(double) ? DBL2NUM(distances[i]) : UINT2NUM(distances[i]));
|
297
248
|
}
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
}
|
249
|
+
VALUE res = rb_ary_new2(2);
|
250
|
+
rb_ary_store(res, 0, neighbors_arr);
|
251
|
+
rb_ary_store(res, 1, distances_arr);
|
252
|
+
return res;
|
253
|
+
}
|
254
|
+
|
255
|
+
return neighbors_arr;
|
256
|
+
};
|
257
|
+
|
258
|
+
static VALUE _annoy_index_get_item(VALUE self, VALUE _idx) {
|
259
|
+
const int32_t idx = (int32_t)NUM2INT(_idx);
|
260
|
+
const int n_dims = get_annoy_index(self)->get_f();
|
261
|
+
F* vec = (F*)ruby_xmalloc(n_dims * sizeof(F));
|
262
|
+
VALUE arr = rb_ary_new2(n_dims);
|
263
|
+
|
264
|
+
get_annoy_index(self)->get_item(idx, vec);
|
265
|
+
|
266
|
+
for (int i = 0; i < n_dims; i++) {
|
267
|
+
rb_ary_store(arr, i, typeid(F) == typeid(double) ? DBL2NUM(vec[i]) : UINT2NUM(vec[i]));
|
268
|
+
}
|
269
|
+
|
270
|
+
ruby_xfree(vec);
|
271
|
+
return arr;
|
272
|
+
};
|
273
|
+
|
274
|
+
static VALUE _annoy_index_get_distance(VALUE self, VALUE _i, VALUE _j) {
|
275
|
+
const int32_t i = (int32_t)NUM2INT(_i);
|
276
|
+
const int32_t j = (int32_t)NUM2INT(_j);
|
277
|
+
const F dist = get_annoy_index(self)->get_distance(i, j);
|
278
|
+
return typeid(F) == typeid(double) ? DBL2NUM(dist) : UINT2NUM(dist);
|
279
|
+
};
|
280
|
+
|
281
|
+
static VALUE _annoy_index_get_n_items(VALUE self) {
|
282
|
+
const int32_t n_items = get_annoy_index(self)->get_n_items();
|
283
|
+
return INT2NUM(n_items);
|
284
|
+
};
|
285
|
+
|
286
|
+
static VALUE _annoy_index_get_n_trees(VALUE self) {
|
287
|
+
const int32_t n_trees = get_annoy_index(self)->get_n_trees();
|
288
|
+
return INT2NUM(n_trees);
|
289
|
+
};
|
290
|
+
|
291
|
+
static VALUE _annoy_index_on_disk_build(VALUE self, VALUE _filename) {
|
292
|
+
const char* filename = StringValuePtr(_filename);
|
293
|
+
char* error;
|
294
|
+
if (!get_annoy_index(self)->on_disk_build(filename, &error)) {
|
295
|
+
VALUE error_str = rb_str_new_cstr(error);
|
296
|
+
free(error);
|
297
|
+
rb_raise(rb_eRuntimeError, "%s", StringValuePtr(error_str));
|
298
|
+
return Qfalse;
|
299
|
+
}
|
300
|
+
RB_GC_GUARD(_filename);
|
301
|
+
return Qtrue;
|
302
|
+
};
|
303
|
+
|
304
|
+
static VALUE _annoy_index_set_seed(VALUE self, VALUE _seed) {
|
305
|
+
const int seed = NUM2INT(_seed);
|
306
|
+
get_annoy_index(self)->set_seed(seed);
|
307
|
+
return Qnil;
|
308
|
+
};
|
309
|
+
|
310
|
+
static VALUE _annoy_index_verbose(VALUE self, VALUE _flag) {
|
311
|
+
const bool flag = _flag == Qtrue ? true : false;
|
312
|
+
get_annoy_index(self)->verbose(flag);
|
313
|
+
return Qnil;
|
314
|
+
};
|
315
|
+
|
316
|
+
static VALUE _annoy_index_get_f(VALUE self) {
|
317
|
+
const int32_t f = get_annoy_index(self)->get_f();
|
318
|
+
return INT2NUM(f);
|
319
|
+
};
|
318
320
|
};
|
319
321
|
|
322
|
+
// clang-format off
|
320
323
|
template<class T, typename F>
|
321
324
|
const rb_data_type_t RbAnnoyIndex<T, F>::annoy_index_type = {
|
322
325
|
"RbAnnoyIndex",
|
@@ -329,5 +332,6 @@ const rb_data_type_t RbAnnoyIndex<T, F>::annoy_index_type = {
|
|
329
332
|
NULL,
|
330
333
|
RUBY_TYPED_FREE_IMMEDIATELY
|
331
334
|
};
|
335
|
+
// clang-format on
|
332
336
|
|
333
337
|
#endif /* ANNOYEXT_HPP */
|
data/lib/annoy/version.rb
CHANGED
data/lib/annoy-rb.rb
ADDED
data/lib/annoy.rb
CHANGED
@@ -40,13 +40,14 @@ module Annoy
|
|
40
40
|
# @param metric [String] The distance metric between vectors ('angular', 'dot', 'hamming', 'euclidean', or 'manhattan').
|
41
41
|
# @param dtype [String] The data type of features ('float64' and 'float32').
|
42
42
|
# If metric is given 'hamming', 'uint64' is automatically assigned to this argument.
|
43
|
-
def initialize(n_features:, metric: 'angular', dtype: 'float64')
|
43
|
+
def initialize(n_features:, metric: 'angular', dtype: 'float64') # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
44
44
|
raise ArgumentError, 'Expect n_features to be Integer.' unless n_features.is_a?(Numeric)
|
45
45
|
|
46
46
|
@n_features = n_features.to_i
|
47
47
|
@metric = metric
|
48
48
|
@dtype = dtype
|
49
49
|
|
50
|
+
# rubocop:disable Layout/LineLength
|
50
51
|
@index = case @metric
|
51
52
|
when 'angular'
|
52
53
|
@dtype == 'float64' ? AnnoyIndexAngular.new(@n_features) : AnnoyIndexAngularFloat32.new(@n_features)
|
@@ -62,6 +63,7 @@ module Annoy
|
|
62
63
|
else
|
63
64
|
raise ArgumentError, "No such metric: #{@metric}."
|
64
65
|
end
|
66
|
+
# rubocop:enable Layout/LineLength
|
65
67
|
end
|
66
68
|
|
67
69
|
# Add item to be indexed.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: annoy-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Annoy.rb provides Ruby bindings for the Annoy (Approximate Nearest Neighbors
|
14
14
|
Oh Yeah).
|
@@ -29,6 +29,7 @@ files:
|
|
29
29
|
- ext/annoy/src/annoylib.h
|
30
30
|
- ext/annoy/src/kissrandom.h
|
31
31
|
- ext/annoy/src/mman.h
|
32
|
+
- lib/annoy-rb.rb
|
32
33
|
- lib/annoy.rb
|
33
34
|
- lib/annoy/version.rb
|
34
35
|
- sig/annoy.rbs
|
@@ -40,6 +41,7 @@ metadata:
|
|
40
41
|
source_code_uri: https://github.com/yoshoku/annoy.rb
|
41
42
|
changelog_uri: https://github.com/yoshoku/annoy.rb/blob/main/CHANGELOG.md
|
42
43
|
documentation_uri: https://yoshoku.github.io/annoy.rb/doc/
|
44
|
+
rubygems_mfa_required: 'true'
|
43
45
|
post_install_message:
|
44
46
|
rdoc_options: []
|
45
47
|
require_paths:
|
@@ -55,7 +57,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
55
57
|
- !ruby/object:Gem::Version
|
56
58
|
version: '0'
|
57
59
|
requirements: []
|
58
|
-
rubygems_version: 3.
|
60
|
+
rubygems_version: 3.2.33
|
59
61
|
signing_key:
|
60
62
|
specification_version: 4
|
61
63
|
summary: Ruby bindings for the Annoy (Approximate Nearest Neighbors Oh Yeah).
|