hnswlib 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/CHANGELOG.md +19 -1
- data/Gemfile +2 -0
- data/README.md +6 -2
- data/Steepfile +27 -0
- data/ext/hnswlib/hnswlibext.cpp +1 -0
- data/ext/hnswlib/hnswlibext.hpp +227 -4
- data/ext/hnswlib/src/bruteforce.h +3 -2
- data/ext/hnswlib/src/hnswalg.h +2 -1
- data/ext/hnswlib/src/space_ip.h +1 -0
- data/ext/hnswlib/src/space_l2.h +1 -0
- data/hnswlib.gemspec +1 -1
- data/lib/hnswlib/version.rb +2 -1
- data/lib/hnswlib.rb +6 -4
- data/sig/hnswlib.rbs +69 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99f5f1403a51083df75ef842a11b996fe6c159f95c3798f217a076cb5f535254
|
4
|
+
data.tar.gz: f9972ac4e644727a126e937e81494cc11bc051bc7ff0daf57139845aed1e7719
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4f96fceab228e3cffd9f2b77e513b86fe71ec3156cdff10c0bc9206ad7951f87c507531088f91f45a940c399102bff6a2dedc5fee8b09b37a5f5739e9a02b6c
|
7
|
+
data.tar.gz: 8297e1e0d1f6753b3e8900b07b80dba271276a14303b102cd8084e480ec491da269baac1fc8be08ab350976fc22cb859583745dc219d73d6f56b33fe7502ac2b
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,22 @@
|
|
1
|
-
## [
|
1
|
+
## [0.4.0] - 2021-09-12
|
2
|
+
|
3
|
+
- Add type declaration file.
|
4
|
+
|
5
|
+
## [0.3.0] - 2021-08-08
|
6
|
+
|
7
|
+
- Rename `Hnswlib::Index` to `Hnswlib::HnswIndex` (for compatibility, `Hnswlib::Index` is kept as an alias for `Hnswlib::HnswIndex`).
|
8
|
+
- Update API documentation.
|
9
|
+
|
10
|
+
## [0.2.0] - 2021-08-02
|
11
|
+
|
12
|
+
- Add binding class for the BruteforceSearch.
|
13
|
+
- Add type check for arguments of initialize methods of BruteforceSearch and HierarchicalNSW.
|
14
|
+
- Add a dummy constructor call at memory allocation for each class to prevent a segmentation fault on GC when the initialize method fails.
|
15
|
+
|
16
|
+
## [0.1.1] - 2021-07-25
|
17
|
+
|
18
|
+
- Fix to use `rb_obj_is_instance_of` for klass comparison due to a type error when loading a search index on irb 1.3.x: [issue #1](https://github.com/yoshoku/hnswlib.rb/issues/1)
|
19
|
+
- Update API documentation.
|
2
20
|
|
3
21
|
## [0.1.0] - 2021-07-24
|
4
22
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -35,7 +35,7 @@ Or install it yourself as:
|
|
35
35
|
require 'hnswlib'
|
36
36
|
|
37
37
|
f = 40 # length of item vector that will be indexed.
|
38
|
-
t = Hnswlib::
|
38
|
+
t = Hnswlib::HnswIndex.new(n_features: f, max_item: 1000)
|
39
39
|
|
40
40
|
1000.times do |i|
|
41
41
|
v = Array.new(f) { rand }
|
@@ -44,11 +44,15 @@ end
|
|
44
44
|
|
45
45
|
t.save('test.ann')
|
46
46
|
|
47
|
-
u = Hnswlib::
|
47
|
+
u = Hnswlib::HnswIndex.new(n_features: f, max_item: 1000)
|
48
48
|
u.load('test.ann')
|
49
49
|
p u.get_nns_by_item(0, 100) # will find the 100 nearest neighbors.
|
50
50
|
```
|
51
51
|
|
52
|
+
## License
|
53
|
+
|
54
|
+
The gem is available as open source under the terms of the [Apache-2.0 License](https://www.apache.org/licenses/LICENSE-2.0).
|
55
|
+
|
52
56
|
## Contributing
|
53
57
|
|
54
58
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/hnswlib.rb.
|
data/Steepfile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# D = Steep::Diagnostic
|
2
|
+
#
|
3
|
+
target :lib do
|
4
|
+
signature "sig"
|
5
|
+
#
|
6
|
+
check "lib" # Directory name
|
7
|
+
# check "Gemfile" # File name
|
8
|
+
# check "app/models/**/*.rb" # Glob
|
9
|
+
# # ignore "lib/templates/*.rb"
|
10
|
+
#
|
11
|
+
# # library "pathname", "set" # Standard libraries
|
12
|
+
# # library "strong_json" # Gems
|
13
|
+
#
|
14
|
+
# # configure_code_diagnostics(D::Ruby.strict) # `strict` diagnostics setting
|
15
|
+
# # configure_code_diagnostics(D::Ruby.lenient) # `lenient` diagnostics setting
|
16
|
+
# # configure_code_diagnostics do |hash| # You can setup everything yourself
|
17
|
+
# # hash[D::Ruby::NoMethod] = :information
|
18
|
+
# # end
|
19
|
+
end
|
20
|
+
|
21
|
+
# target :test do
|
22
|
+
# signature "sig", "sig-private"
|
23
|
+
#
|
24
|
+
# check "test"
|
25
|
+
#
|
26
|
+
# # library "pathname", "set" # Standard libraries
|
27
|
+
# end
|
data/ext/hnswlib/hnswlibext.cpp
CHANGED
data/ext/hnswlib/hnswlibext.hpp
CHANGED
@@ -26,11 +26,13 @@
|
|
26
26
|
VALUE rb_cHnswlibL2Space;
|
27
27
|
VALUE rb_cHnswlibInnerProductSpace;
|
28
28
|
VALUE rb_cHnswlibHierarchicalNSW;
|
29
|
+
VALUE rb_cHnswlibBruteforceSearch;
|
29
30
|
|
30
31
|
class RbHnswlibL2Space {
|
31
32
|
public:
|
32
33
|
static VALUE hnsw_l2space_alloc(VALUE self) {
|
33
34
|
hnswlib::L2Space* ptr = (hnswlib::L2Space*)ruby_xmalloc(sizeof(hnswlib::L2Space));
|
35
|
+
new (ptr) hnswlib::L2Space(); // dummy call to constructor for GC.
|
34
36
|
return TypedData_Wrap_Struct(self, &hnsw_l2space_type, ptr);
|
35
37
|
};
|
36
38
|
|
@@ -106,6 +108,7 @@ class RbHnswlibInnerProductSpace {
|
|
106
108
|
public:
|
107
109
|
static VALUE hnsw_ipspace_alloc(VALUE self) {
|
108
110
|
hnswlib::InnerProductSpace* ptr = (hnswlib::InnerProductSpace*)ruby_xmalloc(sizeof(hnswlib::InnerProductSpace));
|
111
|
+
new (ptr) hnswlib::InnerProductSpace(); // dummy call to constructor for GC.
|
109
112
|
return TypedData_Wrap_Struct(self, &hnsw_ipspace_type, ptr);
|
110
113
|
};
|
111
114
|
|
@@ -181,6 +184,7 @@ class RbHnswlibHierarchicalNSW {
|
|
181
184
|
public:
|
182
185
|
static VALUE hnsw_hierarchicalnsw_alloc(VALUE self) {
|
183
186
|
hnswlib::HierarchicalNSW<float>* ptr = (hnswlib::HierarchicalNSW<float>*)ruby_xmalloc(sizeof(hnswlib::HierarchicalNSW<float>));
|
187
|
+
new (ptr) hnswlib::HierarchicalNSW<float>(); // dummy call to constructor for GC.
|
184
188
|
return TypedData_Wrap_Struct(self, &hnsw_hierarchicalnsw_type, ptr);
|
185
189
|
};
|
186
190
|
|
@@ -239,9 +243,30 @@ class RbHnswlibHierarchicalNSW {
|
|
239
243
|
if (kw_values[3] == Qundef) kw_values[3] = INT2NUM(200);
|
240
244
|
if (kw_values[4] == Qundef) kw_values[4] = INT2NUM(100);
|
241
245
|
|
246
|
+
if (!(rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space) || rb_obj_is_instance_of(kw_values[0], rb_cHnswlibInnerProductSpace))) {
|
247
|
+
rb_raise(rb_eTypeError, "expected space, Hnswlib::L2Space or Hnswlib::InnerProductSpace");
|
248
|
+
return Qnil;
|
249
|
+
}
|
250
|
+
if (!RB_INTEGER_TYPE_P(kw_values[1])) {
|
251
|
+
rb_raise(rb_eTypeError, "expected max_elements, Integer");
|
252
|
+
return Qnil;
|
253
|
+
}
|
254
|
+
if (!RB_INTEGER_TYPE_P(kw_values[2])) {
|
255
|
+
rb_raise(rb_eTypeError, "expected m, Integer");
|
256
|
+
return Qnil;
|
257
|
+
}
|
258
|
+
if (!RB_INTEGER_TYPE_P(kw_values[3])) {
|
259
|
+
rb_raise(rb_eTypeError, "expected ef_construction, Integer");
|
260
|
+
return Qnil;
|
261
|
+
}
|
262
|
+
if (!RB_INTEGER_TYPE_P(kw_values[4])) {
|
263
|
+
rb_raise(rb_eTypeError, "expected random_seed, Integer");
|
264
|
+
return Qnil;
|
265
|
+
}
|
266
|
+
|
242
267
|
rb_iv_set(self, "@space", kw_values[0]);
|
243
268
|
hnswlib::SpaceInterface<float>* space;
|
244
|
-
if (
|
269
|
+
if (rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space)) {
|
245
270
|
space = RbHnswlibL2Space::get_hnsw_l2space(kw_values[0]);
|
246
271
|
} else {
|
247
272
|
space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(kw_values[0]);
|
@@ -344,11 +369,12 @@ class RbHnswlibHierarchicalNSW {
|
|
344
369
|
|
345
370
|
static VALUE _hnsw_hierarchicalnsw_load_index(VALUE self, VALUE _filename) {
|
346
371
|
std::string filename(StringValuePtr(_filename));
|
372
|
+
VALUE ivspace = rb_iv_get(self, "@space");
|
347
373
|
hnswlib::SpaceInterface<float>* space;
|
348
|
-
if (
|
349
|
-
space = RbHnswlibL2Space::get_hnsw_l2space(
|
374
|
+
if (rb_obj_is_instance_of(ivspace, rb_cHnswlibL2Space)) {
|
375
|
+
space = RbHnswlibL2Space::get_hnsw_l2space(ivspace);
|
350
376
|
} else {
|
351
|
-
space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(
|
377
|
+
space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(ivspace);
|
352
378
|
}
|
353
379
|
get_hnsw_hierarchicalnsw(self)->loadIndex(filename, space);
|
354
380
|
RB_GC_GUARD(_filename);
|
@@ -417,4 +443,201 @@ const rb_data_type_t RbHnswlibHierarchicalNSW::hnsw_hierarchicalnsw_type = {
|
|
417
443
|
RUBY_TYPED_FREE_IMMEDIATELY
|
418
444
|
};
|
419
445
|
|
446
|
+
class RbHnswlibBruteforceSearch {
|
447
|
+
public:
|
448
|
+
static VALUE hnsw_bruteforcesearch_alloc(VALUE self) {
|
449
|
+
hnswlib::BruteforceSearch<float>* ptr = (hnswlib::BruteforceSearch<float>*)ruby_xmalloc(sizeof(hnswlib::BruteforceSearch<float>));
|
450
|
+
new (ptr) hnswlib::BruteforceSearch<float>(); // dummy call to constructor for GC.
|
451
|
+
return TypedData_Wrap_Struct(self, &hnsw_bruteforcesearch_type, ptr);
|
452
|
+
};
|
453
|
+
|
454
|
+
static void hnsw_bruteforcesearch_free(void* ptr) {
|
455
|
+
((hnswlib::BruteforceSearch<float>*)ptr)->~BruteforceSearch();
|
456
|
+
ruby_xfree(ptr);
|
457
|
+
};
|
458
|
+
|
459
|
+
static size_t hnsw_bruteforcesearch_size(const void* ptr) {
|
460
|
+
return sizeof(*((hnswlib::BruteforceSearch<float>*)ptr));
|
461
|
+
};
|
462
|
+
|
463
|
+
static hnswlib::BruteforceSearch<float>* get_hnsw_bruteforcesearch(VALUE self) {
|
464
|
+
hnswlib::BruteforceSearch<float>* ptr;
|
465
|
+
TypedData_Get_Struct(self, hnswlib::BruteforceSearch<float>, &hnsw_bruteforcesearch_type, ptr);
|
466
|
+
return ptr;
|
467
|
+
};
|
468
|
+
|
469
|
+
static VALUE define_class(VALUE rb_mHnswlib) {
|
470
|
+
rb_cHnswlibBruteforceSearch = rb_define_class_under(rb_mHnswlib, "BruteforceSearch", rb_cObject);
|
471
|
+
rb_define_alloc_func(rb_cHnswlibBruteforceSearch, hnsw_bruteforcesearch_alloc);
|
472
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "initialize", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_init), -1);
|
473
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "add_point", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_add_point), 2);
|
474
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "search_knn", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_search_knn), 2);
|
475
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "save_index", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_save_index), 1);
|
476
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "load_index", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_load_index), 1);
|
477
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "remove_point", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_remove_point), 1);
|
478
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "max_elements", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_max_elements), 0);
|
479
|
+
rb_define_method(rb_cHnswlibBruteforceSearch, "current_count", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_current_count), 0);
|
480
|
+
rb_define_attr(rb_cHnswlibBruteforceSearch, "space", 1, 0);
|
481
|
+
return rb_cHnswlibBruteforceSearch;
|
482
|
+
};
|
483
|
+
|
484
|
+
private:
|
485
|
+
static const rb_data_type_t hnsw_bruteforcesearch_type;
|
486
|
+
|
487
|
+
static VALUE _hnsw_bruteforcesearch_init(int argc, VALUE* argv, VALUE self) {
|
488
|
+
VALUE kw_args = Qnil;
|
489
|
+
ID kw_table[2] = { rb_intern("space"), rb_intern("max_elements") };
|
490
|
+
VALUE kw_values[2] = { Qundef, Qundef };
|
491
|
+
rb_scan_args(argc, argv, ":", &kw_args);
|
492
|
+
rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
|
493
|
+
|
494
|
+
if (!(rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space) || rb_obj_is_instance_of(kw_values[0], rb_cHnswlibInnerProductSpace))) {
|
495
|
+
rb_raise(rb_eTypeError, "expected space, Hnswlib::L2Space or Hnswlib::InnerProductSpace");
|
496
|
+
return Qnil;
|
497
|
+
}
|
498
|
+
if (!RB_INTEGER_TYPE_P(kw_values[1])) {
|
499
|
+
rb_raise(rb_eTypeError, "expected max_elements, Integer");
|
500
|
+
return Qnil;
|
501
|
+
}
|
502
|
+
|
503
|
+
rb_iv_set(self, "@space", kw_values[0]);
|
504
|
+
hnswlib::SpaceInterface<float>* space;
|
505
|
+
if (rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space)) {
|
506
|
+
space = RbHnswlibL2Space::get_hnsw_l2space(kw_values[0]);
|
507
|
+
} else {
|
508
|
+
space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(kw_values[0]);
|
509
|
+
}
|
510
|
+
const size_t max_elements = (size_t)NUM2INT(kw_values[1]);
|
511
|
+
|
512
|
+
hnswlib::BruteforceSearch<float>* ptr = get_hnsw_bruteforcesearch(self);
|
513
|
+
new (ptr) hnswlib::BruteforceSearch<float>(space, max_elements);
|
514
|
+
|
515
|
+
return Qnil;
|
516
|
+
};
|
517
|
+
|
518
|
+
static VALUE _hnsw_bruteforcesearch_add_point(VALUE self, VALUE arr, VALUE idx) {
|
519
|
+
const int dim = NUM2INT(rb_iv_get(rb_iv_get(self, "@space"), "@dim"));
|
520
|
+
|
521
|
+
if (!RB_TYPE_P(arr, T_ARRAY)) {
|
522
|
+
rb_raise(rb_eArgError, "Expect point vector to be Ruby Array.");
|
523
|
+
return Qfalse;
|
524
|
+
}
|
525
|
+
|
526
|
+
if (!RB_INTEGER_TYPE_P(idx)) {
|
527
|
+
rb_raise(rb_eArgError, "Expect index to be Ruby Integer.");
|
528
|
+
return Qfalse;
|
529
|
+
}
|
530
|
+
|
531
|
+
if (dim != RARRAY_LEN(arr)) {
|
532
|
+
rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
|
533
|
+
return Qfalse;
|
534
|
+
}
|
535
|
+
|
536
|
+
float* vec = (float*)ruby_xmalloc(dim * sizeof(float));
|
537
|
+
for (int i = 0; i < dim; i++) {
|
538
|
+
vec[i] = (float)NUM2DBL(rb_ary_entry(arr, i));
|
539
|
+
}
|
540
|
+
|
541
|
+
get_hnsw_bruteforcesearch(self)->addPoint((void *)vec, (size_t)NUM2INT(idx));
|
542
|
+
|
543
|
+
ruby_xfree(vec);
|
544
|
+
return Qtrue;
|
545
|
+
};
|
546
|
+
|
547
|
+
static VALUE _hnsw_bruteforcesearch_search_knn(VALUE self, VALUE arr, VALUE k) {
|
548
|
+
const int dim = NUM2INT(rb_iv_get(rb_iv_get(self, "@space"), "@dim"));
|
549
|
+
|
550
|
+
if (!RB_TYPE_P(arr, T_ARRAY)) {
|
551
|
+
rb_raise(rb_eArgError, "Expect query vector to be Ruby Array.");
|
552
|
+
return Qnil;
|
553
|
+
}
|
554
|
+
|
555
|
+
if (!RB_INTEGER_TYPE_P(k)) {
|
556
|
+
rb_raise(rb_eArgError, "Expect the number of nearest neighbors to be Ruby Integer.");
|
557
|
+
return Qnil;
|
558
|
+
}
|
559
|
+
|
560
|
+
if (dim != RARRAY_LEN(arr)) {
|
561
|
+
rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
|
562
|
+
return Qnil;
|
563
|
+
}
|
564
|
+
|
565
|
+
float* vec = (float*)ruby_xmalloc(dim * sizeof(float));
|
566
|
+
for (int i = 0; i < dim; i++) {
|
567
|
+
vec[i] = (float)NUM2DBL(rb_ary_entry(arr, i));
|
568
|
+
}
|
569
|
+
|
570
|
+
std::priority_queue<std::pair<float, size_t>> result =
|
571
|
+
get_hnsw_bruteforcesearch(self)->searchKnn((void *)vec, (size_t)NUM2INT(k));
|
572
|
+
|
573
|
+
ruby_xfree(vec);
|
574
|
+
|
575
|
+
if (result.size() != (size_t)NUM2INT(k)) {
|
576
|
+
rb_raise(rb_eRuntimeError, "Cannot return the results in a contigious 2D array. Probably ef or M is too small.");
|
577
|
+
return Qnil;
|
578
|
+
}
|
579
|
+
|
580
|
+
VALUE distances_arr = rb_ary_new2(result.size());
|
581
|
+
VALUE neighbors_arr = rb_ary_new2(result.size());
|
582
|
+
|
583
|
+
for (int i = NUM2INT(k) - 1; i >= 0; i--) {
|
584
|
+
const std::pair<float, size_t>& result_tuple = result.top();
|
585
|
+
rb_ary_store(distances_arr, i, DBL2NUM((double)result_tuple.first));
|
586
|
+
rb_ary_store(neighbors_arr, i, INT2NUM((int)result_tuple.second));
|
587
|
+
result.pop();
|
588
|
+
}
|
589
|
+
|
590
|
+
VALUE ret = rb_ary_new2(2);
|
591
|
+
rb_ary_store(ret, 0, neighbors_arr);
|
592
|
+
rb_ary_store(ret, 1, distances_arr);
|
593
|
+
return ret;
|
594
|
+
};
|
595
|
+
|
596
|
+
static VALUE _hnsw_bruteforcesearch_save_index(VALUE self, VALUE _filename) {
|
597
|
+
std::string filename(StringValuePtr(_filename));
|
598
|
+
get_hnsw_bruteforcesearch(self)->saveIndex(filename);
|
599
|
+
RB_GC_GUARD(_filename);
|
600
|
+
return Qnil;
|
601
|
+
};
|
602
|
+
|
603
|
+
static VALUE _hnsw_bruteforcesearch_load_index(VALUE self, VALUE _filename) {
|
604
|
+
std::string filename(StringValuePtr(_filename));
|
605
|
+
VALUE ivspace = rb_iv_get(self, "@space");
|
606
|
+
hnswlib::SpaceInterface<float>* space;
|
607
|
+
if (rb_obj_is_instance_of(ivspace, rb_cHnswlibL2Space)) {
|
608
|
+
space = RbHnswlibL2Space::get_hnsw_l2space(ivspace);
|
609
|
+
} else {
|
610
|
+
space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(ivspace);
|
611
|
+
}
|
612
|
+
get_hnsw_bruteforcesearch(self)->loadIndex(filename, space);
|
613
|
+
RB_GC_GUARD(_filename);
|
614
|
+
return Qnil;
|
615
|
+
};
|
616
|
+
|
617
|
+
static VALUE _hnsw_bruteforcesearch_remove_point(VALUE self, VALUE idx) {
|
618
|
+
get_hnsw_bruteforcesearch(self)->removePoint((size_t)NUM2INT(idx));
|
619
|
+
return Qnil;
|
620
|
+
};
|
621
|
+
|
622
|
+
static VALUE _hnsw_bruteforcesearch_max_elements(VALUE self) {
|
623
|
+
return INT2NUM((int)(get_hnsw_bruteforcesearch(self)->maxelements_));
|
624
|
+
};
|
625
|
+
|
626
|
+
static VALUE _hnsw_bruteforcesearch_current_count(VALUE self) {
|
627
|
+
return INT2NUM((int)(get_hnsw_bruteforcesearch(self)->cur_element_count));
|
628
|
+
};
|
629
|
+
};
|
630
|
+
|
631
|
+
const rb_data_type_t RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_type = {
|
632
|
+
"RbHnswlibBruteforceSearch",
|
633
|
+
{
|
634
|
+
NULL,
|
635
|
+
RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_free,
|
636
|
+
RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_size
|
637
|
+
},
|
638
|
+
NULL,
|
639
|
+
NULL,
|
640
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
641
|
+
};
|
642
|
+
|
420
643
|
#endif /* HNSWLIBEXT_HPP */
|
@@ -8,6 +8,7 @@ namespace hnswlib {
|
|
8
8
|
template<typename dist_t>
|
9
9
|
class BruteforceSearch : public AlgorithmInterface<dist_t> {
|
10
10
|
public:
|
11
|
+
BruteforceSearch() : data_(nullptr) { }
|
11
12
|
BruteforceSearch(SpaceInterface <dist_t> *s) {
|
12
13
|
|
13
14
|
}
|
@@ -91,13 +92,13 @@ namespace hnswlib {
|
|
91
92
|
searchKnn(const void *query_data, size_t k) const {
|
92
93
|
std::priority_queue<std::pair<dist_t, labeltype >> topResults;
|
93
94
|
if (cur_element_count == 0) return topResults;
|
94
|
-
for (
|
95
|
+
for (size_t i = 0; i < k; i++) {
|
95
96
|
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
|
96
97
|
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
|
97
98
|
data_size_))));
|
98
99
|
}
|
99
100
|
dist_t lastdist = topResults.top().first;
|
100
|
-
for (
|
101
|
+
for (size_t i = k; i < cur_element_count; i++) {
|
101
102
|
dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
|
102
103
|
if (dist <= lastdist) {
|
103
104
|
topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
|
data/ext/hnswlib/src/hnswalg.h
CHANGED
@@ -17,6 +17,7 @@ namespace hnswlib {
|
|
17
17
|
class HierarchicalNSW : public AlgorithmInterface<dist_t> {
|
18
18
|
public:
|
19
19
|
static const tableint max_update_element_locks = 65536;
|
20
|
+
HierarchicalNSW() : visited_list_pool_(nullptr), data_level0_memory_(nullptr), linkLists_(nullptr), cur_element_count(0) { }
|
20
21
|
HierarchicalNSW(SpaceInterface<dist_t> *s) {
|
21
22
|
|
22
23
|
}
|
@@ -757,7 +758,7 @@ namespace hnswlib {
|
|
757
758
|
size_t dim = *((size_t *) dist_func_param_);
|
758
759
|
std::vector<data_t> data;
|
759
760
|
data_t* data_ptr = (data_t*) data_ptrv;
|
760
|
-
for (
|
761
|
+
for (size_t i = 0; i < dim; i++) {
|
761
762
|
data.push_back(*data_ptr);
|
762
763
|
data_ptr += 1;
|
763
764
|
}
|
data/ext/hnswlib/src/space_ip.h
CHANGED
data/ext/hnswlib/src/space_l2.h
CHANGED
data/hnswlib.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
# Specify which files should be added to the gem when it is released.
|
21
21
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
22
22
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
23
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
|
23
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }.reject { |f| f.include?('dummy.rb') }
|
24
24
|
end
|
25
25
|
spec.bindir = 'exe'
|
26
26
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
data/lib/hnswlib/version.rb
CHANGED
data/lib/hnswlib.rb
CHANGED
@@ -4,12 +4,12 @@ require_relative 'hnswlib/version'
|
|
4
4
|
require_relative 'hnswlib/hnswlibext'
|
5
5
|
|
6
6
|
module Hnswlib
|
7
|
-
#
|
7
|
+
# HnswIndex is a class that provides functions for k-nearest neighbors search.
|
8
8
|
#
|
9
9
|
# @example
|
10
10
|
# require 'hnswlib'
|
11
11
|
#
|
12
|
-
# index = Hnswlib::
|
12
|
+
# index = Hnswlib::HnswIndex.new(n_features: 100, max_item: 10000)
|
13
13
|
#
|
14
14
|
# 5000.times do |item_id|
|
15
15
|
# item_vec = Array.new(100) { rand - 0.5 }
|
@@ -18,7 +18,7 @@ module Hnswlib
|
|
18
18
|
#
|
19
19
|
# index.get_nns_by_item(0, 100)
|
20
20
|
#
|
21
|
-
class
|
21
|
+
class HnswIndex
|
22
22
|
# Returns the metric of index.
|
23
23
|
# @return [String]
|
24
24
|
attr_reader :metric
|
@@ -63,7 +63,6 @@ module Hnswlib
|
|
63
63
|
# Remove the item vector.
|
64
64
|
#
|
65
65
|
# @param i [Integer] The ID of item.
|
66
|
-
# @return [Array]
|
67
66
|
def remove_item(i)
|
68
67
|
@index.mark_deleted(i)
|
69
68
|
end
|
@@ -151,4 +150,7 @@ module Hnswlib
|
|
151
150
|
@index.max_elements
|
152
151
|
end
|
153
152
|
end
|
153
|
+
|
154
|
+
# Index is an alias of HnswIndex
|
155
|
+
Index = ::Hnswlib::HnswIndex
|
154
156
|
end
|
data/sig/hnswlib.rbs
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
module Hnswlib
|
2
|
+
VERSION: ::String
|
3
|
+
HSWLIB_VERSION: ::String
|
4
|
+
|
5
|
+
class HnswIndex
|
6
|
+
attr_reader metric: String
|
7
|
+
|
8
|
+
def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed) -> void
|
9
|
+
def add_item: (Integer i, Array[Float] v) -> bool
|
10
|
+
def get_item: (Integer i) -> Array[Float]
|
11
|
+
def remove_item: (Integer i) -> void
|
12
|
+
def get_nns_by_item: (Integer i, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
13
|
+
def get_nns_by_vector: (Array[Float] v, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
14
|
+
def resize_index: (Integer new_max_item) -> void
|
15
|
+
def set_ef: (Integer ef) -> void
|
16
|
+
def save: (String filename) -> void
|
17
|
+
def load: (String filename) -> void
|
18
|
+
def get_distance: (Integer i, Integer j) -> Float
|
19
|
+
def n_items: () -> Integer
|
20
|
+
def n_features: () -> Integer
|
21
|
+
def max_item: () -> Integer
|
22
|
+
end
|
23
|
+
|
24
|
+
Index: untyped
|
25
|
+
|
26
|
+
class L2Space
|
27
|
+
attr_accessor dim: Integer
|
28
|
+
|
29
|
+
def initialize: (Integer dim) -> void
|
30
|
+
def distance: (Array[Float] a, Array[Float] b) -> Float
|
31
|
+
end
|
32
|
+
|
33
|
+
class InnerProductSpace
|
34
|
+
attr_accessor dim: Integer
|
35
|
+
|
36
|
+
def initialize: (Integer dim) -> void
|
37
|
+
def distance: (Array[Float] a, Array[Float] b) -> Float
|
38
|
+
end
|
39
|
+
|
40
|
+
class BruteforceSearch
|
41
|
+
attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
|
42
|
+
|
43
|
+
def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements) -> void
|
44
|
+
def add_point: (Array[Float] arr, Integer idx) -> bool
|
45
|
+
def current_count: () -> Integer
|
46
|
+
def load_index: (String filename) -> void
|
47
|
+
def max_elements: () -> Integer
|
48
|
+
def remove_point: (Integer idx) -> void
|
49
|
+
def save_index: (String filename) -> void
|
50
|
+
def search_knn: (Array[Float] arr, Integer k) -> [Array[Integer], Array[Float]]
|
51
|
+
end
|
52
|
+
|
53
|
+
class HierarchicalNSW
|
54
|
+
attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
|
55
|
+
|
56
|
+
def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed) -> void
|
57
|
+
def add_point: (Array[Float] arr, Integer idx) -> bool
|
58
|
+
def current_count: () -> Integer
|
59
|
+
def get_ids: () -> Array[Integer]
|
60
|
+
def get_point: (Integer idx) -> Array[Float]
|
61
|
+
def load_index: (String filename) -> void
|
62
|
+
def mark_deleted: (Integer idx) -> void
|
63
|
+
def max_elements: () -> Integer
|
64
|
+
def resize_index: (Integer new_max_elements) -> void
|
65
|
+
def save_index: (String filename) -> void
|
66
|
+
def search_knn: (Array[Float] arr, Integer k) -> [Array[Integer], Array[Float]]
|
67
|
+
def set_ef: (Integer ef) -> void
|
68
|
+
end
|
69
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hnswlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
14
14
|
email:
|
@@ -27,6 +27,7 @@ files:
|
|
27
27
|
- LICENSE.txt
|
28
28
|
- README.md
|
29
29
|
- Rakefile
|
30
|
+
- Steepfile
|
30
31
|
- ext/hnswlib/extconf.rb
|
31
32
|
- ext/hnswlib/hnswlibext.cpp
|
32
33
|
- ext/hnswlib/hnswlibext.hpp
|
@@ -40,6 +41,7 @@ files:
|
|
40
41
|
- hnswlib.gemspec
|
41
42
|
- lib/hnswlib.rb
|
42
43
|
- lib/hnswlib/version.rb
|
44
|
+
- sig/hnswlib.rbs
|
43
45
|
homepage: https://github.com/yoshoku/hnswlib.rb
|
44
46
|
licenses:
|
45
47
|
- Apache-2.0
|
@@ -62,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
64
|
- !ruby/object:Gem::Version
|
63
65
|
version: '0'
|
64
66
|
requirements: []
|
65
|
-
rubygems_version: 3.
|
67
|
+
rubygems_version: 3.2.22
|
66
68
|
signing_key:
|
67
69
|
specification_version: 4
|
68
70
|
summary: Ruby bindings for the Hnswlib.
|