hnswlib 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1cd9cbb9fec801aa832f424f382c7efe7818c4909d9f0909e39afbe6d095311a
4
- data.tar.gz: 557b1bed74b3c53814d6b68c85d588583546a83bcc1998f4dbfd6c265c188797
3
+ metadata.gz: 84f3ca262eddd8b331cbedc163e55e28d77eba76d2184332fef6afd0e2ddcce5
4
+ data.tar.gz: 248f8ca655ae83e2f56b26910dde6e96a23771040d61bac7ee7204e294063c2a
5
5
  SHA512:
6
- metadata.gz: b764cd9bc82036ba04865a287caf78691e87b44eac24d74719d3956a26a5cc586f2c37f7dd44ad90bdc43d2bffbbe4902dd52f5599fe647e997b9a35e7bd5659
7
- data.tar.gz: b20823f47c3a40ce39bd1edebe6e72b04084be07d16d55eef761d4c97fdda764bd3792691a57b94edf8c479a7ee5b786d02d66cbf7a861de63741d4a6cbc1b29
6
+ metadata.gz: 56d528a7ce9af7f96b291e74fc6a4c8ff1d3da3d66e1dbeed0dc8e884f5f5822b04e35d0fb12f83155a97ac1505a9a1db7b245cdcb2b08ba96ae6657743ccfeb
7
+ data.tar.gz: 47ca9c4db92798e07576505bc0c6b2a4f3ffb8afdcfdd020226232d8fca31d4259571509ce08ccde8eeecd06bcfa7cc2a5512c29d2568fa9b5d0ec0736aaa2ea
data/.gitignore CHANGED
@@ -15,4 +15,5 @@ mkmf.log
15
15
  # rspec failure tracking
16
16
  .rspec_status
17
17
 
18
- spec/test.ann
18
+ *.ann
19
+ /bin/
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [0.2.0] - 2021-08-02
2
+
3
+ - Add binding class for the BruteforceSearch.
4
+ - Add type check for arguments of initialize methods of BruteforceSearch and HierarchicalNSW.
5
+ - Add a dummy constructor call at memory allocation for each class to prevent a segmentation fault on GC when the initialize method fails.
6
+
1
7
  ## [0.1.1] - 2021-07-25
2
8
 
3
9
  - Fix to use `rb_obj_is_instance_of` for klass comparison due to type error when loading search index on irb 1.3.x: [issue #1](https://github.com/yoshoku/hnswlib.rb/issues/1)
data/README.md CHANGED
@@ -49,6 +49,10 @@ u.load('test.ann')
49
49
  p u.get_nns_by_item(0, 100) # will find the 100 nearest neighbors.
50
50
  ```
51
51
 
52
+ ## License
53
+
54
+ The gem is available as open source under the terms of the [Apache-2.0 License](https://www.apache.org/licenses/LICENSE-2.0).
55
+
52
56
  ## Contributing
53
57
 
54
58
  Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/hnswlib.rb.
@@ -26,4 +26,5 @@ void Init_hnswlibext(void) {
26
26
  RbHnswlibL2Space::define_class(rb_mHnswlib);
27
27
  RbHnswlibInnerProductSpace::define_class(rb_mHnswlib);
28
28
  RbHnswlibHierarchicalNSW::define_class(rb_mHnswlib);
29
+ RbHnswlibBruteforceSearch::define_class(rb_mHnswlib);
29
30
  }
@@ -26,11 +26,13 @@
26
26
  VALUE rb_cHnswlibL2Space;
27
27
  VALUE rb_cHnswlibInnerProductSpace;
28
28
  VALUE rb_cHnswlibHierarchicalNSW;
29
+ VALUE rb_cHnswlibBruteforceSearch;
29
30
 
30
31
  class RbHnswlibL2Space {
31
32
  public:
32
33
  static VALUE hnsw_l2space_alloc(VALUE self) {
33
34
  hnswlib::L2Space* ptr = (hnswlib::L2Space*)ruby_xmalloc(sizeof(hnswlib::L2Space));
35
+ new (ptr) hnswlib::L2Space(); // dummy call to constructor for GC.
34
36
  return TypedData_Wrap_Struct(self, &hnsw_l2space_type, ptr);
35
37
  };
36
38
 
@@ -106,6 +108,7 @@ class RbHnswlibInnerProductSpace {
106
108
  public:
107
109
  static VALUE hnsw_ipspace_alloc(VALUE self) {
108
110
  hnswlib::InnerProductSpace* ptr = (hnswlib::InnerProductSpace*)ruby_xmalloc(sizeof(hnswlib::InnerProductSpace));
111
+ new (ptr) hnswlib::InnerProductSpace(); // dummy call to constructor for GC.
109
112
  return TypedData_Wrap_Struct(self, &hnsw_ipspace_type, ptr);
110
113
  };
111
114
 
@@ -181,6 +184,7 @@ class RbHnswlibHierarchicalNSW {
181
184
  public:
182
185
  static VALUE hnsw_hierarchicalnsw_alloc(VALUE self) {
183
186
  hnswlib::HierarchicalNSW<float>* ptr = (hnswlib::HierarchicalNSW<float>*)ruby_xmalloc(sizeof(hnswlib::HierarchicalNSW<float>));
187
+ new (ptr) hnswlib::HierarchicalNSW<float>(); // dummy call to constructor for GC.
184
188
  return TypedData_Wrap_Struct(self, &hnsw_hierarchicalnsw_type, ptr);
185
189
  };
186
190
 
@@ -239,6 +243,27 @@ class RbHnswlibHierarchicalNSW {
239
243
  if (kw_values[3] == Qundef) kw_values[3] = INT2NUM(200);
240
244
  if (kw_values[4] == Qundef) kw_values[4] = INT2NUM(100);
241
245
 
246
+ if (!(rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space) || rb_obj_is_instance_of(kw_values[0], rb_cHnswlibInnerProductSpace))) {
247
+ rb_raise(rb_eTypeError, "expected space, Hnswlib::L2Space or Hnswlib::InnerProductSpace");
248
+ return Qnil;
249
+ }
250
+ if (!RB_INTEGER_TYPE_P(kw_values[1])) {
251
+ rb_raise(rb_eTypeError, "expected max_elements, Integer");
252
+ return Qnil;
253
+ }
254
+ if (!RB_INTEGER_TYPE_P(kw_values[2])) {
255
+ rb_raise(rb_eTypeError, "expected m, Integer");
256
+ return Qnil;
257
+ }
258
+ if (!RB_INTEGER_TYPE_P(kw_values[3])) {
259
+ rb_raise(rb_eTypeError, "expected ef_construction, Integer");
260
+ return Qnil;
261
+ }
262
+ if (!RB_INTEGER_TYPE_P(kw_values[4])) {
263
+ rb_raise(rb_eTypeError, "expected random_seed, Integer");
264
+ return Qnil;
265
+ }
266
+
242
267
  rb_iv_set(self, "@space", kw_values[0]);
243
268
  hnswlib::SpaceInterface<float>* space;
244
269
  if (rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space)) {
@@ -418,4 +443,201 @@ const rb_data_type_t RbHnswlibHierarchicalNSW::hnsw_hierarchicalnsw_type = {
418
443
  RUBY_TYPED_FREE_IMMEDIATELY
419
444
  };
420
445
 
446
+ class RbHnswlibBruteforceSearch {
447
+ public:
448
+ static VALUE hnsw_bruteforcesearch_alloc(VALUE self) {
449
+ hnswlib::BruteforceSearch<float>* ptr = (hnswlib::BruteforceSearch<float>*)ruby_xmalloc(sizeof(hnswlib::BruteforceSearch<float>));
450
+ new (ptr) hnswlib::BruteforceSearch<float>(); // dummy call to constructor for GC.
451
+ return TypedData_Wrap_Struct(self, &hnsw_bruteforcesearch_type, ptr);
452
+ };
453
+
454
+ static void hnsw_bruteforcesearch_free(void* ptr) {
455
+ ((hnswlib::BruteforceSearch<float>*)ptr)->~BruteforceSearch();
456
+ ruby_xfree(ptr);
457
+ };
458
+
459
+ static size_t hnsw_bruteforcesearch_size(const void* ptr) {
460
+ return sizeof(*((hnswlib::BruteforceSearch<float>*)ptr));
461
+ };
462
+
463
+ static hnswlib::BruteforceSearch<float>* get_hnsw_bruteforcesearch(VALUE self) {
464
+ hnswlib::BruteforceSearch<float>* ptr;
465
+ TypedData_Get_Struct(self, hnswlib::BruteforceSearch<float>, &hnsw_bruteforcesearch_type, ptr);
466
+ return ptr;
467
+ };
468
+
469
+ static VALUE define_class(VALUE rb_mHnswlib) {
470
+ rb_cHnswlibBruteforceSearch = rb_define_class_under(rb_mHnswlib, "BruteforceSearch", rb_cObject);
471
+ rb_define_alloc_func(rb_cHnswlibBruteforceSearch, hnsw_bruteforcesearch_alloc);
472
+ rb_define_method(rb_cHnswlibBruteforceSearch, "initialize", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_init), -1);
473
+ rb_define_method(rb_cHnswlibBruteforceSearch, "add_point", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_add_point), 2);
474
+ rb_define_method(rb_cHnswlibBruteforceSearch, "search_knn", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_search_knn), 2);
475
+ rb_define_method(rb_cHnswlibBruteforceSearch, "save_index", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_save_index), 1);
476
+ rb_define_method(rb_cHnswlibBruteforceSearch, "load_index", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_load_index), 1);
477
+ rb_define_method(rb_cHnswlibBruteforceSearch, "remove_point", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_remove_point), 1);
478
+ rb_define_method(rb_cHnswlibBruteforceSearch, "max_elements", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_max_elements), 0);
479
+ rb_define_method(rb_cHnswlibBruteforceSearch, "current_count", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_current_count), 0);
480
+ rb_define_attr(rb_cHnswlibBruteforceSearch, "space", 1, 0);
481
+ return rb_cHnswlibBruteforceSearch;
482
+ };
483
+
484
+ private:
485
+ static const rb_data_type_t hnsw_bruteforcesearch_type;
486
+
487
+ static VALUE _hnsw_bruteforcesearch_init(int argc, VALUE* argv, VALUE self) {
488
+ VALUE kw_args = Qnil;
489
+ ID kw_table[2] = { rb_intern("space"), rb_intern("max_elements") };
490
+ VALUE kw_values[2] = { Qundef, Qundef };
491
+ rb_scan_args(argc, argv, ":", &kw_args);
492
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
493
+
494
+ if (!(rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space) || rb_obj_is_instance_of(kw_values[0], rb_cHnswlibInnerProductSpace))) {
495
+ rb_raise(rb_eTypeError, "expected space, Hnswlib::L2Space or Hnswlib::InnerProductSpace");
496
+ return Qnil;
497
+ }
498
+ if (!RB_INTEGER_TYPE_P(kw_values[1])) {
499
+ rb_raise(rb_eTypeError, "expected max_elements, Integer");
500
+ return Qnil;
501
+ }
502
+
503
+ rb_iv_set(self, "@space", kw_values[0]);
504
+ hnswlib::SpaceInterface<float>* space;
505
+ if (rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space)) {
506
+ space = RbHnswlibL2Space::get_hnsw_l2space(kw_values[0]);
507
+ } else {
508
+ space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(kw_values[0]);
509
+ }
510
+ const size_t max_elements = (size_t)NUM2INT(kw_values[1]);
511
+
512
+ hnswlib::BruteforceSearch<float>* ptr = get_hnsw_bruteforcesearch(self);
513
+ new (ptr) hnswlib::BruteforceSearch<float>(space, max_elements);
514
+
515
+ return Qnil;
516
+ };
517
+
518
+ static VALUE _hnsw_bruteforcesearch_add_point(VALUE self, VALUE arr, VALUE idx) {
519
+ const int dim = NUM2INT(rb_iv_get(rb_iv_get(self, "@space"), "@dim"));
520
+
521
+ if (!RB_TYPE_P(arr, T_ARRAY)) {
522
+ rb_raise(rb_eArgError, "Expect point vector to be Ruby Array.");
523
+ return Qfalse;
524
+ }
525
+
526
+ if (!RB_INTEGER_TYPE_P(idx)) {
527
+ rb_raise(rb_eArgError, "Expect index to be Ruby Integer.");
528
+ return Qfalse;
529
+ }
530
+
531
+ if (dim != RARRAY_LEN(arr)) {
532
+ rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
533
+ return Qfalse;
534
+ }
535
+
536
+ float* vec = (float*)ruby_xmalloc(dim * sizeof(float));
537
+ for (int i = 0; i < dim; i++) {
538
+ vec[i] = (float)NUM2DBL(rb_ary_entry(arr, i));
539
+ }
540
+
541
+ get_hnsw_bruteforcesearch(self)->addPoint((void *)vec, (size_t)NUM2INT(idx));
542
+
543
+ ruby_xfree(vec);
544
+ return Qtrue;
545
+ };
546
+
547
+ static VALUE _hnsw_bruteforcesearch_search_knn(VALUE self, VALUE arr, VALUE k) {
548
+ const int dim = NUM2INT(rb_iv_get(rb_iv_get(self, "@space"), "@dim"));
549
+
550
+ if (!RB_TYPE_P(arr, T_ARRAY)) {
551
+ rb_raise(rb_eArgError, "Expect query vector to be Ruby Array.");
552
+ return Qnil;
553
+ }
554
+
555
+ if (!RB_INTEGER_TYPE_P(k)) {
556
+ rb_raise(rb_eArgError, "Expect the number of nearest neighbors to be Ruby Integer.");
557
+ return Qnil;
558
+ }
559
+
560
+ if (dim != RARRAY_LEN(arr)) {
561
+ rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
562
+ return Qnil;
563
+ }
564
+
565
+ float* vec = (float*)ruby_xmalloc(dim * sizeof(float));
566
+ for (int i = 0; i < dim; i++) {
567
+ vec[i] = (float)NUM2DBL(rb_ary_entry(arr, i));
568
+ }
569
+
570
+ std::priority_queue<std::pair<float, size_t>> result =
571
+ get_hnsw_bruteforcesearch(self)->searchKnn((void *)vec, (size_t)NUM2INT(k));
572
+
573
+ ruby_xfree(vec);
574
+
575
+ if (result.size() != (size_t)NUM2INT(k)) {
576
+ rb_raise(rb_eRuntimeError, "Cannot return the results in a contigious 2D array. Probably ef or M is too small.");
577
+ return Qnil;
578
+ }
579
+
580
+ VALUE distances_arr = rb_ary_new2(result.size());
581
+ VALUE neighbors_arr = rb_ary_new2(result.size());
582
+
583
+ for (int i = NUM2INT(k) - 1; i >= 0; i--) {
584
+ const std::pair<float, size_t>& result_tuple = result.top();
585
+ rb_ary_store(distances_arr, i, DBL2NUM((double)result_tuple.first));
586
+ rb_ary_store(neighbors_arr, i, INT2NUM((int)result_tuple.second));
587
+ result.pop();
588
+ }
589
+
590
+ VALUE ret = rb_ary_new2(2);
591
+ rb_ary_store(ret, 0, neighbors_arr);
592
+ rb_ary_store(ret, 1, distances_arr);
593
+ return ret;
594
+ };
595
+
596
+ static VALUE _hnsw_bruteforcesearch_save_index(VALUE self, VALUE _filename) {
597
+ std::string filename(StringValuePtr(_filename));
598
+ get_hnsw_bruteforcesearch(self)->saveIndex(filename);
599
+ RB_GC_GUARD(_filename);
600
+ return Qnil;
601
+ };
602
+
603
+ static VALUE _hnsw_bruteforcesearch_load_index(VALUE self, VALUE _filename) {
604
+ std::string filename(StringValuePtr(_filename));
605
+ VALUE ivspace = rb_iv_get(self, "@space");
606
+ hnswlib::SpaceInterface<float>* space;
607
+ if (rb_obj_is_instance_of(ivspace, rb_cHnswlibL2Space)) {
608
+ space = RbHnswlibL2Space::get_hnsw_l2space(ivspace);
609
+ } else {
610
+ space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(ivspace);
611
+ }
612
+ get_hnsw_bruteforcesearch(self)->loadIndex(filename, space);
613
+ RB_GC_GUARD(_filename);
614
+ return Qnil;
615
+ };
616
+
617
+ static VALUE _hnsw_bruteforcesearch_remove_point(VALUE self, VALUE idx) {
618
+ get_hnsw_bruteforcesearch(self)->removePoint((size_t)NUM2INT(idx));
619
+ return Qnil;
620
+ };
621
+
622
+ static VALUE _hnsw_bruteforcesearch_max_elements(VALUE self) {
623
+ return INT2NUM((int)(get_hnsw_bruteforcesearch(self)->maxelements_));
624
+ };
625
+
626
+ static VALUE _hnsw_bruteforcesearch_current_count(VALUE self) {
627
+ return INT2NUM((int)(get_hnsw_bruteforcesearch(self)->cur_element_count));
628
+ };
629
+ };
630
+
631
+ const rb_data_type_t RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_type = {
632
+ "RbHnswlibBruteforceSearch",
633
+ {
634
+ NULL,
635
+ RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_free,
636
+ RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_size
637
+ },
638
+ NULL,
639
+ NULL,
640
+ RUBY_TYPED_FREE_IMMEDIATELY
641
+ };
642
+
421
643
  #endif /* HNSWLIBEXT_HPP */
@@ -8,6 +8,7 @@ namespace hnswlib {
8
8
  template<typename dist_t>
9
9
  class BruteforceSearch : public AlgorithmInterface<dist_t> {
10
10
  public:
11
+ BruteforceSearch() : data_(nullptr) { }
11
12
  BruteforceSearch(SpaceInterface <dist_t> *s) {
12
13
 
13
14
  }
@@ -91,13 +92,13 @@ namespace hnswlib {
91
92
  searchKnn(const void *query_data, size_t k) const {
92
93
  std::priority_queue<std::pair<dist_t, labeltype >> topResults;
93
94
  if (cur_element_count == 0) return topResults;
94
- for (int i = 0; i < k; i++) {
95
+ for (size_t i = 0; i < k; i++) {
95
96
  dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
96
97
  topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
97
98
  data_size_))));
98
99
  }
99
100
  dist_t lastdist = topResults.top().first;
100
- for (int i = k; i < cur_element_count; i++) {
101
+ for (size_t i = k; i < cur_element_count; i++) {
101
102
  dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
102
103
  if (dist <= lastdist) {
103
104
  topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
@@ -17,6 +17,7 @@ namespace hnswlib {
17
17
  class HierarchicalNSW : public AlgorithmInterface<dist_t> {
18
18
  public:
19
19
  static const tableint max_update_element_locks = 65536;
20
+ HierarchicalNSW() : visited_list_pool_(nullptr), data_level0_memory_(nullptr), linkLists_(nullptr), cur_element_count(0) { }
20
21
  HierarchicalNSW(SpaceInterface<dist_t> *s) {
21
22
 
22
23
  }
@@ -757,7 +758,7 @@ namespace hnswlib {
757
758
  size_t dim = *((size_t *) dist_func_param_);
758
759
  std::vector<data_t> data;
759
760
  data_t* data_ptr = (data_t*) data_ptrv;
760
- for (int i = 0; i < dim; i++) {
761
+ for (size_t i = 0; i < dim; i++) {
761
762
  data.push_back(*data_ptr);
762
763
  data_ptr += 1;
763
764
  }
@@ -247,6 +247,7 @@ namespace hnswlib {
247
247
  size_t data_size_;
248
248
  size_t dim_;
249
249
  public:
250
+ InnerProductSpace() : data_size_(0), dim_(0) { }
250
251
  InnerProductSpace(size_t dim) {
251
252
  fstdistfunc_ = InnerProduct;
252
253
  #if defined(USE_AVX) || defined(USE_SSE)
@@ -172,6 +172,7 @@ namespace hnswlib {
172
172
  size_t data_size_;
173
173
  size_t dim_;
174
174
  public:
175
+ L2Space() : data_size_(0), dim_(0) { }
175
176
  L2Space(size_t dim) {
176
177
  fstdistfunc_ = L2Sqr;
177
178
  #if defined(USE_SSE) || defined(USE_AVX)
@@ -3,7 +3,7 @@
3
3
  # Hnswlib.rb provides Ruby bindings for the Hnswlib.
4
4
  module Hnswlib
5
5
  # The version of Hnswlib.rb you install.
6
- VERSION = '0.1.1'
6
+ VERSION = '0.2.0'
7
7
 
8
8
  # The version of Hnswlib included with gem.
9
9
  HSWLIB_VERSION = '0.5.2'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hnswlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-25 00:00:00.000000000 Z
11
+ date: 2021-08-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
14
14
  email:
@@ -62,7 +62,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
62
62
  - !ruby/object:Gem::Version
63
63
  version: '0'
64
64
  requirements: []
65
- rubygems_version: 3.1.6
65
+ rubygems_version: 3.2.22
66
66
  signing_key:
67
67
  specification_version: 4
68
68
  summary: Ruby bindings for the Hnswlib.