hnswlib 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c9a4129957f2d395c00654d74062bec452fec715e148e8db2b5ad3e4f3bc815e
4
- data.tar.gz: 54691ff4e40f812327ffa019527654e936ef570a115b9082359828a2e9d14704
3
+ metadata.gz: 99f5f1403a51083df75ef842a11b996fe6c159f95c3798f217a076cb5f535254
4
+ data.tar.gz: f9972ac4e644727a126e937e81494cc11bc051bc7ff0daf57139845aed1e7719
5
5
  SHA512:
6
- metadata.gz: 1c766bbe540dc5d8135b56959c8e088f5ab1ffc4e104fd6f8ea39de350ed4e7313fa495d7d76cea294d63e68679ac024a7d842b1c454d0252dcd330516511264
7
- data.tar.gz: af8ce64f52e0361497f17ef0e316890e870ca637757790f64973bd7f15d7d195c00a1faa5dc9c651276d7d8fb8622efad0efee55a2666ff948576421e05f1392
6
+ metadata.gz: c4f96fceab228e3cffd9f2b77e513b86fe71ec3156cdff10c0bc9206ad7951f87c507531088f91f45a940c399102bff6a2dedc5fee8b09b37a5f5739e9a02b6c
7
+ data.tar.gz: 8297e1e0d1f6753b3e8900b07b80dba271276a14303b102cd8084e480ec491da269baac1fc8be08ab350976fc22cb859583745dc219d73d6f56b33fe7502ac2b
data/.gitignore CHANGED
@@ -15,4 +15,5 @@ mkmf.log
15
15
  # rspec failure tracking
16
16
  .rspec_status
17
17
 
18
- spec/test.ann
18
+ *.ann
19
+ /bin/
data/CHANGELOG.md CHANGED
@@ -1,4 +1,22 @@
1
- ## [Unreleased]
1
+ ## [0.4.0] - 2021-09-12
2
+
3
+ - Add type declaration file.
4
+
5
+ ## [0.3.0] - 2021-08-08
6
+
7
+ - Rename `Hnswlib::Index` to `Hnswlib::HnswIndex` (for compatibility, `Hnswlib::Index` is kept as an alias for `Hnswlib::HnswIndex`).
8
+ - Update API documentation.
9
+
10
+ ## [0.2.0] - 2021-08-02
11
+
12
+ - Add binding class for the BruteforceSearch.
13
+ - Add type check for arguments of initialize methods of BruteforceSearch and HierarchicalNSW.
14
+ - Add dummy constructor call at memory allocation for each class to prevent a segmentation fault on GC when the initialize method fails.
15
+
16
+ ## [0.1.1] - 2021-07-25
17
+
18
+ - Fix to use `rb_obj_is_instance_of` for klass comparison due to type error when loading search index on irb 1.3.x: [issue #1](https://github.com/yoshoku/hnswlib.rb/issues/1)
19
+ - Update API documentation.
2
20
 
3
21
  ## [0.1.0] - 2021-07-24
4
22
 
data/Gemfile CHANGED
@@ -8,3 +8,5 @@ gemspec
8
8
  gem 'rake', '~> 13.0'
9
9
  gem 'rake-compiler', '~> 1.1'
10
10
  gem 'rspec', '~> 3.0'
11
+ gem 'rbs', '~> 1.2'
12
+ gem 'steep', '~> 0.44'
data/README.md CHANGED
@@ -35,7 +35,7 @@ Or install it yourself as:
35
35
  require 'hnswlib'
36
36
 
37
37
  f = 40 # length of item vector that will be indexed.
38
- t = Hnswlib::Index.new(n_features: f, max_item: 1000)
38
+ t = Hnswlib::HnswIndex.new(n_features: f, max_item: 1000)
39
39
 
40
40
  1000.times do |i|
41
41
  v = Array.new(f) { rand }
@@ -44,11 +44,15 @@ end
44
44
 
45
45
  t.save('test.ann')
46
46
 
47
- u = Hnswlib::Index.new(n_features: f, max_item: 1000)
47
+ u = Hnswlib::HnswIndex.new(n_features: f, max_item: 1000)
48
48
  u.load('test.ann')
49
49
  p u.get_nns_by_item(0, 100) # will find the 100 nearest neighbors.
50
50
  ```
51
51
 
52
+ ## License
53
+
54
+ The gem is available as open source under the terms of the [Apache-2.0 License](https://www.apache.org/licenses/LICENSE-2.0).
55
+
52
56
  ## Contributing
53
57
 
54
58
  Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/hnswlib.rb.
data/Steepfile ADDED
@@ -0,0 +1,27 @@
1
+ # D = Steep::Diagnostic
2
+ #
3
+ target :lib do
4
+ signature "sig"
5
+ #
6
+ check "lib" # Directory name
7
+ # check "Gemfile" # File name
8
+ # check "app/models/**/*.rb" # Glob
9
+ # # ignore "lib/templates/*.rb"
10
+ #
11
+ # # library "pathname", "set" # Standard libraries
12
+ # # library "strong_json" # Gems
13
+ #
14
+ # # configure_code_diagnostics(D::Ruby.strict) # `strict` diagnostics setting
15
+ # # configure_code_diagnostics(D::Ruby.lenient) # `lenient` diagnostics setting
16
+ # # configure_code_diagnostics do |hash| # You can setup everything yourself
17
+ # # hash[D::Ruby::NoMethod] = :information
18
+ # # end
19
+ end
20
+
21
+ # target :test do
22
+ # signature "sig", "sig-private"
23
+ #
24
+ # check "test"
25
+ #
26
+ # # library "pathname", "set" # Standard libraries
27
+ # end
@@ -26,4 +26,5 @@ void Init_hnswlibext(void) {
26
26
  RbHnswlibL2Space::define_class(rb_mHnswlib);
27
27
  RbHnswlibInnerProductSpace::define_class(rb_mHnswlib);
28
28
  RbHnswlibHierarchicalNSW::define_class(rb_mHnswlib);
29
+ RbHnswlibBruteforceSearch::define_class(rb_mHnswlib);
29
30
  }
@@ -26,11 +26,13 @@
26
26
  VALUE rb_cHnswlibL2Space;
27
27
  VALUE rb_cHnswlibInnerProductSpace;
28
28
  VALUE rb_cHnswlibHierarchicalNSW;
29
+ VALUE rb_cHnswlibBruteforceSearch;
29
30
 
30
31
  class RbHnswlibL2Space {
31
32
  public:
32
33
  static VALUE hnsw_l2space_alloc(VALUE self) {
33
34
  hnswlib::L2Space* ptr = (hnswlib::L2Space*)ruby_xmalloc(sizeof(hnswlib::L2Space));
35
+ new (ptr) hnswlib::L2Space(); // dummy call to constructor for GC.
34
36
  return TypedData_Wrap_Struct(self, &hnsw_l2space_type, ptr);
35
37
  };
36
38
 
@@ -106,6 +108,7 @@ class RbHnswlibInnerProductSpace {
106
108
  public:
107
109
  static VALUE hnsw_ipspace_alloc(VALUE self) {
108
110
  hnswlib::InnerProductSpace* ptr = (hnswlib::InnerProductSpace*)ruby_xmalloc(sizeof(hnswlib::InnerProductSpace));
111
+ new (ptr) hnswlib::InnerProductSpace(); // dummy call to constructor for GC.
109
112
  return TypedData_Wrap_Struct(self, &hnsw_ipspace_type, ptr);
110
113
  };
111
114
 
@@ -181,6 +184,7 @@ class RbHnswlibHierarchicalNSW {
181
184
  public:
182
185
  static VALUE hnsw_hierarchicalnsw_alloc(VALUE self) {
183
186
  hnswlib::HierarchicalNSW<float>* ptr = (hnswlib::HierarchicalNSW<float>*)ruby_xmalloc(sizeof(hnswlib::HierarchicalNSW<float>));
187
+ new (ptr) hnswlib::HierarchicalNSW<float>(); // dummy call to constructor for GC.
184
188
  return TypedData_Wrap_Struct(self, &hnsw_hierarchicalnsw_type, ptr);
185
189
  };
186
190
 
@@ -239,9 +243,30 @@ class RbHnswlibHierarchicalNSW {
239
243
  if (kw_values[3] == Qundef) kw_values[3] = INT2NUM(200);
240
244
  if (kw_values[4] == Qundef) kw_values[4] = INT2NUM(100);
241
245
 
246
+ if (!(rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space) || rb_obj_is_instance_of(kw_values[0], rb_cHnswlibInnerProductSpace))) {
247
+ rb_raise(rb_eTypeError, "expected space, Hnswlib::L2Space or Hnswlib::InnerProductSpace");
248
+ return Qnil;
249
+ }
250
+ if (!RB_INTEGER_TYPE_P(kw_values[1])) {
251
+ rb_raise(rb_eTypeError, "expected max_elements, Integer");
252
+ return Qnil;
253
+ }
254
+ if (!RB_INTEGER_TYPE_P(kw_values[2])) {
255
+ rb_raise(rb_eTypeError, "expected m, Integer");
256
+ return Qnil;
257
+ }
258
+ if (!RB_INTEGER_TYPE_P(kw_values[3])) {
259
+ rb_raise(rb_eTypeError, "expected ef_construction, Integer");
260
+ return Qnil;
261
+ }
262
+ if (!RB_INTEGER_TYPE_P(kw_values[4])) {
263
+ rb_raise(rb_eTypeError, "expected random_seed, Integer");
264
+ return Qnil;
265
+ }
266
+
242
267
  rb_iv_set(self, "@space", kw_values[0]);
243
268
  hnswlib::SpaceInterface<float>* space;
244
- if (CLASS_OF(kw_values[0]) == rb_cHnswlibL2Space) {
269
+ if (rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space)) {
245
270
  space = RbHnswlibL2Space::get_hnsw_l2space(kw_values[0]);
246
271
  } else {
247
272
  space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(kw_values[0]);
@@ -344,11 +369,12 @@ class RbHnswlibHierarchicalNSW {
344
369
 
345
370
  static VALUE _hnsw_hierarchicalnsw_load_index(VALUE self, VALUE _filename) {
346
371
  std::string filename(StringValuePtr(_filename));
372
+ VALUE ivspace = rb_iv_get(self, "@space");
347
373
  hnswlib::SpaceInterface<float>* space;
348
- if (CLASS_OF(rb_iv_get(self, "@space")) == rb_cHnswlibL2Space) {
349
- space = RbHnswlibL2Space::get_hnsw_l2space(rb_iv_get(self, "@space"));
374
+ if (rb_obj_is_instance_of(ivspace, rb_cHnswlibL2Space)) {
375
+ space = RbHnswlibL2Space::get_hnsw_l2space(ivspace);
350
376
  } else {
351
- space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(rb_iv_get(self, "@space"));
377
+ space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(ivspace);
352
378
  }
353
379
  get_hnsw_hierarchicalnsw(self)->loadIndex(filename, space);
354
380
  RB_GC_GUARD(_filename);
@@ -417,4 +443,201 @@ const rb_data_type_t RbHnswlibHierarchicalNSW::hnsw_hierarchicalnsw_type = {
417
443
  RUBY_TYPED_FREE_IMMEDIATELY
418
444
  };
419
445
 
446
+ class RbHnswlibBruteforceSearch {
447
+ public:
448
+ static VALUE hnsw_bruteforcesearch_alloc(VALUE self) {
449
+ hnswlib::BruteforceSearch<float>* ptr = (hnswlib::BruteforceSearch<float>*)ruby_xmalloc(sizeof(hnswlib::BruteforceSearch<float>));
450
+ new (ptr) hnswlib::BruteforceSearch<float>(); // dummy call to constructor for GC.
451
+ return TypedData_Wrap_Struct(self, &hnsw_bruteforcesearch_type, ptr);
452
+ };
453
+
454
+ static void hnsw_bruteforcesearch_free(void* ptr) {
455
+ ((hnswlib::BruteforceSearch<float>*)ptr)->~BruteforceSearch();
456
+ ruby_xfree(ptr);
457
+ };
458
+
459
+ static size_t hnsw_bruteforcesearch_size(const void* ptr) {
460
+ return sizeof(*((hnswlib::BruteforceSearch<float>*)ptr));
461
+ };
462
+
463
+ static hnswlib::BruteforceSearch<float>* get_hnsw_bruteforcesearch(VALUE self) {
464
+ hnswlib::BruteforceSearch<float>* ptr;
465
+ TypedData_Get_Struct(self, hnswlib::BruteforceSearch<float>, &hnsw_bruteforcesearch_type, ptr);
466
+ return ptr;
467
+ };
468
+
469
+ static VALUE define_class(VALUE rb_mHnswlib) {
470
+ rb_cHnswlibBruteforceSearch = rb_define_class_under(rb_mHnswlib, "BruteforceSearch", rb_cObject);
471
+ rb_define_alloc_func(rb_cHnswlibBruteforceSearch, hnsw_bruteforcesearch_alloc);
472
+ rb_define_method(rb_cHnswlibBruteforceSearch, "initialize", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_init), -1);
473
+ rb_define_method(rb_cHnswlibBruteforceSearch, "add_point", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_add_point), 2);
474
+ rb_define_method(rb_cHnswlibBruteforceSearch, "search_knn", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_search_knn), 2);
475
+ rb_define_method(rb_cHnswlibBruteforceSearch, "save_index", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_save_index), 1);
476
+ rb_define_method(rb_cHnswlibBruteforceSearch, "load_index", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_load_index), 1);
477
+ rb_define_method(rb_cHnswlibBruteforceSearch, "remove_point", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_remove_point), 1);
478
+ rb_define_method(rb_cHnswlibBruteforceSearch, "max_elements", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_max_elements), 0);
479
+ rb_define_method(rb_cHnswlibBruteforceSearch, "current_count", RUBY_METHOD_FUNC(_hnsw_bruteforcesearch_current_count), 0);
480
+ rb_define_attr(rb_cHnswlibBruteforceSearch, "space", 1, 0);
481
+ return rb_cHnswlibBruteforceSearch;
482
+ };
483
+
484
+ private:
485
+ static const rb_data_type_t hnsw_bruteforcesearch_type;
486
+
487
+ static VALUE _hnsw_bruteforcesearch_init(int argc, VALUE* argv, VALUE self) {
488
+ VALUE kw_args = Qnil;
489
+ ID kw_table[2] = { rb_intern("space"), rb_intern("max_elements") };
490
+ VALUE kw_values[2] = { Qundef, Qundef };
491
+ rb_scan_args(argc, argv, ":", &kw_args);
492
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
493
+
494
+ if (!(rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space) || rb_obj_is_instance_of(kw_values[0], rb_cHnswlibInnerProductSpace))) {
495
+ rb_raise(rb_eTypeError, "expected space, Hnswlib::L2Space or Hnswlib::InnerProductSpace");
496
+ return Qnil;
497
+ }
498
+ if (!RB_INTEGER_TYPE_P(kw_values[1])) {
499
+ rb_raise(rb_eTypeError, "expected max_elements, Integer");
500
+ return Qnil;
501
+ }
502
+
503
+ rb_iv_set(self, "@space", kw_values[0]);
504
+ hnswlib::SpaceInterface<float>* space;
505
+ if (rb_obj_is_instance_of(kw_values[0], rb_cHnswlibL2Space)) {
506
+ space = RbHnswlibL2Space::get_hnsw_l2space(kw_values[0]);
507
+ } else {
508
+ space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(kw_values[0]);
509
+ }
510
+ const size_t max_elements = (size_t)NUM2INT(kw_values[1]);
511
+
512
+ hnswlib::BruteforceSearch<float>* ptr = get_hnsw_bruteforcesearch(self);
513
+ new (ptr) hnswlib::BruteforceSearch<float>(space, max_elements);
514
+
515
+ return Qnil;
516
+ };
517
+
518
+ static VALUE _hnsw_bruteforcesearch_add_point(VALUE self, VALUE arr, VALUE idx) {
519
+ const int dim = NUM2INT(rb_iv_get(rb_iv_get(self, "@space"), "@dim"));
520
+
521
+ if (!RB_TYPE_P(arr, T_ARRAY)) {
522
+ rb_raise(rb_eArgError, "Expect point vector to be Ruby Array.");
523
+ return Qfalse;
524
+ }
525
+
526
+ if (!RB_INTEGER_TYPE_P(idx)) {
527
+ rb_raise(rb_eArgError, "Expect index to be Ruby Integer.");
528
+ return Qfalse;
529
+ }
530
+
531
+ if (dim != RARRAY_LEN(arr)) {
532
+ rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
533
+ return Qfalse;
534
+ }
535
+
536
+ float* vec = (float*)ruby_xmalloc(dim * sizeof(float));
537
+ for (int i = 0; i < dim; i++) {
538
+ vec[i] = (float)NUM2DBL(rb_ary_entry(arr, i));
539
+ }
540
+
541
+ get_hnsw_bruteforcesearch(self)->addPoint((void *)vec, (size_t)NUM2INT(idx));
542
+
543
+ ruby_xfree(vec);
544
+ return Qtrue;
545
+ };
546
+
547
+ static VALUE _hnsw_bruteforcesearch_search_knn(VALUE self, VALUE arr, VALUE k) {
548
+ const int dim = NUM2INT(rb_iv_get(rb_iv_get(self, "@space"), "@dim"));
549
+
550
+ if (!RB_TYPE_P(arr, T_ARRAY)) {
551
+ rb_raise(rb_eArgError, "Expect query vector to be Ruby Array.");
552
+ return Qnil;
553
+ }
554
+
555
+ if (!RB_INTEGER_TYPE_P(k)) {
556
+ rb_raise(rb_eArgError, "Expect the number of nearest neighbors to be Ruby Integer.");
557
+ return Qnil;
558
+ }
559
+
560
+ if (dim != RARRAY_LEN(arr)) {
561
+ rb_raise(rb_eArgError, "Array size does not match to index dimensionality.");
562
+ return Qnil;
563
+ }
564
+
565
+ float* vec = (float*)ruby_xmalloc(dim * sizeof(float));
566
+ for (int i = 0; i < dim; i++) {
567
+ vec[i] = (float)NUM2DBL(rb_ary_entry(arr, i));
568
+ }
569
+
570
+ std::priority_queue<std::pair<float, size_t>> result =
571
+ get_hnsw_bruteforcesearch(self)->searchKnn((void *)vec, (size_t)NUM2INT(k));
572
+
573
+ ruby_xfree(vec);
574
+
575
+ if (result.size() != (size_t)NUM2INT(k)) {
576
+ rb_raise(rb_eRuntimeError, "Cannot return the results in a contigious 2D array. Probably ef or M is too small.");
577
+ return Qnil;
578
+ }
579
+
580
+ VALUE distances_arr = rb_ary_new2(result.size());
581
+ VALUE neighbors_arr = rb_ary_new2(result.size());
582
+
583
+ for (int i = NUM2INT(k) - 1; i >= 0; i--) {
584
+ const std::pair<float, size_t>& result_tuple = result.top();
585
+ rb_ary_store(distances_arr, i, DBL2NUM((double)result_tuple.first));
586
+ rb_ary_store(neighbors_arr, i, INT2NUM((int)result_tuple.second));
587
+ result.pop();
588
+ }
589
+
590
+ VALUE ret = rb_ary_new2(2);
591
+ rb_ary_store(ret, 0, neighbors_arr);
592
+ rb_ary_store(ret, 1, distances_arr);
593
+ return ret;
594
+ };
595
+
596
+ static VALUE _hnsw_bruteforcesearch_save_index(VALUE self, VALUE _filename) {
597
+ std::string filename(StringValuePtr(_filename));
598
+ get_hnsw_bruteforcesearch(self)->saveIndex(filename);
599
+ RB_GC_GUARD(_filename);
600
+ return Qnil;
601
+ };
602
+
603
+ static VALUE _hnsw_bruteforcesearch_load_index(VALUE self, VALUE _filename) {
604
+ std::string filename(StringValuePtr(_filename));
605
+ VALUE ivspace = rb_iv_get(self, "@space");
606
+ hnswlib::SpaceInterface<float>* space;
607
+ if (rb_obj_is_instance_of(ivspace, rb_cHnswlibL2Space)) {
608
+ space = RbHnswlibL2Space::get_hnsw_l2space(ivspace);
609
+ } else {
610
+ space = RbHnswlibInnerProductSpace::get_hnsw_ipspace(ivspace);
611
+ }
612
+ get_hnsw_bruteforcesearch(self)->loadIndex(filename, space);
613
+ RB_GC_GUARD(_filename);
614
+ return Qnil;
615
+ };
616
+
617
+ static VALUE _hnsw_bruteforcesearch_remove_point(VALUE self, VALUE idx) {
618
+ get_hnsw_bruteforcesearch(self)->removePoint((size_t)NUM2INT(idx));
619
+ return Qnil;
620
+ };
621
+
622
+ static VALUE _hnsw_bruteforcesearch_max_elements(VALUE self) {
623
+ return INT2NUM((int)(get_hnsw_bruteforcesearch(self)->maxelements_));
624
+ };
625
+
626
+ static VALUE _hnsw_bruteforcesearch_current_count(VALUE self) {
627
+ return INT2NUM((int)(get_hnsw_bruteforcesearch(self)->cur_element_count));
628
+ };
629
+ };
630
+
631
+ const rb_data_type_t RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_type = {
632
+ "RbHnswlibBruteforceSearch",
633
+ {
634
+ NULL,
635
+ RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_free,
636
+ RbHnswlibBruteforceSearch::hnsw_bruteforcesearch_size
637
+ },
638
+ NULL,
639
+ NULL,
640
+ RUBY_TYPED_FREE_IMMEDIATELY
641
+ };
642
+
420
643
  #endif /* HNSWLIBEXT_HPP */
@@ -8,6 +8,7 @@ namespace hnswlib {
8
8
  template<typename dist_t>
9
9
  class BruteforceSearch : public AlgorithmInterface<dist_t> {
10
10
  public:
11
+ BruteforceSearch() : data_(nullptr) { }
11
12
  BruteforceSearch(SpaceInterface <dist_t> *s) {
12
13
 
13
14
  }
@@ -91,13 +92,13 @@ namespace hnswlib {
91
92
  searchKnn(const void *query_data, size_t k) const {
92
93
  std::priority_queue<std::pair<dist_t, labeltype >> topResults;
93
94
  if (cur_element_count == 0) return topResults;
94
- for (int i = 0; i < k; i++) {
95
+ for (size_t i = 0; i < k; i++) {
95
96
  dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
96
97
  topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
97
98
  data_size_))));
98
99
  }
99
100
  dist_t lastdist = topResults.top().first;
100
- for (int i = k; i < cur_element_count; i++) {
101
+ for (size_t i = k; i < cur_element_count; i++) {
101
102
  dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_);
102
103
  if (dist <= lastdist) {
103
104
  topResults.push(std::pair<dist_t, labeltype>(dist, *((labeltype *) (data_ + size_per_element_ * i +
@@ -17,6 +17,7 @@ namespace hnswlib {
17
17
  class HierarchicalNSW : public AlgorithmInterface<dist_t> {
18
18
  public:
19
19
  static const tableint max_update_element_locks = 65536;
20
+ HierarchicalNSW() : visited_list_pool_(nullptr), data_level0_memory_(nullptr), linkLists_(nullptr), cur_element_count(0) { }
20
21
  HierarchicalNSW(SpaceInterface<dist_t> *s) {
21
22
 
22
23
  }
@@ -757,7 +758,7 @@ namespace hnswlib {
757
758
  size_t dim = *((size_t *) dist_func_param_);
758
759
  std::vector<data_t> data;
759
760
  data_t* data_ptr = (data_t*) data_ptrv;
760
- for (int i = 0; i < dim; i++) {
761
+ for (size_t i = 0; i < dim; i++) {
761
762
  data.push_back(*data_ptr);
762
763
  data_ptr += 1;
763
764
  }
@@ -247,6 +247,7 @@ namespace hnswlib {
247
247
  size_t data_size_;
248
248
  size_t dim_;
249
249
  public:
250
+ InnerProductSpace() : data_size_(0), dim_(0) { }
250
251
  InnerProductSpace(size_t dim) {
251
252
  fstdistfunc_ = InnerProduct;
252
253
  #if defined(USE_AVX) || defined(USE_SSE)
@@ -172,6 +172,7 @@ namespace hnswlib {
172
172
  size_t data_size_;
173
173
  size_t dim_;
174
174
  public:
175
+ L2Space() : data_size_(0), dim_(0) { }
175
176
  L2Space(size_t dim) {
176
177
  fstdistfunc_ = L2Sqr;
177
178
  #if defined(USE_SSE) || defined(USE_AVX)
data/hnswlib.gemspec CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  # Specify which files should be added to the gem when it is released.
21
21
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
22
22
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
23
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }.reject { |f| f.include?('dummy.rb') }
24
24
  end
25
25
  spec.bindir = 'exe'
26
26
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
@@ -3,7 +3,8 @@
3
3
  # Hnswlib.rb provides Ruby bindings for the Hnswlib.
4
4
  module Hnswlib
5
5
  # The version of Hnswlib.rb you install.
6
- VERSION = '0.1.0'
6
+ VERSION = '0.4.0'
7
+
7
8
  # The version of Hnswlib included with gem.
8
9
  HSWLIB_VERSION = '0.5.2'
9
10
  end
data/lib/hnswlib.rb CHANGED
@@ -4,12 +4,12 @@ require_relative 'hnswlib/version'
4
4
  require_relative 'hnswlib/hnswlibext'
5
5
 
6
6
  module Hnswlib
7
- # Index is a class that provides functions for k-nearest eighbors search.
7
+ # HnswIndex is a class that provides functions for k-nearest neighbors search.
8
8
  #
9
9
  # @example
10
10
  # require 'hnswlib'
11
11
  #
12
- # index = Hnswlib::Index.new(n_features: 100, max_item: 10000)
12
+ # index = Hnswlib::HnswIndex.new(n_features: 100, max_item: 10000)
13
13
  #
14
14
  # 5000.times do |item_id|
15
15
  # item_vec = Array.new(100) { rand - 0.5 }
@@ -18,7 +18,7 @@ module Hnswlib
18
18
  #
19
19
  # index.get_nns_by_item(0, 100)
20
20
  #
21
- class Index
21
+ class HnswIndex
22
22
  # Returns the metric of index.
23
23
  # @return [String]
24
24
  attr_reader :metric
@@ -63,7 +63,6 @@ module Hnswlib
63
63
  # Remove the item vector.
64
64
  #
65
65
  # @param i [Integer] The ID of item.
66
- # @return [Array]
67
66
  def remove_item(i)
68
67
  @index.mark_deleted(i)
69
68
  end
@@ -151,4 +150,7 @@ module Hnswlib
151
150
  @index.max_elements
152
151
  end
153
152
  end
153
+
154
+ # Index is alias of HnswIndex
155
+ Index = ::Hnswlib::HnswIndex
154
156
  end
data/sig/hnswlib.rbs ADDED
@@ -0,0 +1,69 @@
1
+ module Hnswlib
2
+ VERSION: ::String
3
+ HSWLIB_VERSION: ::String
4
+
5
+ class HnswIndex
6
+ attr_reader metric: String
7
+
8
+ def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed) -> void
9
+ def add_item: (Integer i, Array[Float] v) -> bool
10
+ def get_item: (Integer i) -> Array[Float]
11
+ def remove_item: (Integer i) -> void
12
+ def get_nns_by_item: (Integer i, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
13
+ def get_nns_by_vector: (Array[Float] v, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
14
+ def resize_index: (Integer new_max_item) -> void
15
+ def set_ef: (Integer ef) -> void
16
+ def save: (String filename) -> void
17
+ def load: (String filename) -> void
18
+ def get_distance: (Integer i, Integer j) -> Float
19
+ def n_items: () -> Integer
20
+ def n_features: () -> Integer
21
+ def max_item: () -> Integer
22
+ end
23
+
24
+ Index: untyped
25
+
26
+ class L2Space
27
+ attr_accessor dim: Integer
28
+
29
+ def initialize: (Integer dim) -> void
30
+ def distance: (Array[Float] a, Array[Float] b) -> Float
31
+ end
32
+
33
+ class InnerProductSpace
34
+ attr_accessor dim: Integer
35
+
36
+ def initialize: (Integer dim) -> void
37
+ def distance: (Array[Float] a, Array[Float] b) -> Float
38
+ end
39
+
40
+ class BruteforceSearch
41
+ attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
42
+
43
+ def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements) -> void
44
+ def add_point: (Array[Float] arr, Integer idx) -> bool
45
+ def current_count: () -> Integer
46
+ def load_index: (String filename) -> void
47
+ def max_elements: () -> Integer
48
+ def remove_point: (Integer idx) -> void
49
+ def save_index: (String filename) -> void
50
+ def search_knn: (Array[Float] arr, Integer k) -> [Array[Integer], Array[Float]]
51
+ end
52
+
53
+ class HierarchicalNSW
54
+ attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
55
+
56
+ def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed) -> void
57
+ def add_point: (Array[Float] arr, Integer idx) -> bool
58
+ def current_count: () -> Integer
59
+ def get_ids: () -> Array[Integer]
60
+ def get_point: (Integer idx) -> Array[Float]
61
+ def load_index: (String filename) -> void
62
+ def mark_deleted: (Integer idx) -> void
63
+ def max_elements: () -> Integer
64
+ def resize_index: (Integer new_max_elements) -> void
65
+ def save_index: (String filename) -> void
66
+ def search_knn: (Array[Float] arr, Integer k) -> [Array[Integer], Array[Float]]
67
+ def set_ef: (Integer ef) -> void
68
+ end
69
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hnswlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-24 00:00:00.000000000 Z
11
+ date: 2021-09-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
14
14
  email:
@@ -27,6 +27,7 @@ files:
27
27
  - LICENSE.txt
28
28
  - README.md
29
29
  - Rakefile
30
+ - Steepfile
30
31
  - ext/hnswlib/extconf.rb
31
32
  - ext/hnswlib/hnswlibext.cpp
32
33
  - ext/hnswlib/hnswlibext.hpp
@@ -40,6 +41,7 @@ files:
40
41
  - hnswlib.gemspec
41
42
  - lib/hnswlib.rb
42
43
  - lib/hnswlib/version.rb
44
+ - sig/hnswlib.rbs
43
45
  homepage: https://github.com/yoshoku/hnswlib.rb
44
46
  licenses:
45
47
  - Apache-2.0
@@ -62,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
62
64
  - !ruby/object:Gem::Version
63
65
  version: '0'
64
66
  requirements: []
65
- rubygems_version: 3.1.6
67
+ rubygems_version: 3.2.22
66
68
  signing_key:
67
69
  specification_version: 4
68
70
  summary: Ruby bindings for the Hnswlib.