hnswlib 0.6.2 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,75 +5,74 @@
5
5
  #include <deque>
6
6
 
7
7
  namespace hnswlib {
8
- typedef unsigned short int vl_type;
8
+ typedef unsigned short int vl_type;
9
9
 
10
- class VisitedList {
11
- public:
12
- vl_type curV;
13
- vl_type *mass;
14
- unsigned int numelements;
10
+ class VisitedList {
11
+ public:
12
+ vl_type curV;
13
+ vl_type *mass;
14
+ unsigned int numelements;
15
15
 
16
- VisitedList(int numelements1) {
17
- curV = -1;
18
- numelements = numelements1;
19
- mass = new vl_type[numelements];
20
- }
16
+ VisitedList(int numelements1) {
17
+ curV = -1;
18
+ numelements = numelements1;
19
+ mass = new vl_type[numelements];
20
+ }
21
21
 
22
- void reset() {
22
+ void reset() {
23
+ curV++;
24
+ if (curV == 0) {
25
+ memset(mass, 0, sizeof(vl_type) * numelements);
23
26
  curV++;
24
- if (curV == 0) {
25
- memset(mass, 0, sizeof(vl_type) * numelements);
26
- curV++;
27
- }
28
- };
27
+ }
28
+ }
29
29
 
30
- ~VisitedList() { delete[] mass; }
31
- };
30
+ ~VisitedList() { delete[] mass; }
31
+ };
32
32
  ///////////////////////////////////////////////////////////
33
33
  //
34
34
  // Class for multi-threaded pool-management of VisitedLists
35
35
  //
36
36
  /////////////////////////////////////////////////////////
37
37
 
38
- class VisitedListPool {
39
- std::deque<VisitedList *> pool;
40
- std::mutex poolguard;
41
- int numelements;
42
-
43
- public:
44
- VisitedListPool(int initmaxpools, int numelements1) {
45
- numelements = numelements1;
46
- for (int i = 0; i < initmaxpools; i++)
47
- pool.push_front(new VisitedList(numelements));
48
- }
38
+ class VisitedListPool {
39
+ std::deque<VisitedList *> pool;
40
+ std::mutex poolguard;
41
+ int numelements;
49
42
 
50
- VisitedList *getFreeVisitedList() {
51
- VisitedList *rez;
52
- {
53
- std::unique_lock <std::mutex> lock(poolguard);
54
- if (pool.size() > 0) {
55
- rez = pool.front();
56
- pool.pop_front();
57
- } else {
58
- rez = new VisitedList(numelements);
59
- }
60
- }
61
- rez->reset();
62
- return rez;
63
- };
43
+ public:
44
+ VisitedListPool(int initmaxpools, int numelements1) {
45
+ numelements = numelements1;
46
+ for (int i = 0; i < initmaxpools; i++)
47
+ pool.push_front(new VisitedList(numelements));
48
+ }
64
49
 
65
- void releaseVisitedList(VisitedList *vl) {
50
+ VisitedList *getFreeVisitedList() {
51
+ VisitedList *rez;
52
+ {
66
53
  std::unique_lock <std::mutex> lock(poolguard);
67
- pool.push_front(vl);
68
- };
69
-
70
- ~VisitedListPool() {
71
- while (pool.size()) {
72
- VisitedList *rez = pool.front();
54
+ if (pool.size() > 0) {
55
+ rez = pool.front();
73
56
  pool.pop_front();
74
- delete rez;
57
+ } else {
58
+ rez = new VisitedList(numelements);
75
59
  }
76
- };
77
- };
78
- }
60
+ }
61
+ rez->reset();
62
+ return rez;
63
+ }
79
64
 
65
+ void releaseVisitedList(VisitedList *vl) {
66
+ std::unique_lock <std::mutex> lock(poolguard);
67
+ pool.push_front(vl);
68
+ }
69
+
70
+ ~VisitedListPool() {
71
+ while (pool.size()) {
72
+ VisitedList *rez = pool.front();
73
+ pool.pop_front();
74
+ delete rez;
75
+ }
76
+ }
77
+ };
78
+ } // namespace hnswlib
@@ -3,8 +3,8 @@
3
3
  # Hnswlib.rb provides Ruby bindings for the Hnswlib.
4
4
  module Hnswlib
5
5
  # The version of Hnswlib.rb you install.
6
- VERSION = '0.6.2'
6
+ VERSION = '0.8.0'
7
7
 
8
8
  # The version of Hnswlib included with gem.
9
- HSWLIB_VERSION = '0.6.2'
9
+ HSWLIB_VERSION = '0.7.0'
10
10
  end
data/lib/hnswlib.rb CHANGED
@@ -18,6 +18,7 @@ module Hnswlib
18
18
  #
19
19
  # index.get_nns_by_item(0, 100)
20
20
  #
21
+ # @deprecated This class was prepared as a class with an interface similar to Annoy, but it is not very useful and will be deleted in the next version.
21
22
  class HnswIndex
22
23
  # Returns the metric of index.
23
24
  # @return [String]
@@ -27,29 +28,28 @@ module Hnswlib
27
28
  #
28
29
  # @param n_features [Integer] The number of features (dimensions) of stored vector.
29
30
  # @param max_item [Integer] The maximum number of items.
30
- # @param metric [String] The distance metric between vectors ('l2' or 'dot').
31
+ # @param metric [String] The distance metric between vectors ('l2', 'dot', or 'cosine').
31
32
  # @param m [Integer] The maximum number of outgoing connections in the graph
32
33
  # @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors. It controls the index time/accuracy trade-off.
33
34
  # @param random_seed [Integer] The seed value used to initialize the random generator.
34
- def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200, random_seed: 100)
35
+ # @param allow_replace_removed [Boolean] The flag to replace a removed element when adding a new element.
36
+ def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200,
37
+ random_seed: 100, allow_replace_removed: false)
35
38
  @metric = metric
36
- space = if @metric == 'dot'
37
- Hnswlib::InnerProductSpace.new(n_features)
38
- else
39
- Hnswlib::L2Space.new(n_features)
40
- end
41
- @index = Hnswlib::HierarchicalNSW.new(
42
- space: space, max_elements: max_item, m: m, ef_construction: ef_construction, random_seed: random_seed
43
- )
39
+ space = @metric == 'dot' ? 'ip' : 'l2'
40
+ @index = Hnswlib::HierarchicalNSW.new(space: space, dim: n_features)
41
+ @index.init_index(max_elements: max_item, m: m, ef_construction: ef_construction,
42
+ random_seed: random_seed, allow_replace_deleted: allow_replace_removed)
44
43
  end
45
44
 
46
45
  # Add item to be indexed.
47
46
  #
48
47
  # @param i [Integer] The ID of item.
49
48
  # @param v [Array] The vector of item.
49
+ # @param replace_removed [Boolean] The flag to replace a removed element.
50
50
  # @return [Boolean]
51
- def add_item(i, v)
52
- @index.add_point(v, i)
51
+ def add_item(i, v, replace_removed: false)
52
+ @index.add_point(v, i, replace_deleted: replace_removed)
53
53
  end
54
54
 
55
55
  # Return the item vector.
@@ -72,10 +72,11 @@ module Hnswlib
72
72
  # @param i [Integer] The ID of query item.
73
73
  # @param n [Integer] The number of nearest neighbors.
74
74
  # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
75
+ # @param filter [Proc] The function that filters elements by their labels.
75
76
  # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
76
- def get_nns_by_item(i, n, include_distances: false)
77
+ def get_nns_by_item(i, n, include_distances: false, filter: nil)
77
78
  v = @index.get_point(i)
78
- ids, dists = @index.search_knn(v, n)
79
+ ids, dists = @index.search_knn(v, n, filter: filter)
79
80
  include_distances ? [ids, dists] : ids
80
81
  end
81
82
 
@@ -84,9 +85,10 @@ module Hnswlib
84
85
  # @param v [Array] The vector of query item.
85
86
  # @param n [Integer] The number of nearest neighbors.
86
87
  # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
88
+ # @param filter [Proc] The function that filters elements by their labels.
87
89
  # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
88
- def get_nns_by_vector(v, n, include_distances: false)
89
- ids, dists = @index.search_knn(v, n)
90
+ def get_nns_by_vector(v, n, include_distances: false, filter: nil)
91
+ ids, dists = @index.search_knn(v, n, filter: filter)
90
92
  include_distances ? [ids, dists] : ids
91
93
  end
92
94
 
@@ -114,8 +116,9 @@ module Hnswlib
114
116
  # Load a search index from disk.
115
117
  #
116
118
  # @param filename [String] The filename of search index.
117
- def load(filename)
118
- @index.load_index(filename)
119
+ # @param allow_replace_removed [Boolean] The flag to replace a removed element when adding a new element.
120
+ def load(filename, allow_replace_removed: false)
121
+ @index.load_index(filename, allow_replace_deleted: allow_replace_removed)
119
122
  end
120
123
 
121
124
  # Calculate the distances between items.
data/sig/hnswlib.rbs CHANGED
@@ -5,8 +5,8 @@ module Hnswlib
5
5
  class HnswIndex
6
6
  attr_reader metric: String
7
7
 
8
- def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed) -> void
9
- def add_item: (Integer i, Array[Float] v) -> bool
8
+ def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed, ?allow_replace_removed: (true | false) allow_replace_removed) -> void
9
+ def add_item: (Integer i, Array[Float] v, ?replace_removed: (true | false) replace_removed) -> bool
10
10
  def get_item: (Integer i) -> Array[Float]
11
11
  def remove_item: (Integer i) -> void
12
12
  def get_nns_by_item: (Integer i, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
@@ -14,7 +14,7 @@ module Hnswlib
14
14
  def resize_index: (Integer new_max_item) -> void
15
15
  def set_ef: (Integer ef) -> void
16
16
  def save: (String filename) -> void
17
- def load: (String filename) -> void
17
+ def load: (String filename, ?allow_replace_removed: (true | false) allow_replace_removed) -> void
18
18
  def get_distance: (Integer i, Integer j) -> Float
19
19
  def n_items: () -> Integer
20
20
  def n_features: () -> Integer
@@ -40,7 +40,8 @@ module Hnswlib
40
40
  class BruteforceSearch
41
41
  attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
42
42
 
43
- def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements) -> void
43
+ def initialize: (space: String space, dim: Integer dim) -> void
44
+ def init_index: (max_elements: Integer max_elements) -> void
44
45
  def add_point: (Array[Float] arr, Integer idx) -> bool
45
46
  def current_count: () -> Integer
46
47
  def load_index: (String filename) -> void
@@ -53,12 +54,13 @@ module Hnswlib
53
54
  class HierarchicalNSW
54
55
  attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
55
56
 
56
- def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed) -> void
57
- def add_point: (Array[Float] arr, Integer idx) -> bool
57
+ def initialize: (space: String space, dim: Integer dim) -> void
58
+ def init_index: (max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed, ?allow_replace_deleted: (true | false) allow_replace_deleted) -> void
59
+ def add_point: (Array[Float] arr, Integer idx, ?replace_deleted: (true | false) replace_deleted) -> bool
58
60
  def current_count: () -> Integer
59
61
  def get_ids: () -> Array[Integer]
60
62
  def get_point: (Integer idx) -> Array[Float]
61
- def load_index: (String filename) -> void
63
+ def load_index: (String filename, ?allow_replace_deleted: (true | false) allow_replace_deleted) -> void
62
64
  def mark_deleted: (Integer idx) -> void
63
65
  def max_elements: () -> Integer
64
66
  def resize_index: (Integer new_max_elements) -> void
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hnswlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-06-25 00:00:00.000000000 Z
11
+ date: 2023-03-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
14
14
  email:
@@ -57,7 +57,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
57
  - !ruby/object:Gem::Version
58
58
  version: '0'
59
59
  requirements: []
60
- rubygems_version: 3.2.33
60
+ rubygems_version: 3.3.26
61
61
  signing_key:
62
62
  specification_version: 4
63
63
  summary: Ruby bindings for the Hnswlib.