hnswlib 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/hnswlib/hnswlibext.cpp +1 -1
- data/ext/hnswlib/hnswlibext.hpp +194 -62
- data/ext/hnswlib/src/bruteforce.h +142 -131
- data/ext/hnswlib/src/hnswalg.h +1028 -964
- data/ext/hnswlib/src/hnswlib.h +74 -66
- data/ext/hnswlib/src/space_ip.h +299 -299
- data/ext/hnswlib/src/space_l2.h +268 -273
- data/ext/hnswlib/src/visited_list_pool.h +54 -55
- data/lib/hnswlib/version.rb +2 -2
- data/lib/hnswlib.rb +17 -10
- data/sig/hnswlib.rbs +6 -6
- metadata +3 -3
@@ -5,75 +5,74 @@
|
|
5
5
|
#include <deque>
|
6
6
|
|
7
7
|
namespace hnswlib {
|
8
|
-
|
8
|
+
typedef unsigned short int vl_type;
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
class VisitedList {
|
11
|
+
public:
|
12
|
+
vl_type curV;
|
13
|
+
vl_type *mass;
|
14
|
+
unsigned int numelements;
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
VisitedList(int numelements1) {
|
17
|
+
curV = -1;
|
18
|
+
numelements = numelements1;
|
19
|
+
mass = new vl_type[numelements];
|
20
|
+
}
|
21
21
|
|
22
|
-
|
22
|
+
void reset() {
|
23
|
+
curV++;
|
24
|
+
if (curV == 0) {
|
25
|
+
memset(mass, 0, sizeof(vl_type) * numelements);
|
23
26
|
curV++;
|
24
|
-
|
25
|
-
|
26
|
-
curV++;
|
27
|
-
}
|
28
|
-
};
|
27
|
+
}
|
28
|
+
}
|
29
29
|
|
30
|
-
|
31
|
-
|
30
|
+
~VisitedList() { delete[] mass; }
|
31
|
+
};
|
32
32
|
///////////////////////////////////////////////////////////
|
33
33
|
//
|
34
34
|
// Class for multi-threaded pool-management of VisitedLists
|
35
35
|
//
|
36
36
|
/////////////////////////////////////////////////////////
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
public:
|
44
|
-
VisitedListPool(int initmaxpools, int numelements1) {
|
45
|
-
numelements = numelements1;
|
46
|
-
for (int i = 0; i < initmaxpools; i++)
|
47
|
-
pool.push_front(new VisitedList(numelements));
|
48
|
-
}
|
38
|
+
class VisitedListPool {
|
39
|
+
std::deque<VisitedList *> pool;
|
40
|
+
std::mutex poolguard;
|
41
|
+
int numelements;
|
49
42
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
pool.pop_front();
|
57
|
-
} else {
|
58
|
-
rez = new VisitedList(numelements);
|
59
|
-
}
|
60
|
-
}
|
61
|
-
rez->reset();
|
62
|
-
return rez;
|
63
|
-
};
|
43
|
+
public:
|
44
|
+
VisitedListPool(int initmaxpools, int numelements1) {
|
45
|
+
numelements = numelements1;
|
46
|
+
for (int i = 0; i < initmaxpools; i++)
|
47
|
+
pool.push_front(new VisitedList(numelements));
|
48
|
+
}
|
64
49
|
|
65
|
-
|
50
|
+
VisitedList *getFreeVisitedList() {
|
51
|
+
VisitedList *rez;
|
52
|
+
{
|
66
53
|
std::unique_lock <std::mutex> lock(poolguard);
|
67
|
-
pool.push_front(vl);
|
68
|
-
|
69
|
-
|
70
|
-
~VisitedListPool() {
|
71
|
-
while (pool.size()) {
|
72
|
-
VisitedList *rez = pool.front();
|
54
|
+
if (pool.size() > 0) {
|
55
|
+
rez = pool.front();
|
73
56
|
pool.pop_front();
|
74
|
-
|
57
|
+
} else {
|
58
|
+
rez = new VisitedList(numelements);
|
75
59
|
}
|
76
|
-
}
|
77
|
-
|
78
|
-
|
60
|
+
}
|
61
|
+
rez->reset();
|
62
|
+
return rez;
|
63
|
+
}
|
79
64
|
|
65
|
+
void releaseVisitedList(VisitedList *vl) {
|
66
|
+
std::unique_lock <std::mutex> lock(poolguard);
|
67
|
+
pool.push_front(vl);
|
68
|
+
}
|
69
|
+
|
70
|
+
~VisitedListPool() {
|
71
|
+
while (pool.size()) {
|
72
|
+
VisitedList *rez = pool.front();
|
73
|
+
pool.pop_front();
|
74
|
+
delete rez;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
};
|
78
|
+
} // namespace hnswlib
|
data/lib/hnswlib/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
4
4
|
module Hnswlib
|
5
5
|
# The version of Hnswlib.rb you install.
|
6
|
-
VERSION = '0.6.2'
|
6
|
+
VERSION = '0.7.0'
|
7
7
|
|
8
8
|
# The version of Hnswlib included with gem.
|
9
|
-
HSWLIB_VERSION = '0.
|
9
|
+
HSWLIB_VERSION = '0.7.0'
|
10
10
|
end
|
data/lib/hnswlib.rb
CHANGED
@@ -31,7 +31,9 @@ module Hnswlib
|
|
31
31
|
# @param m [Integer] The maximum number of outgoing connections in the graph
|
32
32
|
# @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors. It controls the index time/accuracy trade-off.
|
33
33
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
34
|
-
|
34
|
+
# @param allow_replace_removed [Boolean] The flag to replace removed element when adding new element.
|
35
|
+
def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200,
|
36
|
+
random_seed: 100, allow_replace_removed: false)
|
35
37
|
@metric = metric
|
36
38
|
space = if @metric == 'dot'
|
37
39
|
Hnswlib::InnerProductSpace.new(n_features)
|
@@ -39,7 +41,8 @@ module Hnswlib
|
|
39
41
|
Hnswlib::L2Space.new(n_features)
|
40
42
|
end
|
41
43
|
@index = Hnswlib::HierarchicalNSW.new(
|
42
|
-
space: space, max_elements: max_item, m: m, ef_construction: ef_construction,
|
44
|
+
space: space, max_elements: max_item, m: m, ef_construction: ef_construction,
|
45
|
+
random_seed: random_seed, allow_replace_deleted: allow_replace_removed
|
43
46
|
)
|
44
47
|
end
|
45
48
|
|
@@ -47,9 +50,10 @@ module Hnswlib
|
|
47
50
|
#
|
48
51
|
# @param i [Integer] The ID of item.
|
49
52
|
# @param v [Array] The vector of item.
|
53
|
+
# @param replace_removed [Boolean] The flag to replace a removed element.
|
50
54
|
# @return [Boolean]
|
51
|
-
def add_item(i, v)
|
52
|
-
@index.add_point(v, i)
|
55
|
+
def add_item(i, v, replace_removed: false)
|
56
|
+
@index.add_point(v, i, replace_deleted: replace_removed)
|
53
57
|
end
|
54
58
|
|
55
59
|
# Return the item vector.
|
@@ -72,10 +76,11 @@ module Hnswlib
|
|
72
76
|
# @param i [Integer] The ID of query item.
|
73
77
|
# @param n [Integer] The number of nearest neighbors.
|
74
78
|
# @param include_distances [Boolean] The flag indicating whether to returns all corresponding distances.
|
79
|
+
# @param filter [Proc] The function that filters elements by its labels.
|
75
80
|
# @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
|
76
|
-
def get_nns_by_item(i, n, include_distances: false)
|
81
|
+
def get_nns_by_item(i, n, include_distances: false, filter: nil)
|
77
82
|
v = @index.get_point(i)
|
78
|
-
ids, dists = @index.search_knn(v, n)
|
83
|
+
ids, dists = @index.search_knn(v, n, filter: filter)
|
79
84
|
include_distances ? [ids, dists] : ids
|
80
85
|
end
|
81
86
|
|
@@ -84,9 +89,10 @@ module Hnswlib
|
|
84
89
|
# @param v [Array] The vector of query item.
|
85
90
|
# @param n [Integer] The number of nearest neighbors.
|
86
91
|
# @param include_distances [Boolean] The flag indicating whether to returns all corresponding distances.
|
92
|
+
# @param filter [Proc] The function that filters elements by its labels.
|
87
93
|
# @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
|
88
|
-
def get_nns_by_vector(v, n, include_distances: false)
|
89
|
-
ids, dists = @index.search_knn(v, n)
|
94
|
+
def get_nns_by_vector(v, n, include_distances: false, filter: nil)
|
95
|
+
ids, dists = @index.search_knn(v, n, filter: filter)
|
90
96
|
include_distances ? [ids, dists] : ids
|
91
97
|
end
|
92
98
|
|
@@ -114,8 +120,9 @@ module Hnswlib
|
|
114
120
|
# Load a search index from disk.
|
115
121
|
#
|
116
122
|
# @param filename [String] The filename of search index.
|
117
|
-
|
118
|
-
|
123
|
+
# @param allow_replace_removed [Boolean] The flag to replace removed element when adding new element.
|
124
|
+
def load(filename, allow_replace_removed: false)
|
125
|
+
@index.load_index(filename, allow_replace_deleted: allow_replace_removed)
|
119
126
|
end
|
120
127
|
|
121
128
|
# Calculate the distances between items.
|
data/sig/hnswlib.rbs
CHANGED
@@ -5,8 +5,8 @@ module Hnswlib
|
|
5
5
|
class HnswIndex
|
6
6
|
attr_reader metric: String
|
7
7
|
|
8
|
-
def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed) -> void
|
9
|
-
def add_item: (Integer i, Array[Float] v) -> bool
|
8
|
+
def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed, ?allow_replace_removed: (true | false) allow_replace_removed) -> void
|
9
|
+
def add_item: (Integer i, Array[Float] v, ?replace_removed: (true | false) replace_removed) -> bool
|
10
10
|
def get_item: (Integer i) -> Array[Float]
|
11
11
|
def remove_item: (Integer i) -> void
|
12
12
|
def get_nns_by_item: (Integer i, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
@@ -14,7 +14,7 @@ module Hnswlib
|
|
14
14
|
def resize_index: (Integer new_max_item) -> void
|
15
15
|
def set_ef: (Integer ef) -> void
|
16
16
|
def save: (String filename) -> void
|
17
|
-
def load: (String filename) -> void
|
17
|
+
def load: (String filename, ?allow_replace_removed: (true | false) allow_replace_removed) -> void
|
18
18
|
def get_distance: (Integer i, Integer j) -> Float
|
19
19
|
def n_items: () -> Integer
|
20
20
|
def n_features: () -> Integer
|
@@ -53,12 +53,12 @@ module Hnswlib
|
|
53
53
|
class HierarchicalNSW
|
54
54
|
attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
|
55
55
|
|
56
|
-
def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed) -> void
|
57
|
-
def add_point: (Array[Float] arr, Integer idx) -> bool
|
56
|
+
def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed, ?allow_replace_deleted: (true | false) allow_replace_deleted) -> void
|
57
|
+
def add_point: (Array[Float] arr, Integer idx, ?replace_deleted: (true | false) replace_deleted) -> bool
|
58
58
|
def current_count: () -> Integer
|
59
59
|
def get_ids: () -> Array[Integer]
|
60
60
|
def get_point: (Integer idx) -> Array[Float]
|
61
|
-
def load_index: (String filename) -> void
|
61
|
+
def load_index: (String filename, ?allow_replace_deleted: (true | false) allow_replace_deleted) -> void
|
62
62
|
def mark_deleted: (Integer idx) -> void
|
63
63
|
def max_elements: () -> Integer
|
64
64
|
def resize_index: (Integer new_max_elements) -> void
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hnswlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
14
14
|
email:
|
@@ -57,7 +57,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '0'
|
59
59
|
requirements: []
|
60
|
-
rubygems_version: 3.
|
60
|
+
rubygems_version: 3.3.26
|
61
61
|
signing_key:
|
62
62
|
specification_version: 4
|
63
63
|
summary: Ruby bindings for the Hnswlib.
|