hnswlib 0.6.2 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/hnswlib/hnswlibext.cpp +1 -1
- data/ext/hnswlib/hnswlibext.hpp +194 -62
- data/ext/hnswlib/src/bruteforce.h +142 -131
- data/ext/hnswlib/src/hnswalg.h +1028 -964
- data/ext/hnswlib/src/hnswlib.h +74 -66
- data/ext/hnswlib/src/space_ip.h +299 -299
- data/ext/hnswlib/src/space_l2.h +268 -273
- data/ext/hnswlib/src/visited_list_pool.h +54 -55
- data/lib/hnswlib/version.rb +2 -2
- data/lib/hnswlib.rb +17 -10
- data/sig/hnswlib.rbs +6 -6
- metadata +3 -3
@@ -5,75 +5,74 @@
|
|
5
5
|
#include <deque>
|
6
6
|
|
7
7
|
namespace hnswlib {
|
8
|
-
|
8
|
+
typedef unsigned short int vl_type;
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
class VisitedList {
|
11
|
+
public:
|
12
|
+
vl_type curV;
|
13
|
+
vl_type *mass;
|
14
|
+
unsigned int numelements;
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
VisitedList(int numelements1) {
|
17
|
+
curV = -1;
|
18
|
+
numelements = numelements1;
|
19
|
+
mass = new vl_type[numelements];
|
20
|
+
}
|
21
21
|
|
22
|
-
|
22
|
+
void reset() {
|
23
|
+
curV++;
|
24
|
+
if (curV == 0) {
|
25
|
+
memset(mass, 0, sizeof(vl_type) * numelements);
|
23
26
|
curV++;
|
24
|
-
|
25
|
-
|
26
|
-
curV++;
|
27
|
-
}
|
28
|
-
};
|
27
|
+
}
|
28
|
+
}
|
29
29
|
|
30
|
-
|
31
|
-
|
30
|
+
~VisitedList() { delete[] mass; }
|
31
|
+
};
|
32
32
|
///////////////////////////////////////////////////////////
|
33
33
|
//
|
34
34
|
// Class for multi-threaded pool-management of VisitedLists
|
35
35
|
//
|
36
36
|
/////////////////////////////////////////////////////////
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
public:
|
44
|
-
VisitedListPool(int initmaxpools, int numelements1) {
|
45
|
-
numelements = numelements1;
|
46
|
-
for (int i = 0; i < initmaxpools; i++)
|
47
|
-
pool.push_front(new VisitedList(numelements));
|
48
|
-
}
|
38
|
+
class VisitedListPool {
|
39
|
+
std::deque<VisitedList *> pool;
|
40
|
+
std::mutex poolguard;
|
41
|
+
int numelements;
|
49
42
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
pool.pop_front();
|
57
|
-
} else {
|
58
|
-
rez = new VisitedList(numelements);
|
59
|
-
}
|
60
|
-
}
|
61
|
-
rez->reset();
|
62
|
-
return rez;
|
63
|
-
};
|
43
|
+
public:
|
44
|
+
VisitedListPool(int initmaxpools, int numelements1) {
|
45
|
+
numelements = numelements1;
|
46
|
+
for (int i = 0; i < initmaxpools; i++)
|
47
|
+
pool.push_front(new VisitedList(numelements));
|
48
|
+
}
|
64
49
|
|
65
|
-
|
50
|
+
VisitedList *getFreeVisitedList() {
|
51
|
+
VisitedList *rez;
|
52
|
+
{
|
66
53
|
std::unique_lock <std::mutex> lock(poolguard);
|
67
|
-
pool.push_front(vl);
|
68
|
-
|
69
|
-
|
70
|
-
~VisitedListPool() {
|
71
|
-
while (pool.size()) {
|
72
|
-
VisitedList *rez = pool.front();
|
54
|
+
if (pool.size() > 0) {
|
55
|
+
rez = pool.front();
|
73
56
|
pool.pop_front();
|
74
|
-
|
57
|
+
} else {
|
58
|
+
rez = new VisitedList(numelements);
|
75
59
|
}
|
76
|
-
}
|
77
|
-
|
78
|
-
|
60
|
+
}
|
61
|
+
rez->reset();
|
62
|
+
return rez;
|
63
|
+
}
|
79
64
|
|
65
|
+
void releaseVisitedList(VisitedList *vl) {
|
66
|
+
std::unique_lock <std::mutex> lock(poolguard);
|
67
|
+
pool.push_front(vl);
|
68
|
+
}
|
69
|
+
|
70
|
+
~VisitedListPool() {
|
71
|
+
while (pool.size()) {
|
72
|
+
VisitedList *rez = pool.front();
|
73
|
+
pool.pop_front();
|
74
|
+
delete rez;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
};
|
78
|
+
} // namespace hnswlib
|
data/lib/hnswlib/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
4
4
|
module Hnswlib
|
5
5
|
# The version of Hnswlib.rb you install.
|
6
|
-
VERSION = '0.6.2'
|
6
|
+
VERSION = '0.7.0'
|
7
7
|
|
8
8
|
# The version of Hnswlib included with gem.
|
9
|
-
HSWLIB_VERSION = '0.
|
9
|
+
HSWLIB_VERSION = '0.7.0'
|
10
10
|
end
|
data/lib/hnswlib.rb
CHANGED
@@ -31,7 +31,9 @@ module Hnswlib
|
|
31
31
|
# @param m [Integer] The maximum number of outgoing connections in the graph
|
32
32
|
# @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors. It controls the index time/accuracy trade-off.
|
33
33
|
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
34
|
-
|
34
|
+
# @param allow_replace_removed [Boolean] The flag to replace removed element when adding new element.
|
35
|
+
def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200,
|
36
|
+
random_seed: 100, allow_replace_removed: false)
|
35
37
|
@metric = metric
|
36
38
|
space = if @metric == 'dot'
|
37
39
|
Hnswlib::InnerProductSpace.new(n_features)
|
@@ -39,7 +41,8 @@ module Hnswlib
|
|
39
41
|
Hnswlib::L2Space.new(n_features)
|
40
42
|
end
|
41
43
|
@index = Hnswlib::HierarchicalNSW.new(
|
42
|
-
space: space, max_elements: max_item, m: m, ef_construction: ef_construction,
|
44
|
+
space: space, max_elements: max_item, m: m, ef_construction: ef_construction,
|
45
|
+
random_seed: random_seed, allow_replace_deleted: allow_replace_removed
|
43
46
|
)
|
44
47
|
end
|
45
48
|
|
@@ -47,9 +50,10 @@ module Hnswlib
|
|
47
50
|
#
|
48
51
|
# @param i [Integer] The ID of item.
|
49
52
|
# @param v [Array] The vector of item.
|
53
|
+
# @param replace_removed [Boolean] The flag to replace a removed element.
|
50
54
|
# @return [Boolean]
|
51
|
-
def add_item(i, v)
|
52
|
-
@index.add_point(v, i)
|
55
|
+
def add_item(i, v, replace_removed: false)
|
56
|
+
@index.add_point(v, i, replace_deleted: replace_removed)
|
53
57
|
end
|
54
58
|
|
55
59
|
# Return the item vector.
|
@@ -72,10 +76,11 @@ module Hnswlib
|
|
72
76
|
# @param i [Integer] The ID of query item.
|
73
77
|
# @param n [Integer] The number of nearest neighbors.
|
74
78
|
# @param include_distances [Boolean] The flag indicating whether to returns all corresponding distances.
|
79
|
+
# @param filter [Proc] The function that filters elements by its labels.
|
75
80
|
# @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
|
76
|
-
def get_nns_by_item(i, n, include_distances: false)
|
81
|
+
def get_nns_by_item(i, n, include_distances: false, filter: nil)
|
77
82
|
v = @index.get_point(i)
|
78
|
-
ids, dists = @index.search_knn(v, n)
|
83
|
+
ids, dists = @index.search_knn(v, n, filter: filter)
|
79
84
|
include_distances ? [ids, dists] : ids
|
80
85
|
end
|
81
86
|
|
@@ -84,9 +89,10 @@ module Hnswlib
|
|
84
89
|
# @param v [Array] The vector of query item.
|
85
90
|
# @param n [Integer] The number of nearest neighbors.
|
86
91
|
# @param include_distances [Boolean] The flag indicating whether to returns all corresponding distances.
|
92
|
+
# @param filter [Proc] The function that filters elements by its labels.
|
87
93
|
# @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
|
88
|
-
def get_nns_by_vector(v, n, include_distances: false)
|
89
|
-
ids, dists = @index.search_knn(v, n)
|
94
|
+
def get_nns_by_vector(v, n, include_distances: false, filter: nil)
|
95
|
+
ids, dists = @index.search_knn(v, n, filter: filter)
|
90
96
|
include_distances ? [ids, dists] : ids
|
91
97
|
end
|
92
98
|
|
@@ -114,8 +120,9 @@ module Hnswlib
|
|
114
120
|
# Load a search index from disk.
|
115
121
|
#
|
116
122
|
# @param filename [String] The filename of search index.
|
117
|
-
|
118
|
-
|
123
|
+
# @param allow_replace_removed [Boolean] The flag to replace removed element when adding new element.
|
124
|
+
def load(filename, allow_replace_removed: false)
|
125
|
+
@index.load_index(filename, allow_replace_deleted: allow_replace_removed)
|
119
126
|
end
|
120
127
|
|
121
128
|
# Calculate the distances between items.
|
data/sig/hnswlib.rbs
CHANGED
@@ -5,8 +5,8 @@ module Hnswlib
|
|
5
5
|
class HnswIndex
|
6
6
|
attr_reader metric: String
|
7
7
|
|
8
|
-
def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed) -> void
|
9
|
-
def add_item: (Integer i, Array[Float] v) -> bool
|
8
|
+
def initialize: (n_features: Integer n_features, max_item: Integer max_item, ?metric: ::String metric, ?m: ::Integer m, ?ef_construction: ::Integer ef_construction, ?random_seed: ::Integer random_seed, ?allow_replace_removed: (true | false) allow_replace_removed) -> void
|
9
|
+
def add_item: (Integer i, Array[Float] v, ?replace_removed: (true | false) replace_removed) -> bool
|
10
10
|
def get_item: (Integer i) -> Array[Float]
|
11
11
|
def remove_item: (Integer i) -> void
|
12
12
|
def get_nns_by_item: (Integer i, Integer n, ?include_distances: (true | false) include_distances) -> ([Array[Integer], Array[Float]] | Array[Integer])
|
@@ -14,7 +14,7 @@ module Hnswlib
|
|
14
14
|
def resize_index: (Integer new_max_item) -> void
|
15
15
|
def set_ef: (Integer ef) -> void
|
16
16
|
def save: (String filename) -> void
|
17
|
-
def load: (String filename) -> void
|
17
|
+
def load: (String filename, ?allow_replace_removed: (true | false) allow_replace_removed) -> void
|
18
18
|
def get_distance: (Integer i, Integer j) -> Float
|
19
19
|
def n_items: () -> Integer
|
20
20
|
def n_features: () -> Integer
|
@@ -53,12 +53,12 @@ module Hnswlib
|
|
53
53
|
class HierarchicalNSW
|
54
54
|
attr_accessor space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace)
|
55
55
|
|
56
|
-
def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed) -> void
|
57
|
-
def add_point: (Array[Float] arr, Integer idx) -> bool
|
56
|
+
def initialize: (space: (::Hnswlib::L2Space | ::Hnswlib::InnerProductSpace) space, max_elements: Integer max_elements, ?m: Integer m, ?ef_construction: Integer ef_construction, ?random_seed: Integer random_seed, ?allow_replace_deleted: (true | false) allow_replace_deleted) -> void
|
57
|
+
def add_point: (Array[Float] arr, Integer idx, ?replace_deleted: (true | false) replace_deleted) -> bool
|
58
58
|
def current_count: () -> Integer
|
59
59
|
def get_ids: () -> Array[Integer]
|
60
60
|
def get_point: (Integer idx) -> Array[Float]
|
61
|
-
def load_index: (String filename) -> void
|
61
|
+
def load_index: (String filename, ?allow_replace_deleted: (true | false) allow_replace_deleted) -> void
|
62
62
|
def mark_deleted: (Integer idx) -> void
|
63
63
|
def max_elements: () -> Integer
|
64
64
|
def resize_index: (Integer new_max_elements) -> void
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hnswlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
14
14
|
email:
|
@@ -57,7 +57,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '0'
|
59
59
|
requirements: []
|
60
|
-
rubygems_version: 3.
|
60
|
+
rubygems_version: 3.3.26
|
61
61
|
signing_key:
|
62
62
|
specification_version: 4
|
63
63
|
summary: Ruby bindings for the Hnswlib.
|