hnswlib 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +20 -0
- data/.gitignore +18 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +176 -0
- data/README.md +56 -0
- data/Rakefile +17 -0
- data/ext/hnswlib/extconf.rb +11 -0
- data/ext/hnswlib/hnswlibext.cpp +29 -0
- data/ext/hnswlib/hnswlibext.hpp +420 -0
- data/ext/hnswlib/src/LICENSE +201 -0
- data/ext/hnswlib/src/bruteforce.h +152 -0
- data/ext/hnswlib/src/hnswalg.h +1192 -0
- data/ext/hnswlib/src/hnswlib.h +108 -0
- data/ext/hnswlib/src/space_ip.h +282 -0
- data/ext/hnswlib/src/space_l2.h +281 -0
- data/ext/hnswlib/src/visited_list_pool.h +78 -0
- data/hnswlib.gemspec +35 -0
- data/lib/hnswlib.rb +154 -0
- data/lib/hnswlib/version.rb +9 -0
- metadata +69 -0
@@ -0,0 +1,78 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <deque>
#include <mutex>
#include <string.h>
|
5
|
+
|
6
|
+
namespace hnswlib {
    // Tag type stored once per element. An element counts as "visited" in the
    // current pass when its tag equals VisitedList::curV.
    typedef unsigned short int vl_type;

    /**
     * Reusable array of per-element visit tags.
     *
     * Instead of clearing the whole array before every pass, reset() bumps the
     * current tag (curV); the array is only zeroed when the tag wraps around
     * to 0. The buffer is NOT initialized by the constructor: curV starts at
     * the maximum vl_type value so that the first reset() wraps and performs
     * the initial clear.
     *
     * The object owns `mass`, so copying is disabled (a copy would lead to a
     * double delete[] of the same buffer).
     */
    class VisitedList {
    public:
        vl_type curV;              // tag identifying the current pass
        vl_type *mass;             // numelements tags, owned by this object
        unsigned int numelements;  // number of entries in mass

        /**
         * Allocates storage for numelements1 tags.
         *
         * @param numelements1 number of elements to track; it is converted to
         *                     unsigned int, so callers must not pass a
         *                     negative value.
         */
        explicit VisitedList(int numelements1)
            : curV(static_cast<vl_type>(-1)),  // max value: first reset() wraps to 0
              mass(nullptr),
              numelements(static_cast<unsigned int>(numelements1)) {
            mass = new vl_type[numelements];
        }

        // Owning raw buffer: forbid copies to avoid a double delete[].
        VisitedList(const VisitedList &) = delete;
        VisitedList &operator=(const VisitedList &) = delete;

        /**
         * Starts a new pass by advancing curV. When curV wraps around to 0 the
         * whole array is zeroed and curV is advanced to 1, so that tag 0 always
         * means "never visited".
         */
        void reset() {
            curV++;
            if (curV == 0) {
                memset(mass, 0, sizeof(vl_type) * numelements);
                curV++;
            }
        }

        ~VisitedList() { delete[] mass; }
    };

    ///////////////////////////////////////////////////////////
    //
    // Class for multi-threaded pool-management of VisitedLists
    //
    /////////////////////////////////////////////////////////

    /**
     * Pool of VisitedList objects. Every access to the underlying deque is
     * serialized through `poolguard`. Lists handed out by getFreeVisitedList()
     * are expected to come back through releaseVisitedList(); lists still
     * inside the pool are deleted by the destructor.
     */
    class VisitedListPool {
        std::deque<VisitedList *> pool;  // idle lists, most recently released first
        std::mutex poolguard;            // guards every access to pool
        int numelements;                 // size used for every list that is created

    public:
        /**
         * @param initmaxpools number of lists to create up front
         * @param numelements1 number of elements each list tracks
         */
        VisitedListPool(int initmaxpools, int numelements1) : numelements(numelements1) {
            for (int i = 0; i < initmaxpools; i++)
                pool.push_front(new VisitedList(numelements));
        }

        // Already non-copyable because of the std::mutex member; stated
        // explicitly because the pool owns raw pointers.
        VisitedListPool(const VisitedListPool &) = delete;
        VisitedListPool &operator=(const VisitedListPool &) = delete;

        /**
         * Hands out a list that has been reset() for a fresh pass. An idle
         * list is reused when available; otherwise a new one is allocated.
         *
         * @return pointer to a ready-to-use list; give it back with
         *         releaseVisitedList() when done.
         */
        VisitedList *getFreeVisitedList() {
            VisitedList *rez = nullptr;
            {
                std::unique_lock<std::mutex> lock(poolguard);
                if (!pool.empty()) {
                    rez = pool.front();
                    pool.pop_front();
                }
            }
            // Allocate outside the critical section so other threads are not
            // blocked on the allocator.
            if (rez == nullptr) {
                rez = new VisitedList(numelements);
            }
            rez->reset();
            return rez;
        }

        /**
         * Returns a list to the pool so it can be reused.
         *
         * @param vl list previously obtained from getFreeVisitedList(); a null
         *           pointer is ignored so it can never be handed out later.
         */
        void releaseVisitedList(VisitedList *vl) {
            if (vl == nullptr) {
                return;
            }
            std::unique_lock<std::mutex> lock(poolguard);
            pool.push_front(vl);
        }

        // Deletes every list that is currently idle in the pool.
        ~VisitedListPool() {
            while (!pool.empty()) {
                VisitedList *rez = pool.front();
                pool.pop_front();
                delete rez;
            }
        }
    };
}
|
78
|
+
|
data/hnswlib.gemspec
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'lib/hnswlib/version'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
  repository_url = 'https://github.com/yoshoku/hnswlib.rb'

  # Identity of the gem.
  gem.name    = 'hnswlib'
  gem.version = Hnswlib::VERSION
  gem.authors = ['yoshoku']
  gem.email   = ['yoshoku@outlook.com']

  # Human-readable description and licensing.
  gem.summary     = 'Ruby bindings for the Hnswlib.'
  gem.description = 'Hnswlib.rb provides Ruby bindings for the Hnswlib.'
  gem.homepage    = repository_url
  gem.license     = 'Apache-2.0'

  # Links shown on the gem's registry page.
  %w[homepage_uri source_code_uri].each { |key| gem.metadata[key] = repository_url }
  gem.metadata['changelog_uri'] = 'https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md'

  # Package every file tracked by git, except anything under test/, spec/ or
  # features/. The listing is taken from the directory holding this gemspec.
  tracked_files = Dir.chdir(File.expand_path(__dir__)) { `git ls-files -z`.split("\x0") }
  gem.files = tracked_files.reject { |path| path.start_with?('test/', 'spec/', 'features/') }

  # Executables are whatever is shipped under exe/.
  gem.bindir      = 'exe'
  gem.executables = gem.files.select { |path| path.start_with?('exe/') }.map { |path| File.basename(path) }

  gem.require_paths = ['lib']
  # Native extension that is compiled at install time.
  gem.extensions = ['ext/hnswlib/extconf.rb']

  # Runtime dependencies would be registered here, e.g.
  #   gem.add_dependency "example-gem", "~> 1.0"
  # See https://bundler.io/guides/creating_gem.html for details.
end
|
data/lib/hnswlib.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'hnswlib/version'
|
4
|
+
require_relative 'hnswlib/hnswlibext'
|
5
|
+
|
6
|
+
module Hnswlib
  # Index is a class that provides functions for k-nearest neighbors search.
  # It is a thin convenience wrapper that builds a space object and forwards
  # every operation to an Hnswlib::HierarchicalNSW instance.
  #
  # @example
  #   require 'hnswlib'
  #
  #   index = Hnswlib::Index.new(n_features: 100, max_item: 10000)
  #
  #   5000.times do |item_id|
  #     item_vec = Array.new(100) { rand - 0.5 }
  #     index.add_item(item_id, item_vec)
  #   end
  #
  #   index.get_nns_by_item(0, 100)
  #
  class Index
    # Returns the metric of index, exactly as it was passed to the constructor.
    # @return [String]
    attr_reader :metric

    # Create a new search index.
    #
    # @param n_features [Integer] The number of features (dimensions) of stored vector.
    # @param max_item [Integer] The maximum number of items.
    # @param metric [String] The distance metric between vectors ('l2' or 'dot').
    #   Note that any value other than 'dot' selects the L2 space.
    # @param m [Integer] The maximum number of outgoing connections in the graph
    # @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors.
    #   It controls the index time/accuracy trade-off.
    # @param random_seed [Integer] The seed value used to initialize the random generator.
    def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200, random_seed: 100)
      @metric = metric
      space = if @metric == 'dot'
                Hnswlib::InnerProductSpace.new(n_features)
              else
                Hnswlib::L2Space.new(n_features)
              end
      @index = Hnswlib::HierarchicalNSW.new(
        space: space, max_elements: max_item, m: m, ef_construction: ef_construction, random_seed: random_seed
      )
    end

    # Add item to be indexed.
    #
    # @param i [Integer] The ID of item.
    # @param v [Array] The vector of item.
    # @return [Boolean]
    def add_item(i, v)
      # The underlying index takes the vector first and the ID second.
      @index.add_point(v, i)
    end

    # Return the item vector.
    #
    # @param i [Integer] The ID of item.
    # @return [Array]
    def get_item(i)
      @index.get_point(i)
    end

    # Remove the item vector. The item is marked as deleted in the underlying index.
    #
    # @param i [Integer] The ID of item.
    # @return [Object] The value returned by the underlying `mark_deleted` call.
    def remove_item(i)
      @index.mark_deleted(i)
    end

    # Search the n closest items to an item that is already stored in the index.
    #
    # @param i [Integer] The ID of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_item(i, n, include_distances: false)
      # Look up the stored vector and reuse the vector-based search.
      get_nns_by_vector(@index.get_point(i), n, include_distances: include_distances)
    end

    # Search the n closest items to the given vector.
    #
    # @param v [Array] The vector of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_vector(v, n, include_distances: false)
      ids, dists = @index.search_knn(v, n)
      include_distances ? [ids, dists] : ids
    end

    # Resize the search index.
    #
    # @param new_max_item [Integer] The maximum number of items.
    def resize_index(new_max_item)
      @index.resize_index(new_max_item)
    end

    # Set the size of the dynamic list for the nearest neighbors.
    #
    # @param ef [Integer] The size of the dynamic list.
    def set_ef(ef)
      @index.set_ef(ef)
    end

    # Save the search index to disk.
    #
    # @param filename [String] The filename of search index.
    def save(filename)
      @index.save_index(filename)
    end

    # Load a search index from disk.
    #
    # @param filename [String] The filename of search index.
    def load(filename)
      @index.load_index(filename)
    end

    # Calculate the distance between two stored items, using the space
    # the index was created with.
    #
    # @param i [Integer] The ID of item.
    # @param j [Integer] The ID of item.
    # @return [Float or Integer]
    def get_distance(i, j)
      vi = @index.get_point(i)
      vj = @index.get_point(j)
      @index.space.distance(vi, vj)
    end

    # Return the number of items in the search index.
    #
    # @return [Integer]
    def n_items
      @index.current_count
    end

    # Returns the number of features of indexed item.
    #
    # @return [Integer]
    def n_features
      @index.space.dim
    end

    # Return the maximum number of items.
    #
    # @return [Integer]
    def max_item
      @index.max_elements
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hnswlib
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-07-24 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
14
|
+
email:
|
15
|
+
- yoshoku@outlook.com
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/hnswlib/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- ".github/workflows/build.yml"
|
22
|
+
- ".gitignore"
|
23
|
+
- ".rspec"
|
24
|
+
- CHANGELOG.md
|
25
|
+
- CODE_OF_CONDUCT.md
|
26
|
+
- Gemfile
|
27
|
+
- LICENSE.txt
|
28
|
+
- README.md
|
29
|
+
- Rakefile
|
30
|
+
- ext/hnswlib/extconf.rb
|
31
|
+
- ext/hnswlib/hnswlibext.cpp
|
32
|
+
- ext/hnswlib/hnswlibext.hpp
|
33
|
+
- ext/hnswlib/src/LICENSE
|
34
|
+
- ext/hnswlib/src/bruteforce.h
|
35
|
+
- ext/hnswlib/src/hnswalg.h
|
36
|
+
- ext/hnswlib/src/hnswlib.h
|
37
|
+
- ext/hnswlib/src/space_ip.h
|
38
|
+
- ext/hnswlib/src/space_l2.h
|
39
|
+
- ext/hnswlib/src/visited_list_pool.h
|
40
|
+
- hnswlib.gemspec
|
41
|
+
- lib/hnswlib.rb
|
42
|
+
- lib/hnswlib/version.rb
|
43
|
+
homepage: https://github.com/yoshoku/hnswlib.rb
|
44
|
+
licenses:
|
45
|
+
- Apache-2.0
|
46
|
+
metadata:
|
47
|
+
homepage_uri: https://github.com/yoshoku/hnswlib.rb
|
48
|
+
source_code_uri: https://github.com/yoshoku/hnswlib.rb
|
49
|
+
changelog_uri: https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
requirements: []
|
65
|
+
rubygems_version: 3.1.6
|
66
|
+
signing_key:
|
67
|
+
specification_version: 4
|
68
|
+
summary: Ruby bindings for the Hnswlib.
|
69
|
+
test_files: []
|