hnswlib 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +20 -0
- data/.gitignore +18 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +176 -0
- data/README.md +56 -0
- data/Rakefile +17 -0
- data/ext/hnswlib/extconf.rb +11 -0
- data/ext/hnswlib/hnswlibext.cpp +29 -0
- data/ext/hnswlib/hnswlibext.hpp +420 -0
- data/ext/hnswlib/src/LICENSE +201 -0
- data/ext/hnswlib/src/bruteforce.h +152 -0
- data/ext/hnswlib/src/hnswalg.h +1192 -0
- data/ext/hnswlib/src/hnswlib.h +108 -0
- data/ext/hnswlib/src/space_ip.h +282 -0
- data/ext/hnswlib/src/space_l2.h +281 -0
- data/ext/hnswlib/src/visited_list_pool.h +78 -0
- data/hnswlib.gemspec +35 -0
- data/lib/hnswlib.rb +154 -0
- data/lib/hnswlib/version.rb +9 -0
- metadata +69 -0
@@ -0,0 +1,78 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <string.h>

#include <deque>
#include <mutex>
|
5
|
+
|
6
|
+
namespace hnswlib {

/// Integer type used for the per-element tags and for the current tag value.
using vl_type = unsigned short int;

/// A tag array of `numelements` entries (`mass`) plus a current tag (`curV`).
///
/// reset() advances `curV` instead of clearing the array; the array is only
/// zeroed when `curV` wraps around to 0. The array contents are indeterminate
/// until the first reset() call, which always triggers that zeroing because
/// `curV` starts at the maximum value of `vl_type`.
///
/// NOTE(review): presumably callers mark element i by storing `curV` into
/// `mass[i]` and test `mass[i] == curV` -- confirm against the search code.
class VisitedList {
public:
    vl_type curV;
    vl_type *mass;
    unsigned int numelements;

    /// Allocates a tag array with `numelements1` entries.
    /// `curV` starts at the largest `vl_type` value so that the first
    /// reset() wraps it to 0 and clears the freshly allocated array.
    VisitedList(int numelements1)
        : curV(static_cast<vl_type>(-1)),
          mass(nullptr),
          numelements(numelements1) {
        mass = new vl_type[numelements];
    }

    // The object owns `mass`; a member-wise copy would delete it twice.
    VisitedList(const VisitedList &) = delete;
    VisitedList &operator=(const VisitedList &) = delete;

    /// Moves to the next tag value. When the tag wraps to 0 the whole array
    /// is zeroed and the tag is advanced once more, so `curV` is never 0
    /// after this call returns.
    void reset() {
        curV++;
        if (curV == 0) {
            memset(mass, 0, sizeof(vl_type) * numelements);
            curV++;
        }
    }

    ~VisitedList() { delete[] mass; }
};

///////////////////////////////////////////////////////////
//
// Class for multi-threaded pool-management of VisitedLists
//
/////////////////////////////////////////////////////////

/// Pool of heap-allocated VisitedList objects, all created with the same
/// element count. Access to the pool container is serialized by `poolguard`.
class VisitedListPool {
    std::deque<VisitedList *> pool;
    std::mutex poolguard;
    int numelements;

    // Deletes every list currently parked in the pool (no locking).
    void drain() {
        while (!pool.empty()) {
            delete pool.front();
            pool.pop_front();
        }
    }

public:
    /// Pre-allocates `initmaxpools` lists of `numelements1` entries each.
    /// If an allocation throws, the lists created so far are freed before
    /// the exception propagates.
    VisitedListPool(int initmaxpools, int numelements1) : numelements(numelements1) {
        try {
            for (int i = 0; i < initmaxpools; i++)
                pool.push_front(new VisitedList(numelements));
        } catch (...) {
            drain();
            throw;
        }
    }

    // The pool owns raw pointers and a mutex; it must not be copied.
    VisitedListPool(const VisitedListPool &) = delete;
    VisitedListPool &operator=(const VisitedListPool &) = delete;

    /// Returns a list that has just been reset(): the most recently released
    /// one if the pool is non-empty, otherwise a newly allocated one.
    /// The returned pointer is expected to be handed back through
    /// releaseVisitedList(); lists that are not handed back are not freed
    /// by the pool.
    VisitedList *getFreeVisitedList() {
        VisitedList *rez = nullptr;
        {
            std::lock_guard<std::mutex> lock(poolguard);
            if (!pool.empty()) {
                rez = pool.front();
                pool.pop_front();
            }
        }
        // Allocate outside the critical section; `numelements` is never
        // modified after construction.
        if (rez == nullptr)
            rez = new VisitedList(numelements);
        rez->reset();
        return rez;
    }

    /// Puts `vl` back at the front of the pool. A null pointer is ignored so
    /// that it can never be handed out (and dereferenced) later.
    void releaseVisitedList(VisitedList *vl) {
        if (vl == nullptr)
            return;
        std::lock_guard<std::mutex> lock(poolguard);
        pool.push_front(vl);
    }

    /// Frees every list currently held by the pool.
    ~VisitedListPool() { drain(); }
};

}  // namespace hnswlib
|
78
|
+
|
data/hnswlib.gemspec
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'lib/hnswlib/version'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name = 'hnswlib'
  gem.version = Hnswlib::VERSION
  gem.authors = ['yoshoku']
  gem.email = ['yoshoku@outlook.com']

  # Human-readable description, project location and license.
  gem.summary = 'Ruby bindings for the Hnswlib.'
  gem.description = 'Hnswlib.rb provides Ruby bindings for the Hnswlib.'
  gem.homepage = 'https://github.com/yoshoku/hnswlib.rb'
  gem.license = 'Apache-2.0'

  # The homepage doubles as the source code location.
  gem.metadata['homepage_uri'] = gem.homepage
  gem.metadata['source_code_uri'] = gem.homepage
  gem.metadata['changelog_uri'] = 'https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md'

  # Package every file tracked by git (listed from this file's directory),
  # leaving out anything under test/, spec/ or features/.
  excluded_dirs = %r{\A(?:test|spec|features)/}
  tracked_files = Dir.chdir(File.expand_path(__dir__)) { `git ls-files -z`.split("\x0") }
  gem.files = tracked_files.reject { |path| path.match(excluded_dirs) }

  # Executables are the base names of the packaged files under exe/.
  gem.bindir = 'exe'
  gem.executables = gem.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  gem.require_paths = ['lib']
  gem.extensions = ['ext/hnswlib/extconf.rb']

  # Uncomment to register a new dependency of your gem
  # gem.add_dependency "example-gem", "~> 1.0"

  # For more information and examples about making a new gem, check out the
  # guide at: https://bundler.io/guides/creating_gem.html
end
|
data/lib/hnswlib.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'hnswlib/version'
|
4
|
+
require_relative 'hnswlib/hnswlibext'
|
5
|
+
|
6
|
+
module Hnswlib
  # Index is a class that provides functions for k-nearest neighbors search.
  # It wraps a Hnswlib::HierarchicalNSW instance and forwards every call to it.
  #
  # @example
  #   require 'hnswlib'
  #
  #   index = Hnswlib::Index.new(n_features: 100, max_item: 10000)
  #
  #   5000.times do |item_id|
  #     item_vec = Array.new(100) { rand - 0.5 }
  #     index.add_item(item_id, item_vec)
  #   end
  #
  #   index.get_nns_by_item(0, 100)
  #
  class Index
    # Returns the metric of index.
    # @return [String]
    attr_reader :metric

    # Create a new search index.
    #
    # @param n_features [Integer] The number of features (dimensions) of stored vector.
    # @param max_item [Integer] The maximum number of items.
    # @param metric [String] The distance metric between vectors ('l2' or 'dot').
    #   Any value other than 'dot' selects the L2 space.
    # @param m [Integer] The maximum number of outgoing connections in the graph.
    # @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors.
    #   It controls the index time/accuracy trade-off.
    # @param random_seed [Integer] The seed value used to initialize the random generator.
    def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200, random_seed: 100)
      @metric = metric
      space = if @metric == 'dot'
                Hnswlib::InnerProductSpace.new(n_features)
              else
                Hnswlib::L2Space.new(n_features)
              end
      @index = Hnswlib::HierarchicalNSW.new(
        space: space, max_elements: max_item, m: m, ef_construction: ef_construction, random_seed: random_seed
      )
    end

    # Add item to be indexed.
    #
    # @param i [Integer] The ID of item.
    # @param v [Array] The vector of item.
    # @return [Boolean]
    def add_item(i, v)
      # The wrapped index takes the vector first and the ID second.
      @index.add_point(v, i)
    end

    # Return the item vector.
    #
    # @param i [Integer] The ID of item.
    # @return [Array]
    def get_item(i)
      @index.get_point(i)
    end

    # Remove the item vector. The item is marked as deleted in the wrapped index.
    #
    # NOTE(review): the documented return type is carried over unchanged;
    # confirm what mark_deleted actually returns.
    #
    # @param i [Integer] The ID of item.
    # @return [Array]
    def remove_item(i)
      @index.mark_deleted(i)
    end

    # Search the n closest items.
    #
    # @param i [Integer] The ID of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_item(i, n, include_distances: false)
      # Look up the stored vector of the query item, then search with it.
      v = @index.get_point(i)
      ids, dists = @index.search_knn(v, n)
      include_distances ? [ids, dists] : ids
    end

    # Search the n closest items.
    #
    # @param v [Array] The vector of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param include_distances [Boolean] The flag indicating whether to return all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_vector(v, n, include_distances: false)
      ids, dists = @index.search_knn(v, n)
      include_distances ? [ids, dists] : ids
    end

    # Resize the search index.
    #
    # @param new_max_item [Integer] The maximum number of items.
    def resize_index(new_max_item)
      @index.resize_index(new_max_item)
    end

    # Set the size of the dynamic list for the nearest neighbors.
    #
    # @param ef [Integer] The size of the dynamic list.
    def set_ef(ef)
      @index.set_ef(ef)
    end

    # Save the search index to disk.
    #
    # @param filename [String] The filename of search index.
    def save(filename)
      @index.save_index(filename)
    end

    # Load a search index from disk.
    #
    # @param filename [String] The filename of search index.
    def load(filename)
      @index.load_index(filename)
    end

    # Calculate the distance between two stored items, using the space of the index.
    #
    # @param i [Integer] The ID of item.
    # @param j [Integer] The ID of item.
    # @return [Float or Integer]
    def get_distance(i, j)
      vi = @index.get_point(i)
      vj = @index.get_point(j)
      @index.space.distance(vi, vj)
    end

    # Return the number of items in the search index.
    #
    # @return [Integer]
    def n_items
      @index.current_count
    end

    # Return the number of features of indexed item.
    #
    # @return [Integer]
    def n_features
      @index.space.dim
    end

    # Return the maximum number of items.
    #
    # @return [Integer]
    def max_item
      @index.max_elements
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hnswlib
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-07-24 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
|
14
|
+
email:
|
15
|
+
- yoshoku@outlook.com
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/hnswlib/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- ".github/workflows/build.yml"
|
22
|
+
- ".gitignore"
|
23
|
+
- ".rspec"
|
24
|
+
- CHANGELOG.md
|
25
|
+
- CODE_OF_CONDUCT.md
|
26
|
+
- Gemfile
|
27
|
+
- LICENSE.txt
|
28
|
+
- README.md
|
29
|
+
- Rakefile
|
30
|
+
- ext/hnswlib/extconf.rb
|
31
|
+
- ext/hnswlib/hnswlibext.cpp
|
32
|
+
- ext/hnswlib/hnswlibext.hpp
|
33
|
+
- ext/hnswlib/src/LICENSE
|
34
|
+
- ext/hnswlib/src/bruteforce.h
|
35
|
+
- ext/hnswlib/src/hnswalg.h
|
36
|
+
- ext/hnswlib/src/hnswlib.h
|
37
|
+
- ext/hnswlib/src/space_ip.h
|
38
|
+
- ext/hnswlib/src/space_l2.h
|
39
|
+
- ext/hnswlib/src/visited_list_pool.h
|
40
|
+
- hnswlib.gemspec
|
41
|
+
- lib/hnswlib.rb
|
42
|
+
- lib/hnswlib/version.rb
|
43
|
+
homepage: https://github.com/yoshoku/hnswlib.rb
|
44
|
+
licenses:
|
45
|
+
- Apache-2.0
|
46
|
+
metadata:
|
47
|
+
homepage_uri: https://github.com/yoshoku/hnswlib.rb
|
48
|
+
source_code_uri: https://github.com/yoshoku/hnswlib.rb
|
49
|
+
changelog_uri: https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
requirements: []
|
65
|
+
rubygems_version: 3.1.6
|
66
|
+
signing_key:
|
67
|
+
specification_version: 4
|
68
|
+
summary: Ruby bindings for the Hnswlib.
|
69
|
+
test_files: []
|