hnswlib 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,78 @@
1
+ #pragma once
2
+
3
#include <deque>
#include <mutex>
#include <string.h>
5
+
6
+ namespace hnswlib {
7
// Integer type used both for the per-element tags and for the current tag.
typedef unsigned short int vl_type;

// A tag array with a cheap "clear" operation.
//
// Instead of zeroing `mass` before every use, reset() bumps the current tag
// `curV`; the array is only wiped with memset when the tag wraps around to 0.
// NOTE(review): callers presumably mark element i by writing `mass[i] = curV`
// and test it with `mass[i] == curV` -- that usage is not visible in this
// header, confirm against the search code.
//
// The object owns the `mass` buffer, so it is deliberately non-copyable: an
// implicit copy would make two objects delete[] the same pointer.
class VisitedList {
 public:
    vl_type curV;              // current tag; starts at the maximum value so the first reset() wraps to 0
    vl_type *mass;             // owned array of `numelements` tags; contents are undefined until the first reset()
    unsigned int numelements;  // number of entries in `mass`

    // Allocates storage for `numelements1` tags. The array is NOT cleared here;
    // the first call to reset() does that.
    VisitedList(int numelements1)
        : curV(static_cast<vl_type>(-1)),
          mass(nullptr),
          numelements(static_cast<unsigned int>(numelements1)) {
        mass = new vl_type[numelements];
    }

    // Copying would lead to a double delete[] of `mass`.
    VisitedList(const VisitedList &) = delete;
    VisitedList &operator=(const VisitedList &) = delete;

    // Advances to a fresh tag. When the tag wraps around to 0 every entry is
    // zeroed and the tag is moved on to 1, so that 0 never becomes the current
    // tag after a reset.
    void reset() {
        curV++;
        if (curV == 0) {
            memset(mass, 0, sizeof(vl_type) * numelements);
            curV++;
        }
    }

    ~VisitedList() { delete[] mass; }
};
31
+ ///////////////////////////////////////////////////////////
32
+ //
33
+ // Class for multi-threaded pool-management of VisitedLists
34
+ //
35
+ /////////////////////////////////////////////////////////
36
+
37
+ class VisitedListPool {
38
+ std::deque<VisitedList *> pool;
39
+ std::mutex poolguard;
40
+ int numelements;
41
+
42
+ public:
43
+ VisitedListPool(int initmaxpools, int numelements1) {
44
+ numelements = numelements1;
45
+ for (int i = 0; i < initmaxpools; i++)
46
+ pool.push_front(new VisitedList(numelements));
47
+ }
48
+
49
+ VisitedList *getFreeVisitedList() {
50
+ VisitedList *rez;
51
+ {
52
+ std::unique_lock <std::mutex> lock(poolguard);
53
+ if (pool.size() > 0) {
54
+ rez = pool.front();
55
+ pool.pop_front();
56
+ } else {
57
+ rez = new VisitedList(numelements);
58
+ }
59
+ }
60
+ rez->reset();
61
+ return rez;
62
+ };
63
+
64
+ void releaseVisitedList(VisitedList *vl) {
65
+ std::unique_lock <std::mutex> lock(poolguard);
66
+ pool.push_front(vl);
67
+ };
68
+
69
+ ~VisitedListPool() {
70
+ while (pool.size()) {
71
+ VisitedList *rez = pool.front();
72
+ pool.pop_front();
73
+ delete rez;
74
+ }
75
+ };
76
+ };
77
+ }
78
+
data/hnswlib.gemspec ADDED
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/hnswlib/version'
4
+
5
Gem::Specification.new do |gem|
  # --- Identity -------------------------------------------------------------
  gem.name    = 'hnswlib'
  gem.version = Hnswlib::VERSION
  gem.license = 'Apache-2.0'
  gem.authors = ['yoshoku']
  gem.email   = ['yoshoku@outlook.com']

  # --- Descriptions ---------------------------------------------------------
  gem.summary     = 'Ruby bindings for the Hnswlib.'
  gem.description = 'Hnswlib.rb provides Ruby bindings for the Hnswlib.'
  gem.homepage    = 'https://github.com/yoshoku/hnswlib.rb'

  # --- Links ----------------------------------------------------------------
  gem.metadata['homepage_uri'] = gem.homepage
  gem.metadata['source_code_uri'] = gem.homepage
  gem.metadata['changelog_uri'] = 'https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md'

  # --- Packaged files -------------------------------------------------------
  # Ship every file tracked by git (listed from the directory that holds this
  # gemspec), leaving out anything under test/, spec/ or features/.
  not_packaged = %r{\A(?:test|spec|features)/}
  gem.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").grep_v(not_packaged)
  end

  # Executables are the base names of the packaged files located under exe/.
  gem.bindir = 'exe'
  gem.executables = gem.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  gem.require_paths = ['lib']

  # Native extension build script.
  gem.extensions = ['ext/hnswlib/extconf.rb']

  # No runtime dependencies are declared. To add one:
  #   gem.add_dependency "example-gem", "~> 1.0"
  #
  # General guide to authoring gems:
  #   https://bundler.io/guides/creating_gem.html
end
data/lib/hnswlib.rb ADDED
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'hnswlib/version'
4
+ require_relative 'hnswlib/hnswlibext'
5
+
6
module Hnswlib
  # Index is a class that provides functions for k-nearest neighbors search.
  # It is a thin convenience wrapper around Hnswlib::HierarchicalNSW.
  #
  # @example
  #   require 'hnswlib'
  #
  #   index = Hnswlib::Index.new(n_features: 100, max_item: 10000)
  #
  #   5000.times do |item_id|
  #     item_vec = Array.new(100) { rand - 0.5 }
  #     index.add_item(item_id, item_vec)
  #   end
  #
  #   index.get_nns_by_item(0, 100)
  #
  class Index
    # Returns the metric of index.
    # @return [String]
    attr_reader :metric

    # Create a new search index.
    #
    # @param n_features [Integer] The number of features (dimensions) of stored vector.
    # @param max_item [Integer] The maximum number of items.
    # @param metric [String] The distance metric between vectors ('l2' or 'dot').
    #   Any value other than 'dot' selects the L2 space.
    # @param m [Integer] The maximum number of outgoing connections in the graph
    # @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors.
    #   It controls the index time/accuracy trade-off.
    # @param random_seed [Integer] The seed value using to initialize the random generator.
    def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200, random_seed: 100)
      @metric = metric
      space = if @metric == 'dot'
                Hnswlib::InnerProductSpace.new(n_features)
              else
                Hnswlib::L2Space.new(n_features)
              end
      @index = Hnswlib::HierarchicalNSW.new(
        space: space, max_elements: max_item, m: m, ef_construction: ef_construction, random_seed: random_seed
      )
    end

    # Add item to be indexed.
    #
    # @param i [Integer] The ID of item.
    # @param v [Array] The vector of item.
    # @return [Boolean]
    def add_item(i, v)
      # The underlying API takes the vector first and the ID second.
      @index.add_point(v, i)
    end

    # Return the item vector.
    #
    # @param i [Integer] The ID of item.
    # @return [Array]
    def get_item(i)
      @index.get_point(i)
    end

    # Remove the item vector. The item is marked as deleted in the underlying index.
    #
    # @param i [Integer] The ID of item.
    # @return [Array] NOTE(review): this is whatever mark_deleted returns; confirm the documented type.
    def remove_item(i)
      @index.mark_deleted(i)
    end

    # Search the n closest items to an item that is already stored in the index.
    #
    # @param i [Integer] The ID of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param include_distances [Boolean] The flag indicating whether to returns all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_item(i, n, include_distances: false)
      get_nns_by_vector(@index.get_point(i), n, include_distances: include_distances)
    end

    # Search the n closest items to the given vector.
    #
    # @param v [Array] The vector of query item.
    # @param n [Integer] The number of nearest neighbors.
    # @param include_distances [Boolean] The flag indicating whether to returns all corresponding distances.
    # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
    def get_nns_by_vector(v, n, include_distances: false)
      ids, dists = @index.search_knn(v, n)
      include_distances ? [ids, dists] : ids
    end

    # Resize the search index.
    #
    # @param new_max_item [Integer] The maximum number of items.
    def resize_index(new_max_item)
      # Previously called the misspelled `reisze_index`.
      @index.resize_index(new_max_item)
    end

    # Set the size of the dynamic list for the nearest neighbors.
    #
    # @param ef [Integer] The size of the dynamic list.
    def set_ef(ef)
      @index.set_ef(ef)
    end

    # Save the search index to disk.
    #
    # @param filename [String] The filename of search index.
    def save(filename)
      @index.save_index(filename)
    end

    # Load a search index from disk.
    #
    # @param filename [String] The filename of search index.
    def load(filename)
      @index.load_index(filename)
    end

    # Calculate the distances between items.
    #
    # @param i [Integer] The ID of item.
    # @param j [Integer] The ID of item.
    # @return [Float or Integer]
    def get_distance(i, j)
      vi = @index.get_point(i)
      vj = @index.get_point(j)
      @index.space.distance(vi, vj)
    end

    # Return the number of items in the search index.
    #
    # @return [Integer]
    def n_items
      @index.current_count
    end

    # Returns the number of features of indexed item.
    #
    # @return [Integer]
    def n_features
      @index.space.dim
    end

    # Return the maximum number of items.
    #
    # @return [Integer]
    def max_item
      @index.max_elements
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace of Hnswlib.rb, the Ruby bindings for the Hnswlib.
module Hnswlib
  # Release number of this gem (Hnswlib.rb).
  VERSION = '0.1.0'

  # Release number of the Hnswlib sources that ship inside the gem.
  HSWLIB_VERSION = '0.5.2'
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hnswlib
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-07-24 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
14
+ email:
15
+ - yoshoku@outlook.com
16
+ executables: []
17
+ extensions:
18
+ - ext/hnswlib/extconf.rb
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".github/workflows/build.yml"
22
+ - ".gitignore"
23
+ - ".rspec"
24
+ - CHANGELOG.md
25
+ - CODE_OF_CONDUCT.md
26
+ - Gemfile
27
+ - LICENSE.txt
28
+ - README.md
29
+ - Rakefile
30
+ - ext/hnswlib/extconf.rb
31
+ - ext/hnswlib/hnswlibext.cpp
32
+ - ext/hnswlib/hnswlibext.hpp
33
+ - ext/hnswlib/src/LICENSE
34
+ - ext/hnswlib/src/bruteforce.h
35
+ - ext/hnswlib/src/hnswalg.h
36
+ - ext/hnswlib/src/hnswlib.h
37
+ - ext/hnswlib/src/space_ip.h
38
+ - ext/hnswlib/src/space_l2.h
39
+ - ext/hnswlib/src/visited_list_pool.h
40
+ - hnswlib.gemspec
41
+ - lib/hnswlib.rb
42
+ - lib/hnswlib/version.rb
43
+ homepage: https://github.com/yoshoku/hnswlib.rb
44
+ licenses:
45
+ - Apache-2.0
46
+ metadata:
47
+ homepage_uri: https://github.com/yoshoku/hnswlib.rb
48
+ source_code_uri: https://github.com/yoshoku/hnswlib.rb
49
+ changelog_uri: https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md
50
+ post_install_message:
51
+ rdoc_options: []
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements: []
65
+ rubygems_version: 3.1.6
66
+ signing_key:
67
+ specification_version: 4
68
+ summary: Ruby bindings for the Hnswlib.
69
+ test_files: []