hnswlib 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ #pragma once
2
+
3
#include <string.h>

#include <deque>
#include <memory>
#include <mutex>
#include <utility>
5
+
6
namespace hnswlib {

/// Tag type stored once per element inside a VisitedList.
typedef unsigned short int vl_type;

/// A per-element tag array paired with a generation counter (`curV`).
///
/// `reset()` advances the counter instead of clearing the array; the array is
/// only zeroed when the counter wraps around to 0.
/// NOTE(review): presumably callers mark an element as visited by storing
/// `curV` into its slot of `mass` — confirm against the search code.
///
/// The object owns `mass`, so copying is disabled: a copy would share the raw
/// pointer and free the array twice.
class VisitedList {
 public:
    vl_type curV;              ///< Current generation tag.
    vl_type *mass;             ///< Tag array with `numelements` slots, owned by this object.
    unsigned int numelements;  ///< Number of slots in `mass`.

    /// Allocates `numelements1` tag slots. The slots are left uninitialised
    /// until the first `reset()`, which always zeroes them because the counter
    /// starts at its maximum value and wraps to 0 on the first increment.
    VisitedList(int numelements1)
        : curV(static_cast<vl_type>(-1)),
          mass(nullptr),
          numelements(static_cast<unsigned int>(numelements1)) {
        mass = new vl_type[numelements];
    }

    VisitedList(const VisitedList &) = delete;
    VisitedList &operator=(const VisitedList &) = delete;

    /// Starts a new generation. When the counter wraps to 0 every slot is
    /// zeroed and the counter moves on to 1, so 0 never names a live
    /// generation.
    void reset() {
        ++curV;
        if (curV == 0) {
            memset(mass, 0, sizeof(vl_type) * numelements);
            ++curV;
        }
    }

    ~VisitedList() { delete[] mass; }
};

///////////////////////////////////////////////////////////
//
// Class for multi-threaded pool-management of VisitedLists
//
/////////////////////////////////////////////////////////

/// Pool of reusable VisitedList objects. Every access to the pool container
/// is serialised through `poolguard`.
class VisitedListPool {
    // Idle lists are held through unique_ptr so they are released on every
    // exit path, including a constructor that throws part-way through.
    std::deque<std::unique_ptr<VisitedList>> pool;
    std::mutex poolguard;
    int numelements;

 public:
    /// Pre-allocates `initmaxpools` lists, each with `numelements1` slots.
    VisitedListPool(int initmaxpools, int numelements1) : numelements(numelements1) {
        for (int i = 0; i < initmaxpools; i++)
            pool.push_front(std::unique_ptr<VisitedList>(new VisitedList(numelements)));
    }

    /// Hands out a list that has just been `reset()`. An idle list is reused
    /// when one is available; otherwise a new one is allocated.
    ///
    /// @return A non-null list. The caller should give it back with
    ///         `releaseVisitedList()`; a list that is never returned is not
    ///         freed by the pool.
    VisitedList *getFreeVisitedList() {
        std::unique_ptr<VisitedList> rez;
        {
            std::lock_guard<std::mutex> lock(poolguard);
            if (!pool.empty()) {
                rez = std::move(pool.front());
                pool.pop_front();
            }
        }
        // Allocate outside the critical section; `numelements` is never
        // modified after construction, so reading it unlocked is safe.
        if (!rez)
            rez.reset(new VisitedList(numelements));
        rez->reset();
        return rez.release();
    }

    /// Returns a list to the pool, which takes ownership of it.
    /// A null pointer is ignored, because storing it would make a later
    /// `getFreeVisitedList()` dereference null.
    void releaseVisitedList(VisitedList *vl) {
        if (vl == nullptr)
            return;
        std::lock_guard<std::mutex> lock(poolguard);
        pool.emplace_front(vl);
    }

    // No user-declared destructor: the unique_ptr elements free the idle lists.
};

}  // namespace hnswlib
78
+
data/hnswlib.gemspec ADDED
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/hnswlib/version'
4
+
5
Gem::Specification.new do |gem|
  repo_url = 'https://github.com/yoshoku/hnswlib.rb'

  # Identity of the gem.
  gem.name = 'hnswlib'
  gem.version = Hnswlib::VERSION
  gem.authors = ['yoshoku']
  gem.email = ['yoshoku@outlook.com']

  # Human-readable description and licensing.
  gem.summary = 'Ruby bindings for the Hnswlib.'
  gem.description = 'Hnswlib.rb provides Ruby bindings for the Hnswlib.'
  gem.homepage = repo_url
  gem.license = 'Apache-2.0'

  # Links stored in the gem metadata.
  gem.metadata['homepage_uri'] = gem.homepage
  gem.metadata['source_code_uri'] = gem.homepage
  gem.metadata['changelog_uri'] = 'https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md'

  # Package every file tracked by git (listed from the gemspec's directory),
  # leaving out anything under test/, spec/ or features/.
  tracked = Dir.chdir(File.expand_path(__dir__)) { `git ls-files -z`.split("\x0") }
  gem.files = tracked.reject { |path| path.start_with?('test/', 'spec/', 'features/') }

  # Executables are the base names of the packaged files under exe/.
  gem.bindir = 'exe'
  gem.executables = gem.files.select { |path| path.start_with?('exe/') }.map { |path| File.basename(path) }
  gem.require_paths = ['lib']
  gem.extensions = ['ext/hnswlib/extconf.rb']

  # To register a runtime dependency, add a line such as:
  # gem.add_dependency "example-gem", "~> 1.0"

  # For more information and examples about making a new gem, see the
  # guide at: https://bundler.io/guides/creating_gem.html
end
data/lib/hnswlib.rb ADDED
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'hnswlib/version'
4
+ require_relative 'hnswlib/hnswlibext'
5
+
6
+ module Hnswlib
7
+ # Index is a class that provides functions for k-nearest neighbors search.
8
+ #
9
+ # @example
10
+ # require 'hnswlib'
11
+ #
12
+ # index = Hnswlib::Index.new(n_features: 100, max_item: 10000)
13
+ #
14
+ # 5000.times do |item_id|
15
+ # item_vec = Array.new(100) { rand - 0.5 }
16
+ # index.add_item(item_id, item_vec)
17
+ # end
18
+ #
19
+ # index.get_nns_by_item(0, 100)
20
+ #
21
class Index
  # The distance metric name given when the index was created.
  # @return [String]
  attr_reader :metric

  # Build an empty search index.
  #
  # Any metric other than 'dot' selects the L2 space.
  #
  # @param n_features [Integer] The number of features (dimensions) of a stored vector.
  # @param max_item [Integer] The maximum number of items.
  # @param metric [String] The distance metric between vectors ('l2' or 'dot').
  # @param m [Integer] The maximum number of outgoing connections in the graph.
  # @param ef_construction [Integer] The size of the dynamic list for the nearest neighbors.
  #   It controls the index time/accuracy trade-off.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  def initialize(n_features:, max_item:, metric: 'l2', m: 16, ef_construction: 200, random_seed: 100)
    @metric = metric
    space_class = @metric == 'dot' ? Hnswlib::InnerProductSpace : Hnswlib::L2Space
    @index = Hnswlib::HierarchicalNSW.new(
      space: space_class.new(n_features),
      max_elements: max_item,
      m: m,
      ef_construction: ef_construction,
      random_seed: random_seed
    )
  end

  # Add an item to the index.
  #
  # @param i [Integer] The ID of the item.
  # @param v [Array] The vector of the item.
  # @return [Boolean]
  def add_item(i, v)
    # The native call takes the vector first and the ID second.
    @index.add_point(v, i)
  end

  # Fetch the vector stored for an item.
  #
  # @param i [Integer] The ID of the item.
  # @return [Array]
  def get_item(i)
    @index.get_point(i)
  end

  # Mark an item as deleted in the underlying index.
  #
  # @param i [Integer] The ID of the item.
  # @return [Array]
  def remove_item(i)
    @index.mark_deleted(i)
  end

  # Search the n closest items to an item that is already indexed.
  #
  # @param i [Integer] The ID of the query item.
  # @param n [Integer] The number of nearest neighbors.
  # @param include_distances [Boolean] Whether to also return the corresponding distances.
  # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
  def get_nns_by_item(i, n, include_distances: false)
    query = @index.get_point(i)
    neighbor_ids, distances = @index.search_knn(query, n)
    return neighbor_ids unless include_distances

    [neighbor_ids, distances]
  end

  # Search the n closest items to an arbitrary vector.
  #
  # @param v [Array] The vector of the query item.
  # @param n [Integer] The number of nearest neighbors.
  # @param include_distances [Boolean] Whether to also return the corresponding distances.
  # @return [Array<Integer> or Array<Array<Integer>, Array<Float>>]
  def get_nns_by_vector(v, n, include_distances: false)
    neighbor_ids, distances = @index.search_knn(v, n)
    return neighbor_ids unless include_distances

    [neighbor_ids, distances]
  end

  # Resize the search index.
  #
  # @param new_max_item [Integer] The maximum number of items.
  def resize_index(new_max_item)
    # NOTE(review): the native method name is spelled `reisze_index` here;
    # confirm it matches the name registered by the extension.
    @index.reisze_index(new_max_item)
  end

  # Set the size of the dynamic list for the nearest neighbors.
  #
  # @param ef [Integer] The size of the dynamic list.
  def set_ef(ef)
    @index.set_ef(ef)
  end

  # Save the search index to disk.
  #
  # @param filename [String] The filename of the search index.
  def save(filename)
    @index.save_index(filename)
  end

  # Load a search index from disk.
  #
  # @param filename [String] The filename of the search index.
  def load(filename)
    @index.load_index(filename)
  end

  # Calculate the distance between two indexed items, using the index's space.
  #
  # @param i [Integer] The ID of the first item.
  # @param j [Integer] The ID of the second item.
  # @return [Float or Integer]
  def get_distance(i, j)
    first_vec = @index.get_point(i)
    second_vec = @index.get_point(j)
    @index.space.distance(first_vec, second_vec)
  end

  # Return the number of items in the search index.
  #
  # @return [Integer]
  def n_items
    @index.current_count
  end

  # Return the number of features of an indexed item.
  #
  # @return [Integer]
  def n_features
    @index.space.dim
  end

  # Return the maximum number of items.
  #
  # @return [Integer]
  def max_item
    @index.max_elements
  end
end
154
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Hnswlib.rb provides Ruby bindings for the Hnswlib.
4
module Hnswlib
  # The version of Hnswlib.rb you install.
  VERSION = '0.1.0'
  # The version of Hnswlib included with gem.
  HNSWLIB_VERSION = '0.5.2'
  # Original, misspelled name of HNSWLIB_VERSION (missing the "N").
  # Kept as an alias so existing references continue to resolve.
  HSWLIB_VERSION = HNSWLIB_VERSION
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hnswlib
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-07-24 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Hnswlib.rb provides Ruby bindings for the Hnswlib.
14
+ email:
15
+ - yoshoku@outlook.com
16
+ executables: []
17
+ extensions:
18
+ - ext/hnswlib/extconf.rb
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".github/workflows/build.yml"
22
+ - ".gitignore"
23
+ - ".rspec"
24
+ - CHANGELOG.md
25
+ - CODE_OF_CONDUCT.md
26
+ - Gemfile
27
+ - LICENSE.txt
28
+ - README.md
29
+ - Rakefile
30
+ - ext/hnswlib/extconf.rb
31
+ - ext/hnswlib/hnswlibext.cpp
32
+ - ext/hnswlib/hnswlibext.hpp
33
+ - ext/hnswlib/src/LICENSE
34
+ - ext/hnswlib/src/bruteforce.h
35
+ - ext/hnswlib/src/hnswalg.h
36
+ - ext/hnswlib/src/hnswlib.h
37
+ - ext/hnswlib/src/space_ip.h
38
+ - ext/hnswlib/src/space_l2.h
39
+ - ext/hnswlib/src/visited_list_pool.h
40
+ - hnswlib.gemspec
41
+ - lib/hnswlib.rb
42
+ - lib/hnswlib/version.rb
43
+ homepage: https://github.com/yoshoku/hnswlib.rb
44
+ licenses:
45
+ - Apache-2.0
46
+ metadata:
47
+ homepage_uri: https://github.com/yoshoku/hnswlib.rb
48
+ source_code_uri: https://github.com/yoshoku/hnswlib.rb
49
+ changelog_uri: https://github.com/yoshoku/hnswlib.rb/blob/main/CHANGELOG.md
50
+ post_install_message:
51
+ rdoc_options: []
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements: []
65
+ rubygems_version: 3.1.6
66
+ signing_key:
67
+ specification_version: 4
68
+ summary: Ruby bindings for the Hnswlib.
69
+ test_files: []