hanny 0.1.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -1
- data/.github/workflows/build.yml +20 -0
- data/.github/workflows/coverage.yml +24 -0
- data/.rubocop.yml +36 -19
- data/CHANGELOG.md +13 -0
- data/Gemfile +14 -0
- data/LICENSE.txt +1 -1
- data/README.md +13 -13
- data/Rakefile +7 -1
- data/Steepfile +20 -0
- data/hanny.gemspec +17 -17
- data/lib/hanny/lsh_index.rb +22 -52
- data/lib/hanny/version.rb +2 -2
- data/sig/hanny/lsh_index.rbs +27 -0
- data/sig/hanny/utils.rbs +7 -0
- data/sig/hanny.rbs +3 -0
- metadata +22 -70
- data/.travis.yml +0 -11
- data/bin/console +0 -14
- data/bin/setup +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d39024d595c701feff9aec387768b7c9c6341b3131073f1c9cfad3d42cc8910f
|
4
|
+
data.tar.gz: 5c05ce7a494a3eec292289248a01dacc6cfaa845517ac7a18648e734245a365f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ac234dc0f6305bb4bd877506bd30a086baf3a009d443a6e91100d8c33aecfe6138c8c9359f9e3e4d29b7a42fd588bae2682b70e53441e9ffca65da2e3a0620d
|
7
|
+
data.tar.gz: d779d5ad1434366d9b603aa5acea3bba4150fbdeb587d0bcbf057f45ef58ce2c8289d0117d4e15295f07ceabfdd47b1a4829fe1c3d2a4ca65f70d3b57c9a3df1
|
data/.coveralls.yml
CHANGED
@@ -1 +1 @@
|
|
1
|
-
service_name:
|
1
|
+
service_name: github-ci
|
@@ -0,0 +1,20 @@
|
|
1
|
+
name: build
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
strategy:
|
9
|
+
fail-fast: false
|
10
|
+
matrix:
|
11
|
+
ruby: [ '2.7', '3.0', '3.1' ]
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v3
|
14
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
15
|
+
uses: ruby/setup-ruby@v1
|
16
|
+
with:
|
17
|
+
ruby-version: ${{ matrix.ruby }}
|
18
|
+
bundler-cache: true
|
19
|
+
- name: Build and test with Rake
|
20
|
+
run: bundle exec rake
|
@@ -0,0 +1,24 @@
|
|
1
|
+
name: coverage
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ main ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ main ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
coverage:
|
11
|
+
runs-on: ubuntu-20.04
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v3
|
14
|
+
- name: Set up Ruby 2.7
|
15
|
+
uses: ruby/setup-ruby@v1
|
16
|
+
with:
|
17
|
+
ruby-version: '2.7'
|
18
|
+
bundler-cache: true
|
19
|
+
- name: Build and test with Rake
|
20
|
+
run: bundle exec rake
|
21
|
+
- name: Coveralls GitHub Action
|
22
|
+
uses: coverallsapp/github-action@v1.1.2
|
23
|
+
with:
|
24
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
data/.rubocop.yml
CHANGED
@@ -1,39 +1,56 @@
|
|
1
|
+
require:
|
2
|
+
- rubocop-performance
|
3
|
+
- rubocop-rake
|
4
|
+
- rubocop-rspec
|
5
|
+
|
1
6
|
AllCops:
|
2
|
-
|
7
|
+
NewCops: enable
|
3
8
|
DisplayCopNames: true
|
4
9
|
DisplayStyleGuide: true
|
10
|
+
Exclude:
|
11
|
+
- 'tmp/**/*'
|
12
|
+
- 'vendor/**/*'
|
13
|
+
- 'Steepfile'
|
5
14
|
|
6
|
-
|
15
|
+
Gemspec/RequiredRubyVersion:
|
7
16
|
Enabled: false
|
8
17
|
|
9
|
-
|
10
|
-
|
11
|
-
IgnoredPatterns: ['(\A|\s)#']
|
18
|
+
Layout/LineLength:
|
19
|
+
AllowedPatterns: ['(\A|\s)#']
|
12
20
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
Metrics/ClassLength:
|
17
|
-
Max: 200
|
18
|
-
|
19
|
-
Metrics/MethodLength:
|
20
|
-
Max: 40
|
21
|
+
Lint/AmbiguousOperatorPrecedence:
|
22
|
+
Enabled: false
|
21
23
|
|
22
24
|
Metrics/AbcSize:
|
23
|
-
Max:
|
25
|
+
Max: 32
|
24
26
|
|
25
27
|
Metrics/BlockLength:
|
26
28
|
Exclude:
|
27
29
|
- 'spec/**/*'
|
28
30
|
|
29
|
-
|
30
|
-
Max:
|
31
|
+
Metrics/ClassLength:
|
32
|
+
Max: 128
|
33
|
+
|
34
|
+
Metrics/MethodLength:
|
35
|
+
Max: 24
|
36
|
+
|
37
|
+
Metrics/ParameterLists:
|
38
|
+
Max: 8
|
31
39
|
|
32
|
-
|
40
|
+
Naming/AccessorMethodName:
|
33
41
|
Enabled: false
|
34
42
|
|
35
|
-
Naming/
|
43
|
+
Naming/MethodParameterName:
|
36
44
|
Enabled: false
|
37
45
|
|
38
|
-
|
46
|
+
RSpec/ExampleLength:
|
47
|
+
Max: 18
|
48
|
+
|
49
|
+
RSpec/MultipleMemoizedHelpers:
|
50
|
+
Max: 12
|
51
|
+
|
52
|
+
RSpec/NamedSubject:
|
39
53
|
Enabled: false
|
54
|
+
|
55
|
+
RSpec/NestedGroups:
|
56
|
+
Max: 5
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# 0.2.2
|
2
|
+
- Refactor codes and configs with RuboCop
|
3
|
+
|
4
|
+
# 0.2.1
|
5
|
+
- Remove dependent gem's type declaration file from installation files.
|
6
|
+
|
7
|
+
# 0.2.0
|
8
|
+
- Add type declaration files.
|
9
|
+
- Refactor some codes with type check.
|
10
|
+
- Fix some configuration files.
|
11
|
+
|
12
|
+
# 0.1.0
|
13
|
+
- First release.
|
data/Gemfile
CHANGED
@@ -1,6 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
source 'https://rubygems.org'
|
2
4
|
|
3
5
|
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
4
6
|
|
5
7
|
# Specify your gem's dependencies in hanny.gemspec
|
6
8
|
gemspec
|
9
|
+
|
10
|
+
gem 'bundler', '~> 2.0'
|
11
|
+
gem 'rake', '~> 12.0'
|
12
|
+
gem 'rbs', '~> 1.2'
|
13
|
+
gem 'rspec', '~> 3.0'
|
14
|
+
gem 'rubocop', '~> 1.35'
|
15
|
+
gem 'rubocop-performance', '~> 1.14'
|
16
|
+
gem 'rubocop-rake', '~> 0.6.0'
|
17
|
+
gem 'rubocop-rspec', '~> 2.12'
|
18
|
+
gem 'simplecov', '~> 0.19'
|
19
|
+
gem 'simplecov-lcov', '~> 0.8'
|
20
|
+
gem 'steep', '~> 0.44'
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
# Hanny
|
2
2
|
|
3
|
-
[![Build Status](https://
|
4
|
-
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/
|
3
|
+
[![Build Status](https://github.com/yoshoku/hanny/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/hanny/actions/workflows/build.yml)
|
4
|
+
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/hanny/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/hanny?branch=main)
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/hanny.svg)](https://badge.fury.io/rb/hanny)
|
6
|
-
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/Hanny/blob/
|
6
|
+
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/Hanny/blob/main/LICENSE.txt)
|
7
|
+
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/hanny/doc/)
|
7
8
|
|
8
9
|
Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
|
9
10
|
Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
|
@@ -27,6 +28,10 @@ Or install it yourself as:
|
|
27
28
|
|
28
29
|
$ gem install hanny
|
29
30
|
|
31
|
+
## Documentation
|
32
|
+
|
33
|
+
- [Hanny API Documentation](https://yoshoku.github.io/hanny/doc/)
|
34
|
+
|
30
35
|
## Usage
|
31
36
|
|
32
37
|
```ruby
|
@@ -67,14 +72,14 @@ I confirmed the search speed of Hanny's LSH with [MNIST](https://www.csie.ntu.ed
|
|
67
72
|
The experiment is carried out on MacBook Early 2016 (Core m3 1.1 GHz CPU and 8 GB memory).
|
68
73
|
|
69
74
|
Code:
|
75
|
+
|
70
76
|
```ruby
|
71
77
|
require 'benchmark'
|
72
|
-
require '
|
78
|
+
require 'rumale'
|
73
79
|
require 'hanny'
|
74
80
|
|
75
81
|
# Load MNIST data set.
|
76
|
-
samples, labels =
|
77
|
-
samples = Numo::DFloat.cast(samples)
|
82
|
+
samples, labels = Rumale::Dataset.load_libsvm_file('mnist')
|
78
83
|
queries = samples[0..5, true]
|
79
84
|
targets = samples[6..-1, true]
|
80
85
|
qlabels = labels[0..5]
|
@@ -111,6 +116,7 @@ end
|
|
111
116
|
```
|
112
117
|
|
113
118
|
Result:
|
119
|
+
|
114
120
|
```bash
|
115
121
|
user system total real
|
116
122
|
LSH
|
@@ -131,12 +137,6 @@ query label: 2, neighbors label: 2, 2, 2, 2, 2,
|
|
131
137
|
6.350000 0.280000 6.630000 ( 6.682365)
|
132
138
|
```
|
133
139
|
|
134
|
-
## Development
|
135
|
-
|
136
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
137
|
-
|
138
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
139
|
-
|
140
140
|
## Contributing
|
141
141
|
|
142
142
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/Hanny. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
@@ -147,4 +147,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
|
|
147
147
|
|
148
148
|
## Code of Conduct
|
149
149
|
|
150
|
-
Everyone interacting in the Hanny project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Hanny/blob/
|
150
|
+
Everyone interacting in the Hanny project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Hanny/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/Steepfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
target :lib do
|
2
|
+
signature "sig", "sig-deps"
|
3
|
+
|
4
|
+
check "lib" # Directory name
|
5
|
+
# check "Gemfile" # File name
|
6
|
+
# check "app/models/**/*.rb" # Glob
|
7
|
+
# # ignore "lib/templates/*.rb"
|
8
|
+
#
|
9
|
+
# # library "pathname", "set" # Standard libraries
|
10
|
+
# library "numo-narray" # Gems
|
11
|
+
end
|
12
|
+
|
13
|
+
# target :spec do
|
14
|
+
# signature "sig", "sig-private"
|
15
|
+
#
|
16
|
+
# check "spec"
|
17
|
+
#
|
18
|
+
# # library "pathname", "set" # Standard libraries
|
19
|
+
# # library "rspec"
|
20
|
+
# end
|
data/hanny.gemspec
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
3
|
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
@@ -10,29 +11,28 @@ Gem::Specification.new do |spec|
|
|
10
11
|
spec.email = ['yoshoku@outlook.com']
|
11
12
|
|
12
13
|
spec.summary = 'Hanny is a Hash-based Approximate Nearest Neighbor search library in Ruby.'
|
13
|
-
spec.description =
|
14
|
-
Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
|
15
|
-
Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
|
16
|
-
To build the hash table, Hanny uses Locality Sensitive Hashing (LSH) of approximating cosine similarity.
|
17
|
-
It is known that if the code length is sufficiently long (ex. greater than 128-bit), LSH can obtain high search performance.
|
18
|
-
In the experiment, Hanny achieved about twenty times faster search speed than the brute-force search by Euclidean distance.
|
19
|
-
MSG
|
14
|
+
spec.description = <<~MSG
|
15
|
+
Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
|
16
|
+
Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
|
17
|
+
To build the hash table, Hanny uses Locality Sensitive Hashing (LSH) of approximating cosine similarity.
|
18
|
+
It is known that if the code length is sufficiently long (ex. greater than 128-bit), LSH can obtain high search performance.
|
19
|
+
In the experiment, Hanny achieved about twenty times faster search speed than the brute-force search by Euclidean distance.
|
20
|
+
MSG
|
20
21
|
spec.homepage = 'https://github.com/yoshoku/hanny'
|
21
22
|
spec.license = 'BSD-2-Clause'
|
22
23
|
|
23
|
-
spec.
|
24
|
-
|
24
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
25
|
+
spec.metadata['source_code_uri'] = spec.homepage
|
26
|
+
spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/hanny/blob/main/CHANGELOG.md'
|
27
|
+
spec.metadata['documentation_uri'] = 'https://yoshoku.github.io/hanny/doc/'
|
28
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
29
|
+
|
30
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
31
|
+
f.match(%r{^(test|spec|features|sig-deps)/})
|
25
32
|
end
|
26
33
|
spec.bindir = 'exe'
|
27
34
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
35
|
spec.require_paths = ['lib']
|
29
36
|
|
30
|
-
spec.
|
31
|
-
|
32
|
-
spec.add_runtime_dependency 'numo-narray', '>= 0.9.0'
|
33
|
-
|
34
|
-
spec.add_development_dependency 'bundler', '~> 1.16'
|
35
|
-
spec.add_development_dependency 'coveralls', '~> 0.8'
|
36
|
-
spec.add_development_dependency 'rake', '~> 10.0'
|
37
|
-
spec.add_development_dependency 'rspec', '~> 3.0'
|
37
|
+
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
38
38
|
end
|
data/lib/hanny/lsh_index.rb
CHANGED
@@ -65,13 +65,8 @@ module Hanny
|
|
65
65
|
# @param random_seed [Integer/NilClass] The seed value using to initialize the random generator.
|
66
66
|
def initialize(code_length: 256, random_seed: nil)
|
67
67
|
@code_length = code_length
|
68
|
-
@n_samples = nil
|
69
|
-
@n_features = nil
|
70
|
-
@n_keys = nil
|
71
68
|
@last_id = nil
|
72
69
|
@weight_mat = nil
|
73
|
-
@hash_table = nil
|
74
|
-
@hash_codes = nil
|
75
70
|
@random_seed = random_seed
|
76
71
|
@random_seed ||= srand
|
77
72
|
@rng = Random.new(@random_seed)
|
@@ -86,26 +81,27 @@ module Hanny
|
|
86
81
|
|
87
82
|
# Build a search index.
|
88
83
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The dataset for building search index.
|
89
|
-
# @return [
|
84
|
+
# @return [LSHIndex] The search index itself that has constructed the hash table.
|
90
85
|
def build_index(x)
|
91
86
|
# Initialize some variables.
|
92
|
-
@n_samples
|
87
|
+
@n_samples = x.shape[0]
|
88
|
+
@n_features = x.shape[1]
|
93
89
|
@hash_table = {}
|
94
|
-
@hash_codes = []
|
95
90
|
@weight_mat = Utils.rand_normal([@n_features, @code_length], @rng)
|
96
91
|
# Convert samples to binary codes.
|
97
92
|
bin_x = hash_function(x)
|
98
93
|
# Store samples to binary hash table.
|
94
|
+
codes = []
|
99
95
|
@n_samples.times do |m|
|
100
96
|
bin_code = bin_x[m, true]
|
101
97
|
hash_key = symbolized_hash_key(bin_code)
|
102
98
|
unless @hash_table.key?(hash_key)
|
103
|
-
|
99
|
+
codes.push(bin_code.to_a)
|
104
100
|
@hash_table[hash_key] = []
|
105
101
|
end
|
106
102
|
@hash_table[hash_key].push(m)
|
107
103
|
end
|
108
|
-
@hash_codes = Numo::Bit.cast(
|
104
|
+
@hash_codes = Numo::Bit.cast(codes)
|
109
105
|
# Update some variables.
|
110
106
|
@n_keys = @hash_codes.shape[0]
|
111
107
|
@last_id = @n_samples
|
@@ -117,7 +113,7 @@ module Hanny
|
|
117
113
|
# @return [Array<Integer>] The indices of appended data in search index
|
118
114
|
def append_data(x)
|
119
115
|
# Initialize some variables.
|
120
|
-
n_new_samples
|
116
|
+
n_new_samples = x.shape[0]
|
121
117
|
bin_x = hash_function(x)
|
122
118
|
added_data_ids = []
|
123
119
|
# Store samples to binary hash table.
|
@@ -152,12 +148,14 @@ module Hanny
|
|
152
148
|
removed_data_ids = []
|
153
149
|
data_ids.each do |query_id|
|
154
150
|
# Remove data id from hash table.
|
155
|
-
hash_key = @hash_table.keys.
|
151
|
+
hash_key = @hash_table.keys.find { |k| @hash_table[k].include?(query_id) }
|
156
152
|
next if hash_key.nil?
|
153
|
+
|
157
154
|
@hash_table[hash_key].delete(query_id)
|
158
155
|
removed_data_ids.push(query_id)
|
159
156
|
# Remove the hash key if there is no data.
|
160
157
|
next unless @hash_table[hash_key].empty?
|
158
|
+
|
161
159
|
target_id = distances_to_hash_codes(decoded_hash_key(hash_key)).index(0)
|
162
160
|
@hash_codes = @hash_codes.delete(target_id, 0)
|
163
161
|
end
|
@@ -171,15 +169,17 @@ module Hanny
|
|
171
169
|
# @return [Array<Integer>] The data indices of search result.
|
172
170
|
def search_knn(q, n_neighbors: 10)
|
173
171
|
# Initialize some variables.
|
174
|
-
n_queries
|
172
|
+
n_queries = q.shape[0]
|
175
173
|
candidates = Array.new(n_queries) { [] }
|
176
174
|
# Binarize queries.
|
177
175
|
bin_q = hash_function(q)
|
178
176
|
# Find k-nearest neighbors for each query.
|
179
177
|
n_queries.times do |m|
|
180
|
-
sort_with_index(distances_to_hash_codes(bin_q[m, true])).each do |
|
178
|
+
sort_with_index(distances_to_hash_codes(bin_q[m, true])).each do |d, n|
|
181
179
|
candidates[m] = candidates[m] | @hash_table[symbolized_hash_key(@hash_codes[n, true])]
|
182
|
-
|
180
|
+
# TODO: Investigate the cause of the steep Ruby::BreakTypeMismatch error.
|
181
|
+
# break if candidates[m].size >= n_neighbors
|
182
|
+
break [[d, n]] if candidates[m].size >= n_neighbors
|
183
183
|
end
|
184
184
|
candidates[m] = candidates[m].shift(n_neighbors)
|
185
185
|
end
|
@@ -190,55 +190,25 @@ module Hanny
|
|
190
190
|
# @param q [Numo::DFloat] (shape: [n_queries, n_features]) The data for search queries.
|
191
191
|
# @param radius [Float] The hamming radius for search range.
|
192
192
|
# @return [Array<Integer>] The data indices of search result.
|
193
|
-
def search_radius(q, radius: 1)
|
193
|
+
def search_radius(q, radius: 1.0)
|
194
194
|
# Initialize some variables.
|
195
|
-
n_queries
|
195
|
+
n_queries = q.shape[0]
|
196
196
|
candidates = Array.new(n_queries) { [] }
|
197
197
|
# Binarize queries.
|
198
198
|
bin_q = hash_function(q)
|
199
199
|
# Find k-nearest neighbors for each query.
|
200
200
|
n_queries.times do |m|
|
201
201
|
sort_with_index(distances_to_hash_codes(bin_q[m, true])).each do |d, n|
|
202
|
-
|
202
|
+
# TODO: Investigate the cause of the steep Ruby::BreakTypeMismatch error.
|
203
|
+
# break if d > radius
|
204
|
+
break [[d, n]] if d > radius
|
205
|
+
|
203
206
|
candidates[m] = candidates[m] | @hash_table[symbolized_hash_key(@hash_codes[n, true])]
|
204
207
|
end
|
205
208
|
end
|
206
209
|
candidates
|
207
210
|
end
|
208
211
|
|
209
|
-
# Dump marshal data.
|
210
|
-
# @return [Hash] The marshal data for search index.
|
211
|
-
def marshal_dump
|
212
|
-
{ code_length: @code_length,
|
213
|
-
n_samples: @n_samples,
|
214
|
-
n_features: @n_features,
|
215
|
-
n_keys: @n_keys,
|
216
|
-
last_id: @last_id,
|
217
|
-
weight_mat: @weight_mat,
|
218
|
-
bias_vec: @bias_vec,
|
219
|
-
hash_table: @hash_table,
|
220
|
-
hash_codes: @hash_codes,
|
221
|
-
random_seed: @random_seed,
|
222
|
-
rng: @rng }
|
223
|
-
end
|
224
|
-
|
225
|
-
# Load marshal data.
|
226
|
-
# @return [nil]
|
227
|
-
def marshal_load(obj)
|
228
|
-
@code_length = obj[:code_length]
|
229
|
-
@n_samples = obj[:n_samples]
|
230
|
-
@n_features = obj[:n_features]
|
231
|
-
@n_keys = obj[:n_keys]
|
232
|
-
@last_id = obj[:last_id]
|
233
|
-
@weight_mat = obj[:weight_mat]
|
234
|
-
@bias_vec = obj[:bias_vec]
|
235
|
-
@hash_table = obj[:hash_table]
|
236
|
-
@hash_codes = obj[:hash_codes]
|
237
|
-
@random_seed = obj[:random_seed]
|
238
|
-
@rng = obj[:rng]
|
239
|
-
nil
|
240
|
-
end
|
241
|
-
|
242
212
|
private
|
243
213
|
|
244
214
|
# Convert binary code to symbol as hash key.
|
@@ -266,7 +236,7 @@ module Hanny
|
|
266
236
|
# @param hash_key [Symbol]
|
267
237
|
# @return [Numo::Bit]
|
268
238
|
def decoded_hash_key(hash_key)
|
269
|
-
bin_code = Zlib::Inflate.inflate(hash_key.to_s).
|
239
|
+
bin_code = Zlib::Inflate.inflate(hash_key.to_s).chars.map(&:to_i)
|
270
240
|
Numo::Bit[*bin_code]
|
271
241
|
end
|
272
242
|
end
|
data/lib/hanny/version.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Hanny
|
2
|
+
class LSHIndex
|
3
|
+
attr_reader code_length: Integer
|
4
|
+
attr_reader n_samples: Integer
|
5
|
+
attr_reader n_features: Integer
|
6
|
+
attr_reader n_keys: Integer
|
7
|
+
attr_reader hash_table: Hash[Symbol, Array[Integer]]
|
8
|
+
attr_reader hash_codes: Numo::Bit
|
9
|
+
attr_reader random_seed: untyped
|
10
|
+
attr_reader rng: Random
|
11
|
+
|
12
|
+
def initialize: (?code_length: Integer code_length, ?random_seed: Integer? random_seed) -> void
|
13
|
+
def hash_function: (Numo::DFloat x) -> Numo::Bit
|
14
|
+
def build_index: (Numo::DFloat x) -> LSHIndex
|
15
|
+
def append_data: (Numo::DFloat x) -> Array[Integer]
|
16
|
+
def remove_data: (Array[Integer] data_ids) -> Array[Integer]
|
17
|
+
def search_knn: (Numo::DFloat q, ?n_neighbors: Integer n_neighbors) -> Array[Array[Integer]]
|
18
|
+
def search_radius: (Numo::DFloat q, ?radius: Float radius) -> Array[Array[Integer]]
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def symbolized_hash_key: (Numo::Bit bin_code) -> Symbol
|
23
|
+
def distances_to_hash_codes: (Numo::Bit bin_code) -> Array[Float]
|
24
|
+
def sort_with_index: (Array[Float] arr) -> Array[[Float, Integer]]
|
25
|
+
def decoded_hash_key: (Symbol hash_key) -> Numo::Bit
|
26
|
+
end
|
27
|
+
end
|
data/sig/hanny/utils.rbs
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
module Hanny
|
2
|
+
module Utils
|
3
|
+
def self.euclidean_distance: (Numo::DFloat x, ?Numo::DFloat? y) -> Numo::DFloat
|
4
|
+
def self.rand_uniform: (Array[Integer] shape, Random rng) -> Numo::DFloat
|
5
|
+
def self.rand_normal: (Array[Integer] shape, Random rng, ?Float mu, ?Float sigma) -> Numo::DFloat
|
6
|
+
end
|
7
|
+
end
|
data/sig/hanny.rbs
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hanny
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -16,70 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9.
|
19
|
+
version: 0.9.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9.
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.16'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.16'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: coveralls
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0.8'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0.8'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '10.0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '10.0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rspec
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '3.0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '3.0'
|
26
|
+
version: 0.9.1
|
83
27
|
description: |
|
84
28
|
Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
|
85
29
|
Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
|
@@ -93,27 +37,36 @@ extensions: []
|
|
93
37
|
extra_rdoc_files: []
|
94
38
|
files:
|
95
39
|
- ".coveralls.yml"
|
40
|
+
- ".github/workflows/build.yml"
|
41
|
+
- ".github/workflows/coverage.yml"
|
96
42
|
- ".gitignore"
|
97
43
|
- ".rspec"
|
98
44
|
- ".rubocop.yml"
|
99
|
-
-
|
45
|
+
- CHANGELOG.md
|
100
46
|
- CODE_OF_CONDUCT.md
|
101
47
|
- Gemfile
|
102
48
|
- LICENSE.txt
|
103
49
|
- README.md
|
104
50
|
- Rakefile
|
105
|
-
-
|
106
|
-
- bin/setup
|
51
|
+
- Steepfile
|
107
52
|
- hanny.gemspec
|
108
53
|
- lib/hanny.rb
|
109
54
|
- lib/hanny/lsh_index.rb
|
110
55
|
- lib/hanny/utils.rb
|
111
56
|
- lib/hanny/version.rb
|
57
|
+
- sig/hanny.rbs
|
58
|
+
- sig/hanny/lsh_index.rbs
|
59
|
+
- sig/hanny/utils.rbs
|
112
60
|
homepage: https://github.com/yoshoku/hanny
|
113
61
|
licenses:
|
114
62
|
- BSD-2-Clause
|
115
|
-
metadata:
|
116
|
-
|
63
|
+
metadata:
|
64
|
+
homepage_uri: https://github.com/yoshoku/hanny
|
65
|
+
source_code_uri: https://github.com/yoshoku/hanny
|
66
|
+
changelog_uri: https://github.com/yoshoku/hanny/blob/main/CHANGELOG.md
|
67
|
+
documentation_uri: https://yoshoku.github.io/hanny/doc/
|
68
|
+
rubygems_mfa_required: 'true'
|
69
|
+
post_install_message:
|
117
70
|
rdoc_options: []
|
118
71
|
require_paths:
|
119
72
|
- lib
|
@@ -121,16 +74,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
121
74
|
requirements:
|
122
75
|
- - ">="
|
123
76
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
77
|
+
version: '0'
|
125
78
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
79
|
requirements:
|
127
80
|
- - ">="
|
128
81
|
- !ruby/object:Gem::Version
|
129
82
|
version: '0'
|
130
83
|
requirements: []
|
131
|
-
|
132
|
-
|
133
|
-
signing_key:
|
84
|
+
rubygems_version: 3.2.33
|
85
|
+
signing_key:
|
134
86
|
specification_version: 4
|
135
87
|
summary: Hanny is a Hash-based Approximate Nearest Neighbor search library in Ruby.
|
136
88
|
test_files: []
|
data/.travis.yml
DELETED
data/bin/console
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/setup'
|
4
|
-
require 'hanny'
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require "pry"
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require 'irb'
|
14
|
-
IRB.start(__FILE__)
|