hanny 0.1.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 76848de111777349352ddeb7aa269ad3694504661ac2fe1c45729ebcd0f27414
4
- data.tar.gz: a911da20689134ebecdd01b88827a2d2cc65149b497103f32f528fa754590c15
3
+ metadata.gz: d39024d595c701feff9aec387768b7c9c6341b3131073f1c9cfad3d42cc8910f
4
+ data.tar.gz: 5c05ce7a494a3eec292289248a01dacc6cfaa845517ac7a18648e734245a365f
5
5
  SHA512:
6
- metadata.gz: 6c1e4fde8dc03f439454a476d16be7b48f384cb0adf24a17fce47e09387d4583e31ba66b3a9b7f6d9fb5ff6e386e9282fba68f05c9ff0cb74f88013e19d65b91
7
- data.tar.gz: 30441f1aef6a05bc0a609d4b2176f4e04d7d0d9882579de8909aae9076dbdf68670eb4275da1a863dffa13454997597e83d229a1371f6a51bee0f58aa3740abd
6
+ metadata.gz: 7ac234dc0f6305bb4bd877506bd30a086baf3a009d443a6e91100d8c33aecfe6138c8c9359f9e3e4d29b7a42fd588bae2682b70e53441e9ffca65da2e3a0620d
7
+ data.tar.gz: d779d5ad1434366d9b603aa5acea3bba4150fbdeb587d0bcbf057f45ef58ce2c8289d0117d4e15295f07ceabfdd47b1a4829fe1c3d2a4ca65f70d3b57c9a3df1
data/.coveralls.yml CHANGED
@@ -1 +1 @@
1
- service_name: travis-ci
1
+ service_name: github-ci
@@ -0,0 +1,20 @@
1
+ name: build
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ strategy:
9
+ fail-fast: false
10
+ matrix:
11
+ ruby: [ '2.7', '3.0', '3.1' ]
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ - name: Set up Ruby ${{ matrix.ruby }}
15
+ uses: ruby/setup-ruby@v1
16
+ with:
17
+ ruby-version: ${{ matrix.ruby }}
18
+ bundler-cache: true
19
+ - name: Build and test with Rake
20
+ run: bundle exec rake
@@ -0,0 +1,24 @@
1
+ name: coverage
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ coverage:
11
+ runs-on: ubuntu-20.04
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ - name: Set up Ruby 2.7
15
+ uses: ruby/setup-ruby@v1
16
+ with:
17
+ ruby-version: '2.7'
18
+ bundler-cache: true
19
+ - name: Build and test with Rake
20
+ run: bundle exec rake
21
+ - name: Coveralls GitHub Action
22
+ uses: coverallsapp/github-action@v1.1.2
23
+ with:
24
+ github-token: ${{ secrets.GITHUB_TOKEN }}
data/.rubocop.yml CHANGED
@@ -1,39 +1,56 @@
1
+ require:
2
+ - rubocop-performance
3
+ - rubocop-rake
4
+ - rubocop-rspec
5
+
1
6
  AllCops:
2
- TargetRubyVersion: 2.1
7
+ NewCops: enable
3
8
  DisplayCopNames: true
4
9
  DisplayStyleGuide: true
10
+ Exclude:
11
+ - 'tmp/**/*'
12
+ - 'vendor/**/*'
13
+ - 'Steepfile'
5
14
 
6
- Documentation:
15
+ Gemspec/RequiredRubyVersion:
7
16
  Enabled: false
8
17
 
9
- Metrics/LineLength:
10
- Max: 140
11
- IgnoredPatterns: ['(\A|\s)#']
18
+ Layout/LineLength:
19
+ AllowedPatterns: ['(\A|\s)#']
12
20
 
13
- Metrics/ModuleLength:
14
- Max: 200
15
-
16
- Metrics/ClassLength:
17
- Max: 200
18
-
19
- Metrics/MethodLength:
20
- Max: 40
21
+ Lint/AmbiguousOperatorPrecedence:
22
+ Enabled: false
21
23
 
22
24
  Metrics/AbcSize:
23
- Max: 60
25
+ Max: 32
24
26
 
25
27
  Metrics/BlockLength:
26
28
  Exclude:
27
29
  - 'spec/**/*'
28
30
 
29
- ParameterLists:
30
- Max: 10
31
+ Metrics/ClassLength:
32
+ Max: 128
33
+
34
+ Metrics/MethodLength:
35
+ Max: 24
36
+
37
+ Metrics/ParameterLists:
38
+ Max: 8
31
39
 
32
- Security/MarshalLoad:
40
+ Naming/AccessorMethodName:
33
41
  Enabled: false
34
42
 
35
- Naming/UncommunicativeMethodParamName:
43
+ Naming/MethodParameterName:
36
44
  Enabled: false
37
45
 
38
- Style/FormatStringToken:
46
+ RSpec/ExampleLength:
47
+ Max: 18
48
+
49
+ RSpec/MultipleMemoizedHelpers:
50
+ Max: 12
51
+
52
+ RSpec/NamedSubject:
39
53
  Enabled: false
54
+
55
+ RSpec/NestedGroups:
56
+ Max: 5
data/CHANGELOG.md ADDED
@@ -0,0 +1,13 @@
1
+ # 0.2.2
2
+ - Refactor codes and configs with RuboCop
3
+
4
+ # 0.2.1
5
+ - Remove dependent gem's type declaration file from installation files.
6
+
7
+ # 0.2.0
8
+ - Add type declaration files.
9
+ - Refactor some codes with type check.
10
+ - Fix some configuration files.
11
+
12
+ # 0.1.0
13
+ - First release.
data/Gemfile CHANGED
@@ -1,6 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
6
 
5
7
  # Specify your gem's dependencies in hanny.gemspec
6
8
  gemspec
9
+
10
+ gem 'bundler', '~> 2.0'
11
+ gem 'rake', '~> 12.0'
12
+ gem 'rbs', '~> 1.2'
13
+ gem 'rspec', '~> 3.0'
14
+ gem 'rubocop', '~> 1.35'
15
+ gem 'rubocop-performance', '~> 1.14'
16
+ gem 'rubocop-rake', '~> 0.6.0'
17
+ gem 'rubocop-rspec', '~> 2.12'
18
+ gem 'simplecov', '~> 0.19'
19
+ gem 'simplecov-lcov', '~> 0.8'
20
+ gem 'steep', '~> 0.44'
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017 yoshoku
1
+ Copyright (c) 2017-2022 yoshoku
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -1,9 +1,10 @@
1
1
  # Hanny
2
2
 
3
- [![Build Status](https://travis-ci.org/yoshoku/Hanny.svg?branch=master)](https://travis-ci.org/yoshoku/Hanny)
4
- [![Coverage Status](https://coveralls.io/repos/github/yoshoku/Hanny/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/Hanny?branch=master)
3
+ [![Build Status](https://github.com/yoshoku/hanny/actions/workflows/build.yml/badge.svg)](https://github.com/yoshoku/hanny/actions/workflows/build.yml)
4
+ [![Coverage Status](https://coveralls.io/repos/github/yoshoku/hanny/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/hanny?branch=main)
5
5
  [![Gem Version](https://badge.fury.io/rb/hanny.svg)](https://badge.fury.io/rb/hanny)
6
- [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/Hanny/blob/master/LICENSE.txt)
6
+ [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/Hanny/blob/main/LICENSE.txt)
7
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/hanny/doc/)
7
8
 
8
9
  Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
9
10
  Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
@@ -27,6 +28,10 @@ Or install it yourself as:
27
28
 
28
29
  $ gem install hanny
29
30
 
31
+ ## Documentation
32
+
33
+ - [Hanny API Documentation](https://yoshoku.github.io/hanny/doc/)
34
+
30
35
  ## Usage
31
36
 
32
37
  ```ruby
@@ -67,14 +72,14 @@ I confirmed the search speed of Hanny's LSH with [MNIST](https://www.csie.ntu.ed
67
72
  The experiment is carried out on MacBook Early 2016 (Core m3 1.1 GHz CPU and 8 GB memory).
68
73
 
69
74
  Code:
75
+
70
76
  ```ruby
71
77
  require 'benchmark'
72
- require 'svmkit'
78
+ require 'rumale'
73
79
  require 'hanny'
74
80
 
75
81
  # Load MNIST data set.
76
- samples, labels = SVMKit::Dataset.load_libsvm_file('mnist')
77
- samples = Numo::DFloat.cast(samples)
82
+ samples, labels = Rumale::Dataset.load_libsvm_file('mnist')
78
83
  queries = samples[0..5, true]
79
84
  targets = samples[6..-1, true]
80
85
  qlabels = labels[0..5]
@@ -111,6 +116,7 @@ end
111
116
  ```
112
117
 
113
118
  Result:
119
+
114
120
  ```bash
115
121
  user system total real
116
122
  LSH
@@ -131,12 +137,6 @@ query label: 2, neighbors label: 2, 2, 2, 2, 2,
131
137
  6.350000 0.280000 6.630000 ( 6.682365)
132
138
  ```
133
139
 
134
- ## Development
135
-
136
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
137
-
138
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
139
-
140
140
  ## Contributing
141
141
 
142
142
  Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/Hanny. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
@@ -147,4 +147,4 @@ The gem is available as open source under the terms of the [BSD 2-clause License
147
147
 
148
148
  ## Code of Conduct
149
149
 
150
- Everyone interacting in the Hanny project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Hanny/blob/master/CODE_OF_CONDUCT.md).
150
+ Everyone interacting in the Hanny project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/Hanny/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,6 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
2
4
  require 'rspec/core/rake_task'
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
6
- task default: :spec
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[rubocop spec]
data/Steepfile ADDED
@@ -0,0 +1,20 @@
1
+ target :lib do
2
+ signature "sig", "sig-deps"
3
+
4
+ check "lib" # Directory name
5
+ # check "Gemfile" # File name
6
+ # check "app/models/**/*.rb" # Glob
7
+ # # ignore "lib/templates/*.rb"
8
+ #
9
+ # # library "pathname", "set" # Standard libraries
10
+ # library "numo-narray" # Gems
11
+ end
12
+
13
+ # target :spec do
14
+ # signature "sig", "sig-private"
15
+ #
16
+ # check "spec"
17
+ #
18
+ # # library "pathname", "set" # Standard libraries
19
+ # # library "rspec"
20
+ # end
data/hanny.gemspec CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
 
2
3
  lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
@@ -10,29 +11,28 @@ Gem::Specification.new do |spec|
10
11
  spec.email = ['yoshoku@outlook.com']
11
12
 
12
13
  spec.summary = 'Hanny is a Hash-based Approximate Nearest Neighbor search library in Ruby.'
13
- spec.description = <<MSG
14
- Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
15
- Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
16
- To build the hash table, Hanny uses Locality Sensitive Hashing (LSH) of approximating cosine similarity.
17
- It is known that if the code length is sufficiently long (ex. greater than 128-bit), LSH can obtain high search performance.
18
- In the experiment, Hanny achieved about twenty times faster search speed than the brute-force search by Euclidean distance.
19
- MSG
14
+ spec.description = <<~MSG
15
+ Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
16
+ Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
17
+ To build the hash table, Hanny uses Locality Sensitive Hashing (LSH) of approximating cosine similarity.
18
+ It is known that if the code length is sufficiently long (ex. greater than 128-bit), LSH can obtain high search performance.
19
+ In the experiment, Hanny achieved about twenty times faster search speed than the brute-force search by Euclidean distance.
20
+ MSG
20
21
  spec.homepage = 'https://github.com/yoshoku/hanny'
21
22
  spec.license = 'BSD-2-Clause'
22
23
 
23
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
24
- f.match(%r{^(test|spec|features)/})
24
+ spec.metadata['homepage_uri'] = spec.homepage
25
+ spec.metadata['source_code_uri'] = spec.homepage
26
+ spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/hanny/blob/main/CHANGELOG.md'
27
+ spec.metadata['documentation_uri'] = 'https://yoshoku.github.io/hanny/doc/'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
29
+
30
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
31
+ f.match(%r{^(test|spec|features|sig-deps)/})
25
32
  end
26
33
  spec.bindir = 'exe'
27
34
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
35
  spec.require_paths = ['lib']
29
36
 
30
- spec.required_ruby_version = '>= 2.1'
31
-
32
- spec.add_runtime_dependency 'numo-narray', '>= 0.9.0'
33
-
34
- spec.add_development_dependency 'bundler', '~> 1.16'
35
- spec.add_development_dependency 'coveralls', '~> 0.8'
36
- spec.add_development_dependency 'rake', '~> 10.0'
37
- spec.add_development_dependency 'rspec', '~> 3.0'
37
+ spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
38
38
  end
@@ -65,13 +65,8 @@ module Hanny
65
65
  # @param random_seed [Integer/NilClass] The seed value using to initialize the random generator.
66
66
  def initialize(code_length: 256, random_seed: nil)
67
67
  @code_length = code_length
68
- @n_samples = nil
69
- @n_features = nil
70
- @n_keys = nil
71
68
  @last_id = nil
72
69
  @weight_mat = nil
73
- @hash_table = nil
74
- @hash_codes = nil
75
70
  @random_seed = random_seed
76
71
  @random_seed ||= srand
77
72
  @rng = Random.new(@random_seed)
@@ -86,26 +81,27 @@ module Hanny
86
81
 
87
82
  # Build a search index.
88
83
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The dataset for building search index.
89
- # @return [SVC] The search index itself that has constructed the hash table.
84
+ # @return [LSHIndex] The search index itself that has constructed the hash table.
90
85
  def build_index(x)
91
86
  # Initialize some variables.
92
- @n_samples, @n_features = x.shape
87
+ @n_samples = x.shape[0]
88
+ @n_features = x.shape[1]
93
89
  @hash_table = {}
94
- @hash_codes = []
95
90
  @weight_mat = Utils.rand_normal([@n_features, @code_length], @rng)
96
91
  # Convert samples to binary codes.
97
92
  bin_x = hash_function(x)
98
93
  # Store samples to binary hash table.
94
+ codes = []
99
95
  @n_samples.times do |m|
100
96
  bin_code = bin_x[m, true]
101
97
  hash_key = symbolized_hash_key(bin_code)
102
98
  unless @hash_table.key?(hash_key)
103
- @hash_codes.push(bin_code.to_a)
99
+ codes.push(bin_code.to_a)
104
100
  @hash_table[hash_key] = []
105
101
  end
106
102
  @hash_table[hash_key].push(m)
107
103
  end
108
- @hash_codes = Numo::Bit.cast(@hash_codes)
104
+ @hash_codes = Numo::Bit.cast(codes)
109
105
  # Update some variables.
110
106
  @n_keys = @hash_codes.shape[0]
111
107
  @last_id = @n_samples
@@ -117,7 +113,7 @@ module Hanny
117
113
  # @return [Array<Integer>] The indices of appended data in search index
118
114
  def append_data(x)
119
115
  # Initialize some variables.
120
- n_new_samples, = x.shape
116
+ n_new_samples = x.shape[0]
121
117
  bin_x = hash_function(x)
122
118
  added_data_ids = []
123
119
  # Store samples to binary hash table.
@@ -152,12 +148,14 @@ module Hanny
152
148
  removed_data_ids = []
153
149
  data_ids.each do |query_id|
154
150
  # Remove data id from hash table.
155
- hash_key = @hash_table.keys.select { |k| @hash_table[k].include?(query_id) }.first
151
+ hash_key = @hash_table.keys.find { |k| @hash_table[k].include?(query_id) }
156
152
  next if hash_key.nil?
153
+
157
154
  @hash_table[hash_key].delete(query_id)
158
155
  removed_data_ids.push(query_id)
159
156
  # Remove the hash key if there is no data.
160
157
  next unless @hash_table[hash_key].empty?
158
+
161
159
  target_id = distances_to_hash_codes(decoded_hash_key(hash_key)).index(0)
162
160
  @hash_codes = @hash_codes.delete(target_id, 0)
163
161
  end
@@ -171,15 +169,17 @@ module Hanny
171
169
  # @return [Array<Integer>] The data indices of search result.
172
170
  def search_knn(q, n_neighbors: 10)
173
171
  # Initialize some variables.
174
- n_queries, = q.shape
172
+ n_queries = q.shape[0]
175
173
  candidates = Array.new(n_queries) { [] }
176
174
  # Binarize queries.
177
175
  bin_q = hash_function(q)
178
176
  # Find k-nearest neighbors for each query.
179
177
  n_queries.times do |m|
180
- sort_with_index(distances_to_hash_codes(bin_q[m, true])).each do |_, n|
178
+ sort_with_index(distances_to_hash_codes(bin_q[m, true])).each do |d, n|
181
179
  candidates[m] = candidates[m] | @hash_table[symbolized_hash_key(@hash_codes[n, true])]
182
- break if candidates[m].size >= n_neighbors
180
+ # TODO: Investigate the cause of the steep Ruby::BreakTypeMismatch error.
181
+ # break if candidates[m].size >= n_neighbors
182
+ break [[d, n]] if candidates[m].size >= n_neighbors
183
183
  end
184
184
  candidates[m] = candidates[m].shift(n_neighbors)
185
185
  end
@@ -190,55 +190,25 @@ module Hanny
190
190
  # @param q [Numo::DFloat] (shape: [n_queries, n_features]) The data for search queries.
191
191
  # @param radius [Float] The hamming radius for search range.
192
192
  # @return [Array<Integer>] The data indices of search result.
193
- def search_radius(q, radius: 1)
193
+ def search_radius(q, radius: 1.0)
194
194
  # Initialize some variables.
195
- n_queries, = q.shape
195
+ n_queries = q.shape[0]
196
196
  candidates = Array.new(n_queries) { [] }
197
197
  # Binarize queries.
198
198
  bin_q = hash_function(q)
199
199
  # Find k-nearest neighbors for each query.
200
200
  n_queries.times do |m|
201
201
  sort_with_index(distances_to_hash_codes(bin_q[m, true])).each do |d, n|
202
- break if d > radius
202
+ # TODO: Investigate the cause of the steep Ruby::BreakTypeMismatch error.
203
+ # break if d > radius
204
+ break [[d, n]] if d > radius
205
+
203
206
  candidates[m] = candidates[m] | @hash_table[symbolized_hash_key(@hash_codes[n, true])]
204
207
  end
205
208
  end
206
209
  candidates
207
210
  end
208
211
 
209
- # Dump marshal data.
210
- # @return [Hash] The marshal data for search index.
211
- def marshal_dump
212
- { code_length: @code_length,
213
- n_samples: @n_samples,
214
- n_features: @n_features,
215
- n_keys: @n_keys,
216
- last_id: @last_id,
217
- weight_mat: @weight_mat,
218
- bias_vec: @bias_vec,
219
- hash_table: @hash_table,
220
- hash_codes: @hash_codes,
221
- random_seed: @random_seed,
222
- rng: @rng }
223
- end
224
-
225
- # Load marshal data.
226
- # @return [nil]
227
- def marshal_load(obj)
228
- @code_length = obj[:code_length]
229
- @n_samples = obj[:n_samples]
230
- @n_features = obj[:n_features]
231
- @n_keys = obj[:n_keys]
232
- @last_id = obj[:last_id]
233
- @weight_mat = obj[:weight_mat]
234
- @bias_vec = obj[:bias_vec]
235
- @hash_table = obj[:hash_table]
236
- @hash_codes = obj[:hash_codes]
237
- @random_seed = obj[:random_seed]
238
- @rng = obj[:rng]
239
- nil
240
- end
241
-
242
212
  private
243
213
 
244
214
  # Convert binary code to symbol as hash key.
@@ -266,7 +236,7 @@ module Hanny
266
236
  # @param hash_key [Symbol]
267
237
  # @return [Numo::Bit]
268
238
  def decoded_hash_key(hash_key)
269
- bin_code = Zlib::Inflate.inflate(hash_key.to_s).split('').map(&:to_i)
239
+ bin_code = Zlib::Inflate.inflate(hash_key.to_s).chars.map(&:to_i)
270
240
  Numo::Bit[*bin_code]
271
241
  end
272
242
  end
data/lib/hanny/version.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  # Hanny is a hash-based approximate nearest neighbor search library.
4
4
  module Hanny
5
- # @!visibility private
6
- VERSION = '0.1.0'.freeze
5
+ # The version of Hanny you are using.
6
+ VERSION = '0.2.2'
7
7
  end
@@ -0,0 +1,27 @@
1
+ module Hanny
2
+ class LSHIndex
3
+ attr_reader code_length: Integer
4
+ attr_reader n_samples: Integer
5
+ attr_reader n_features: Integer
6
+ attr_reader n_keys: Integer
7
+ attr_reader hash_table: Hash[Symbol, Array[Integer]]
8
+ attr_reader hash_codes: Numo::Bit
9
+ attr_reader random_seed: untyped
10
+ attr_reader rng: Random
11
+
12
+ def initialize: (?code_length: Integer code_length, ?random_seed: Integer? random_seed) -> void
13
+ def hash_function: (Numo::DFloat x) -> Numo::Bit
14
+ def build_index: (Numo::DFloat x) -> LSHIndex
15
+ def append_data: (Numo::DFloat x) -> Array[Integer]
16
+ def remove_data: (Array[Integer] data_ids) -> Array[Integer]
17
+ def search_knn: (Numo::DFloat q, ?n_neighbors: Integer n_neighbors) -> Array[Array[Integer]]
18
+ def search_radius: (Numo::DFloat q, ?radius: Float radius) -> Array[Array[Integer]]
19
+
20
+ private
21
+
22
+ def symbolized_hash_key: (Numo::Bit bin_code) -> Symbol
23
+ def distances_to_hash_codes: (Numo::Bit bin_code) -> Array[Float]
24
+ def sort_with_index: (Array[Float] arr) -> Array[[Float, Integer]]
25
+ def decoded_hash_key: (Symbol hash_key) -> Numo::Bit
26
+ end
27
+ end
@@ -0,0 +1,7 @@
1
+ module Hanny
2
+ module Utils
3
+ def self.euclidean_distance: (Numo::DFloat x, ?Numo::DFloat? y) -> Numo::DFloat
4
+ def self.rand_uniform: (Array[Integer] shape, Random rng) -> Numo::DFloat
5
+ def self.rand_normal: (Array[Integer] shape, Random rng, ?Float mu, ?Float sigma) -> Numo::DFloat
6
+ end
7
+ end
data/sig/hanny.rbs ADDED
@@ -0,0 +1,3 @@
1
+ module Hanny
2
+ VERSION: String
3
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hanny
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-04 00:00:00.000000000 Z
11
+ date: 2022-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -16,70 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.0
19
+ version: 0.9.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.0
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '1.16'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '1.16'
41
- - !ruby/object:Gem::Dependency
42
- name: coveralls
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '0.8'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0.8'
55
- - !ruby/object:Gem::Dependency
56
- name: rake
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '10.0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '10.0'
69
- - !ruby/object:Gem::Dependency
70
- name: rspec
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
74
- - !ruby/object:Gem::Version
75
- version: '3.0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - "~>"
81
- - !ruby/object:Gem::Version
82
- version: '3.0'
26
+ version: 0.9.1
83
27
  description: |
84
28
  Hanny is a Hash-based Approximate Nearest Neighbor (ANN) search library in Ruby.
85
29
  Hash-based ANN converts vector data into binary codes and builds a hash table by using the binary codes as hash keys.
@@ -93,27 +37,36 @@ extensions: []
93
37
  extra_rdoc_files: []
94
38
  files:
95
39
  - ".coveralls.yml"
40
+ - ".github/workflows/build.yml"
41
+ - ".github/workflows/coverage.yml"
96
42
  - ".gitignore"
97
43
  - ".rspec"
98
44
  - ".rubocop.yml"
99
- - ".travis.yml"
45
+ - CHANGELOG.md
100
46
  - CODE_OF_CONDUCT.md
101
47
  - Gemfile
102
48
  - LICENSE.txt
103
49
  - README.md
104
50
  - Rakefile
105
- - bin/console
106
- - bin/setup
51
+ - Steepfile
107
52
  - hanny.gemspec
108
53
  - lib/hanny.rb
109
54
  - lib/hanny/lsh_index.rb
110
55
  - lib/hanny/utils.rb
111
56
  - lib/hanny/version.rb
57
+ - sig/hanny.rbs
58
+ - sig/hanny/lsh_index.rbs
59
+ - sig/hanny/utils.rbs
112
60
  homepage: https://github.com/yoshoku/hanny
113
61
  licenses:
114
62
  - BSD-2-Clause
115
- metadata: {}
116
- post_install_message:
63
+ metadata:
64
+ homepage_uri: https://github.com/yoshoku/hanny
65
+ source_code_uri: https://github.com/yoshoku/hanny
66
+ changelog_uri: https://github.com/yoshoku/hanny/blob/main/CHANGELOG.md
67
+ documentation_uri: https://yoshoku.github.io/hanny/doc/
68
+ rubygems_mfa_required: 'true'
69
+ post_install_message:
117
70
  rdoc_options: []
118
71
  require_paths:
119
72
  - lib
@@ -121,16 +74,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
121
74
  requirements:
122
75
  - - ">="
123
76
  - !ruby/object:Gem::Version
124
- version: '2.1'
77
+ version: '0'
125
78
  required_rubygems_version: !ruby/object:Gem::Requirement
126
79
  requirements:
127
80
  - - ">="
128
81
  - !ruby/object:Gem::Version
129
82
  version: '0'
130
83
  requirements: []
131
- rubyforge_project:
132
- rubygems_version: 2.7.6
133
- signing_key:
84
+ rubygems_version: 3.2.33
85
+ signing_key:
134
86
  specification_version: 4
135
87
  summary: Hanny is a Hash-based Approximate Nearest Neighbor search library in Ruby.
136
88
  test_files: []
data/.travis.yml DELETED
@@ -1,11 +0,0 @@
1
- sudo: false
2
- os: linux
3
- dist: trusty
4
- language: ruby
5
- rvm:
6
- - 2.2
7
- - 2.3
8
- - 2.4
9
- - 2.5
10
- before_install:
11
- - gem install --no-document bundler -v '~> 1.16'
data/bin/console DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'bundler/setup'
4
- require 'hanny'
5
-
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
8
-
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require 'irb'
14
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here