ngt 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 967a8b449269b1d56539e294015f925791ca960a8bfe622aeff171d0d3170069
4
+ data.tar.gz: 2bc6884f36fe47ddf06cfcc6850dbd3b31d1b2044a11e477ea0684a72d325145
5
+ SHA512:
6
+ metadata.gz: 399e1cb7a347da8ef610ff394fbd84f73d29edc9408563327bb1a58877c9cb5e255394417642fe84a08e4ee09b15083dc7a789a30192ae3ce25474a6b4211a23
7
+ data.tar.gz: 92b97b7a9d32bf85bf016e81b83fea70f9292a5fda829f4e1433ca8402b9e1c0f7f48b91279898a80e4bbe61ca91e913e83e4805740939a179d85790d618daa0
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First release
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,137 @@
1
+ # NGT
2
+
3
+ [NGT](https://github.com/yahoojapan/NGT) - high-speed approximate nearest neighbors - for Ruby
4
+
5
+ ## Installation
6
+
7
+ First, [install NGT](https://github.com/yahoojapan/NGT/blob/master/README.md#Installation).
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem 'ngt'
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Prep your data
18
+
19
+ ```ruby
20
+ objects = [
21
+ [1, 1, 2, 1],
22
+ [5, 4, 6, 5],
23
+ [1, 2, 1, 2]
24
+ ]
25
+ ```
26
+
27
+ Create an index
28
+
29
+ ```ruby
30
+ index = Ngt::Index.create(path, dimensions)
31
+ ```
32
+
33
+ Insert objects
34
+
35
+ ```ruby
36
+ index.batch_insert(objects)
37
+ ```
38
+
39
+ Search the index
40
+
41
+ ```ruby
42
+ index.search(query, size: 3)
43
+ ```
44
+
45
+ Save the index
46
+
47
+ ```ruby
48
+ index.save
49
+ ```
50
+
51
+ Load an index
52
+
53
+ ```ruby
54
+ Ngt::Index.new(path)
55
+ ```
56
+
57
+ Get an object by id
58
+
59
+ ```ruby
60
+ index.object(id)
61
+ ```
62
+
63
+ Insert a single object
64
+
65
+ ```ruby
66
+ index.insert(object)
67
+ ```
68
+
69
+ Remove an object by id
70
+
71
+ ```ruby
72
+ index.remove(id)
73
+ ```
74
+
75
+ Build the index (`batch_insert` already does this for you; call it yourself after using `insert`)
76
+
77
+ ```ruby
78
+ index.build_index
79
+ ```
80
+
81
+ ## Full Example
82
+
83
+ ```ruby
84
+ dim = 10
85
+ objects = []
86
+ 100.times do |i|
87
+ objects << dim.times.map { rand(100) }
88
+ end
89
+
90
+ index = Ngt::Index.create("tmp", dim)
91
+ index.batch_insert(objects)
92
+ index.save
93
+
94
+ query = objects[0]
95
+ result = index.search(query, size: 3)
96
+
97
+ result.each do |res|
98
+ puts "#{res[:id]}, #{res[:distance]}"
99
+ object = index.object(res[:id])
100
+ p object
101
+ end
102
+ ```
103
+
104
+ ## Data
105
+
106
+ Data can be an array of arrays
107
+
108
+ ```ruby
109
+ [[1, 2, 3], [4, 5, 6]]
110
+ ```
111
+
112
+ Or a Numo NArray
113
+
114
+ ```ruby
115
+ Numo::DFloat.new(3, 2).seq
116
+ ```
117
+
118
+ ## Resources
119
+
120
+ - [ANN Benchmarks](https://github.com/erikbern/ann-benchmarks)
121
+
122
+ ## Credits
123
+
124
+ This library is modeled after NGT’s [Python API](https://github.com/yahoojapan/NGT/blob/master/python/README-ngtpy.md).
125
+
126
+ ## History
127
+
128
+ View the [changelog](https://github.com/ankane/ngt/blob/master/CHANGELOG.md)
129
+
130
+ ## Contributing
131
+
132
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
133
+
134
+ - [Report bugs](https://github.com/ankane/ngt/issues)
135
+ - Fix bugs and [submit pull requests](https://github.com/ankane/ngt/pulls)
136
+ - Write, clarify, or fix documentation
137
+ - Suggest or add new features
@@ -0,0 +1,18 @@
1
+ # dependencies
2
+ require "ffi"
3
+
4
+ # modules
5
+ require "ngt/index"
6
+ require "ngt/version"
7
+
8
# Top-level namespace for the NGT bindings.
module Ngt
  # Error raised by the bindings when a native call reports a failure message.
  Error = Class.new(StandardError)

  class << self
    # Library names/paths handed to `ffi_lib` when Ngt::FFI is loaded.
    attr_accessor :ffi_lib
  end
  @ffi_lib = ["ngt"]

  # Loading the native bindings is deferred until Ngt::FFI is first
  # referenced, which allows for a friendlier error message.
  autoload :FFI, "ngt/ffi"
end
@@ -0,0 +1,57 @@
1
module Ngt
  # Raw bindings to the NGT C API. Every function is attached with the exact
  # argument and return types listed below; higher-level code lives in
  # Ngt::Index.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib Ngt.ffi_lib
    rescue LoadError => load_error
      # With NGT_DEBUG set, surface the loader's own error untouched.
      raise load_error if ENV["NGT_DEBUG"]

      raise LoadError, "Could not find NGT"
    end

    # https://github.com/yahoojapan/NGT/blob/master/lib/NGT/Capi.h
    # keep same order

    # One search hit as returned (by value) from ngt_get_result.
    class ObjectDistance < ::FFI::Struct
      layout(:id, :int, :distance, :float)
    end

    # index and property lifecycle
    attach_function :ngt_open_index, [:string, :pointer], :pointer
    attach_function :ngt_create_graph_and_tree, [:string, :pointer, :pointer], :pointer
    attach_function :ngt_create_property, [:pointer], :pointer
    attach_function :ngt_save_index, [:pointer, :string, :pointer], :bool
    attach_function :ngt_get_property, [:pointer, :pointer, :pointer], :bool

    # property accessors
    attach_function :ngt_get_property_dimension, [:pointer, :pointer], :int32_t
    attach_function :ngt_set_property_dimension, [:pointer, :int32_t, :pointer], :bool
    attach_function :ngt_set_property_edge_size_for_creation, [:pointer, :int16_t, :pointer], :bool
    attach_function :ngt_set_property_edge_size_for_search, [:pointer, :int16_t, :pointer], :bool
    attach_function :ngt_is_property_object_type_float, [:int32_t], :bool
    attach_function :ngt_get_property_object_type, [:pointer, :pointer], :int32_t
    attach_function :ngt_set_property_object_type_float, [:pointer, :pointer], :bool
    attach_function :ngt_set_property_object_type_integer, [:pointer, :pointer], :bool

    # distance type setters share one signature; attached in header order
    %w[l1 l2 angle hamming jaccard cosine].each do |distance|
      attach_function :"ngt_set_property_distance_type_#{distance}", [:pointer, :pointer], :bool
    end

    # inserting, building and removing
    attach_function :ngt_batch_insert_index, [:pointer, :pointer, :uint32_t, :pointer, :pointer], :bool
    attach_function :ngt_create_index, [:pointer, :uint32_t, :pointer], :bool
    attach_function :ngt_remove_index, [:pointer, :int, :pointer], :bool
    attach_function :ngt_insert_index, [:pointer, :pointer, :uint32_t, :pointer], :int
    attach_function :ngt_insert_index_as_float, [:pointer, :pointer, :uint32_t, :pointer], :int

    # searching
    attach_function :ngt_create_empty_results, [:pointer], :pointer
    attach_function :ngt_search_index,
                    [:pointer, :pointer, :int32_t, :size_t, :float, :float, :pointer, :pointer],
                    :bool
    attach_function :ngt_get_result_size, [:pointer, :pointer], :uint32_t
    attach_function :ngt_get_result, [:pointer, :uint32_t, :pointer], ObjectDistance.by_value

    # reading stored objects
    attach_function :ngt_get_object_space, [:pointer, :pointer], :pointer
    attach_function :ngt_get_object_as_float, [:pointer, :int, :pointer], :pointer
    attach_function :ngt_get_object_as_integer, [:pointer, :int, :pointer], :pointer

    # teardown and error reporting
    attach_function :ngt_destroy_property, [:pointer], :void
    attach_function :ngt_close_index, [:pointer], :void
    attach_function :ngt_create_error_object, [], :pointer
    attach_function :ngt_get_error_string, [:pointer], :string
    attach_function :ngt_destroy_error_object, [:pointer], :void
  end
end
@@ -0,0 +1,161 @@
1
module Ngt
  # Handle on an NGT index stored at a filesystem path.
  #
  # Instances are obtained with Index.new(path) for an existing index or
  # Index.create(path, dimension, ...) for a fresh one. Every native call is
  # routed through `ffi`, which raises Ngt::Error when the native error
  # string is non-empty.
  class Index
    # Opens the index stored at +path+ and caches its dimension and object
    # type.
    #
    # Raises Ngt::Error if any native call reports an error.
    def initialize(path)
      @path = path
      @error = FFI.ngt_create_error_object
      # Registered before anything can raise so the error object is released
      # even when opening the index fails.
      ObjectSpace.define_finalizer(self, self.class.finalize(@error))

      begin
        @index = ffi(:ngt_open_index, path)
        load_properties
        @object_space = ffi(:ngt_get_object_space, @index)
      rescue StandardError
        # do not keep a half-initialised native index open
        close
        raise
      end
    end

    # Inserts a single object (anything responding to +to_a+) and returns the
    # value of the native insert call (the new object's id).
    #
    # Raises ArgumentError if the object has fewer values than the index
    # dimension.
    def insert(object)
      ffi(:ngt_insert_index, @index, c_object(object.to_a), @dimension)
    end

    # Inserts many objects at once, then builds the index with +num_threads+.
    # Returns the array of ids written by the native call, one per object.
    #
    # Raises ArgumentError if fewer values are supplied than
    # objects.size * dimension.
    #
    # TODO make more performant for Numo
    def batch_insert(objects, num_threads: 8)
      objects = objects.to_a
      flat_objects = objects.flatten

      # The native call only receives the object count, so it presumably
      # reads count * dimension floats; refuse buffers that are too short.
      expected = objects.size * @dimension
      if flat_objects.size < expected
        raise ArgumentError, "Expected #{expected} values, got #{flat_objects.size}"
      end

      obj = ::FFI::MemoryPointer.new(:float, flat_objects.size)
      obj.write_array_of_float(flat_objects)

      ids = ::FFI::MemoryPointer.new(:uint32_t, objects.size)
      ffi(:ngt_batch_insert_index, @index, obj, objects.size, ids)

      build_index(num_threads: num_threads)

      ids.read_array_of_uint32(objects.size)
    end

    # Builds the index using +num_threads+ threads.
    def build_index(num_threads: 8)
      ffi(:ngt_create_index, @index, num_threads)
    end

    # Returns the stored object with the given +id+ as an array of
    # +dimension+ values (floats for float indexes, uint8 values otherwise).
    def object(id)
      if float?
        res = ffi(:ngt_get_object_as_float, @object_space, id)
        res.read_array_of_float(@dimension)
      else
        res = ffi(:ngt_get_object_as_integer, @object_space, id)
        res.read_array_of_uint8(@dimension)
      end
    end

    # Removes the object with the given +id+.
    def remove(id)
      ffi(:ngt_remove_index, @index, id)
    end

    # Searches for the objects nearest to +query+.
    #
    # +size+, +epsilon+ and +radius+ are forwarded to the native search; a
    # nil +radius+ is sent as -1.0. Returns an array of hashes of the form
    # { id: ..., distance: ... }.
    #
    # Raises ArgumentError if the query has fewer values than the index
    # dimension.
    def search(query, size: 20, epsilon: 0.1, radius: nil)
      radius ||= -1.0
      # NOTE(review): this results handle is never released; the bindings
      # used here expose no destroy call for it.
      results = ffi(:ngt_create_empty_results)
      ffi(:ngt_search_index, @index, c_object(query.to_a), @dimension, size, epsilon, radius, results)
      result_size = ffi(:ngt_get_result_size, results)

      Array.new(result_size) do |i|
        res = ffi(:ngt_get_result, results, i)
        { id: res[:id], distance: res[:distance] }
      end
    end

    # Saves the index to +path+, defaulting to the path it was opened from.
    def save(path: nil)
      path ||= @path
      ffi(:ngt_save_index, @index, path)
    end

    # Closes the native index. Safe to call more than once: the handles are
    # dropped after the first call so the native close never runs twice on
    # the same index.
    def close
      return unless @index

      FFI.ngt_close_index(@index)
      @index = nil
      @object_space = nil
      nil
    end

    # Creates a new index at +path+ with the given +dimension+ and returns it
    # opened as an Index.
    #
    # object_type:   "Float"/"float" or "Integer"/"integer"
    # distance_type: "L1", "L2", "Angle", "Hamming", "Jaccard" or "Cosine"
    #
    # Raises ArgumentError for an unknown object or distance type and
    # Ngt::Error when a native call reports an error.
    def self.create(path, dimension, edge_size_for_creation: 10,
        edge_size_for_search: 40, object_type: "Float", distance_type: "L2")

      error = FFI.ngt_create_error_object
      property = ffi(:ngt_create_property, error)
      ffi(:ngt_set_property_dimension, property, dimension, error)
      ffi(:ngt_set_property_edge_size_for_creation, property, edge_size_for_creation, error)
      ffi(:ngt_set_property_edge_size_for_search, property, edge_size_for_search, error)

      case object_type.to_s
      when "Float", "float"
        ffi(:ngt_set_property_object_type_float, property, error)
      when "Integer", "integer"
        ffi(:ngt_set_property_object_type_integer, property, error)
      else
        raise ArgumentError, "Unknown object type: #{object_type}"
      end

      case distance_type.to_s
      when "L1"
        ffi(:ngt_set_property_distance_type_l1, property, error)
      when "L2"
        ffi(:ngt_set_property_distance_type_l2, property, error)
      when "Angle"
        ffi(:ngt_set_property_distance_type_angle, property, error)
      when "Hamming"
        ffi(:ngt_set_property_distance_type_hamming, property, error)
      when "Jaccard"
        ffi(:ngt_set_property_distance_type_jaccard, property, error)
      when "Cosine"
        ffi(:ngt_set_property_distance_type_cosine, property, error)
      else
        raise ArgumentError, "Unknown distance type: #{distance_type}"
      end

      index = ffi(:ngt_create_graph_and_tree, path, property, error)
      FFI.ngt_close_index(index)
      # cleared so the ensure clause does not close the same handle again
      index = nil

      Index.new(path)
    ensure
      FFI.ngt_destroy_error_object(error) if error
      FFI.ngt_destroy_property(property) if property
      FFI.ngt_close_index(index) if index
    end

    # Calls the native function +method+ with +args+; the last argument must
    # be the error object. Raises Ngt::Error if the error string is non-empty
    # afterwards, otherwise returns the native result.
    #
    # Intended for internal use, but left public because the instance-level
    # helper calls it with an explicit receiver.
    def self.ffi(method, *args)
      res = FFI.send(method, *args)
      message = FFI.ngt_get_error_string(args.last)
      raise Error, message unless message.empty?
      res
    end

    # Builds the finalizer that releases the native error object. It is a
    # class method so the proc does not capture the instance being finalized.
    def self.finalize(error)
      # must use proc instead of stabby lambda
      proc do
        # TODO clean-up more objects
        FFI.ngt_destroy_error_object(error)
      end
    end

    private

    # Same as Index.ffi with this instance's error object appended.
    def ffi(*args)
      self.class.ffi(*args, @error)
    end

    # Reads the dimension and object type from the index's property, always
    # releasing the temporary property handle afterwards.
    def load_properties
      property = ffi(:ngt_create_property)
      begin
        ffi(:ngt_get_property, @index, property)
        @dimension = ffi(:ngt_get_property_dimension, property)
        object_type = ffi(:ngt_get_property_object_type, property)
        @float = FFI.ngt_is_property_object_type_float(object_type)
      ensure
        FFI.ngt_destroy_property(property)
      end
    end

    def float?
      @float
    end

    # Copies +object+ into a native buffer of doubles.
    #
    # Callers always tell the native side to read @dimension values, so a
    # shorter array would be read past its end; reject it up front.
    def c_object(object)
      if object.size < @dimension
        raise ArgumentError, "Expected #{@dimension} dimensions, got #{object.size}"
      end

      c_object = ::FFI::MemoryPointer.new(:double, object.size)
      c_object.write_array_of_double(object)
      c_object
    end
  end
end
@@ -0,0 +1,3 @@
1
module Ngt
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: numo-narray
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email: andrew@chartkick.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - CHANGELOG.md
90
+ - LICENSE.txt
91
+ - README.md
92
+ - lib/ngt.rb
93
+ - lib/ngt/ffi.rb
94
+ - lib/ngt/index.rb
95
+ - lib/ngt/version.rb
96
+ homepage: https://github.com/ankane/ngt
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '2.4'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubygems_version: 3.0.3
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: NGT - high-speed approximate nearest neighbors - for Ruby
119
+ test_files: []