ngt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 967a8b449269b1d56539e294015f925791ca960a8bfe622aeff171d0d3170069
4
+ data.tar.gz: 2bc6884f36fe47ddf06cfcc6850dbd3b31d1b2044a11e477ea0684a72d325145
5
+ SHA512:
6
+ metadata.gz: 399e1cb7a347da8ef610ff394fbd84f73d29edc9408563327bb1a58877c9cb5e255394417642fe84a08e4ee09b15083dc7a789a30192ae3ce25474a6b4211a23
7
+ data.tar.gz: 92b97b7a9d32bf85bf016e81b83fea70f9292a5fda829f4e1433ca8402b9e1c0f7f48b91279898a80e4bbe61ca91e913e83e4805740939a179d85790d618daa0
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First release
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,137 @@
1
+ # NGT
2
+
3
+ [NGT](https://github.com/yahoojapan/NGT) - high-speed approximate nearest neighbors - for Ruby
4
+
5
+ ## Installation
6
+
7
+ First, [install NGT](https://github.com/yahoojapan/NGT/blob/master/README.md#Installation).
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem 'ngt'
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ Prep your data
18
+
19
+ ```ruby
20
+ objects = [
21
+ [1, 1, 2, 1],
22
+ [5, 4, 6, 5],
23
+ [1, 2, 1, 2]
24
+ ]
25
+ ```
26
+
27
+ Create an index
28
+
29
+ ```ruby
30
+ index = Ngt::Index.create(path, dimensions)
31
+ ```
32
+
33
+ Insert objects
34
+
35
+ ```ruby
36
+ index.batch_insert(objects)
37
+ ```
38
+
39
+ Search the index
40
+
41
+ ```ruby
42
+ index.search(query, size: 3)
43
+ ```
44
+
45
+ Save the index
46
+
47
+ ```ruby
48
+ index.save
49
+ ```
50
+
51
+ Load an index
52
+
53
+ ```ruby
54
+ Ngt::Index.new(path)
55
+ ```
56
+
57
+ Get an object by id
58
+
59
+ ```ruby
60
+ index.object(id)
61
+ ```
62
+
63
+ Insert a single object
64
+
65
+ ```ruby
66
+ index.insert(object)
67
+ ```
68
+
69
+ Remove an object by id
70
+
71
+ ```ruby
72
+ index.remove(id)
73
+ ```
74
+
75
+ Build the index (`batch_insert` does this automatically; `insert` does not)
76
+
77
+ ```ruby
78
+ index.build_index
79
+ ```
80
+
81
+ ## Full Example
82
+
83
+ ```ruby
84
+ dim = 10
85
+ objects = []
86
+ 100.times do |i|
87
+ objects << dim.times.map { rand(100) }
88
+ end
89
+
90
+ index = Ngt::Index.create("tmp", dim)
91
+ index.batch_insert(objects)
92
+ index.save
93
+
94
+ query = objects[0]
95
+ result = index.search(query, size: 3)
96
+
97
+ result.each do |res|
98
+ puts "#{res[:id]}, #{res[:distance]}"
99
+ object = index.object(res[:id])
100
+ p object
101
+ end
102
+ ```
103
+
104
+ ## Data
105
+
106
+ Data can be an array of arrays
107
+
108
+ ```ruby
109
+ [[1, 2, 3], [4, 5, 6]]
110
+ ```
111
+
112
+ Or a Numo NArray
113
+
114
+ ```ruby
115
+ Numo::DFloat.new(3, 2).seq
116
+ ```
117
+
118
+ ## Resources
119
+
120
+ - [ANN Benchmarks](https://github.com/erikbern/ann-benchmarks)
121
+
122
+ ## Credits
123
+
124
+ This library is modeled after NGT’s [Python API](https://github.com/yahoojapan/NGT/blob/master/python/README-ngtpy.md).
125
+
126
+ ## History
127
+
128
+ View the [changelog](https://github.com/ankane/ngt/blob/master/CHANGELOG.md).
129
+
130
+ ## Contributing
131
+
132
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
133
+
134
+ - [Report bugs](https://github.com/ankane/ngt/issues)
135
+ - Fix bugs and [submit pull requests](https://github.com/ankane/ngt/pulls)
136
+ - Write, clarify, or fix documentation
137
+ - Suggest or add new features
@@ -0,0 +1,18 @@
1
+ # dependencies
2
+ require "ffi"
3
+
4
+ # modules
5
+ require "ngt/index"
6
+ require "ngt/version"
7
+
8
# Top-level namespace for the NGT bindings.
module Ngt
  # Raised with the message NGT reports when a native call fails.
  Error = Class.new(StandardError)

  # Ngt.ffi_lib / Ngt.ffi_lib= hold the library name(s) handed to `ffi_lib`
  # when Ngt::FFI is first loaded, so they can be overridden beforehand.
  singleton_class.send(:attr_accessor, :ffi_lib)
  @ffi_lib = ["ngt"]

  # Deferred until first use: loading ngt/ffi is what looks up the native
  # library, and it raises a friendlier LoadError when NGT is missing.
  autoload :FFI, "ngt/ffi"
end
@@ -0,0 +1,57 @@
1
module Ngt
  # Low-level bindings to the NGT C API, attached through the ffi gem.
  #
  # Most functions take an NGT error object as their last argument; callers
  # read it back with ngt_get_error_string to detect failures.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib Ngt.ffi_lib
    rescue LoadError => e
      # NGT_DEBUG surfaces the loader's original error instead of the short one
      raise e if ENV["NGT_DEBUG"]
      raise LoadError, "Could not find NGT"
    end

    # https://github.com/yahoojapan/NGT/blob/master/lib/NGT/Capi.h
    # keep same order

    # One search hit, returned by value from ngt_get_result.
    class ObjectDistance < ::FFI::Struct
      layout :id, :int,
        :distance, :float
    end

    attach_function :ngt_open_index, %i[string pointer], :pointer
    attach_function :ngt_create_graph_and_tree, %i[string pointer pointer], :pointer
    attach_function :ngt_create_property, %i[pointer], :pointer
    attach_function :ngt_save_index, %i[pointer string pointer], :bool
    attach_function :ngt_get_property, %i[pointer pointer pointer], :bool
    attach_function :ngt_get_property_dimension, %i[pointer pointer], :int32_t
    attach_function :ngt_set_property_dimension, %i[pointer int32_t pointer], :bool
    attach_function :ngt_set_property_edge_size_for_creation, %i[pointer int16_t pointer], :bool
    attach_function :ngt_set_property_edge_size_for_search, %i[pointer int16_t pointer], :bool
    attach_function :ngt_is_property_object_type_float, %i[int32_t], :bool
    attach_function :ngt_get_property_object_type, %i[pointer pointer], :int32_t
    attach_function :ngt_set_property_object_type_float, %i[pointer pointer], :bool
    attach_function :ngt_set_property_object_type_integer, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_l1, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_l2, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_angle, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_hamming, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_jaccard, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_cosine, %i[pointer pointer], :bool
    attach_function :ngt_batch_insert_index, %i[pointer pointer uint32_t pointer pointer], :bool
    attach_function :ngt_create_index, %i[pointer uint32_t pointer], :bool
    attach_function :ngt_remove_index, %i[pointer int pointer], :bool
    attach_function :ngt_insert_index, %i[pointer pointer uint32_t pointer], :int
    attach_function :ngt_insert_index_as_float, %i[pointer pointer uint32_t pointer], :int
    attach_function :ngt_create_empty_results, %i[pointer], :pointer
    attach_function :ngt_search_index, %i[pointer pointer int32_t size_t float float pointer pointer], :bool
    attach_function :ngt_get_result_size, %i[pointer pointer], :uint32_t
    attach_function :ngt_get_result, %i[pointer uint32_t pointer], ObjectDistance.by_value
    attach_function :ngt_get_object_space, %i[pointer pointer], :pointer
    attach_function :ngt_get_object_as_float, %i[pointer int pointer], :pointer
    attach_function :ngt_get_object_as_integer, %i[pointer int pointer], :pointer
    # counterpart of ngt_create_empty_results; without it result sets can never be freed
    attach_function :ngt_destroy_results, %i[pointer], :void
    attach_function :ngt_destroy_property, %i[pointer], :void
    attach_function :ngt_close_index, %i[pointer], :void
    attach_function :ngt_create_error_object, %i[], :pointer
    attach_function :ngt_get_error_string, %i[pointer], :string
    attach_function :ngt_destroy_error_object, %i[pointer], :void
  end
end
@@ -0,0 +1,161 @@
1
module Ngt
  # Handle on an NGT index stored at a path, driven through the Ngt::FFI bindings.
  #
  # Every native call goes through `ffi`, which raises Ngt::Error when NGT
  # leaves a message in the error object.
  class Index
    # Opens the index at +path+ and caches its dimension and object type.
    #
    # Raises Ngt::Error when NGT reports a failure.
    def initialize(path)
      @path = path
      @error = FFI.ngt_create_error_object
      # registered before anything below can raise, so the error object is
      # released even when opening the index fails
      ObjectSpace.define_finalizer(self, self.class.finalize(@error))

      @index = ffi(:ngt_open_index, path)
      read_properties
      @object_space = ffi(:ngt_get_object_space, @index)
    end

    # Inserts one object (anything responding to +to_a+) and returns the
    # value of ngt_insert_index. The index is not rebuilt; see #build_index.
    #
    # Raises ArgumentError when the object's length differs from the index dimension.
    def insert(object)
      values = object.to_a
      check_dimension(values.size)
      ffi(:ngt_insert_index, @index, c_object(values), @dimension)
    end

    # Inserts many objects at once, rebuilds the index and returns the ids
    # written by ngt_batch_insert_index.
    #
    # Raises ArgumentError when any row's length differs from the index dimension.
    #
    # TODO make more performant for Numo
    def batch_insert(objects, num_threads: 8)
      rows = objects.to_a
      values = rows.flatten
      # the C side reads rows.size * dimension floats, so a short buffer
      # would be read out of bounds
      unless rows.all? { |row| Array(row).size == @dimension } && values.size == rows.size * @dimension
        raise ArgumentError, "Bad dimensions: expected #{rows.size} objects of size #{@dimension}"
      end

      buffer = ::FFI::MemoryPointer.new(:float, values.size)
      buffer.write_array_of_float(values)

      ids = ::FFI::MemoryPointer.new(:uint32_t, rows.size)
      ffi(:ngt_batch_insert_index, @index, buffer, rows.size, ids)

      build_index(num_threads: num_threads)

      ids.read_array_of_uint32(rows.size)
    end

    # Builds the index via ngt_create_index using +num_threads+ threads.
    def build_index(num_threads: 8)
      ffi(:ngt_create_index, @index, num_threads)
    end

    # Returns the stored object with the given id as an array of floats, or
    # of uint8 values when the index does not use float objects.
    def object(id)
      if float?
        res = ffi(:ngt_get_object_as_float, @object_space, id)
        res.read_array_of_float(@dimension)
      else
        res = ffi(:ngt_get_object_as_integer, @object_space, id)
        res.read_array_of_uint8(@dimension)
      end
    end

    # Removes the object with the given id.
    def remove(id)
      ffi(:ngt_remove_index, @index, id)
    end

    # Searches for up to +size+ neighbours of +query+ and returns an array
    # of +{id:, distance:}+ hashes. A nil +radius+ is passed to NGT as -1.0.
    #
    # Raises ArgumentError when the query's length differs from the index dimension.
    def search(query, size: 20, epsilon: 0.1, radius: nil)
      values = query.to_a
      check_dimension(values.size)
      radius ||= -1.0

      results = ffi(:ngt_create_empty_results)
      begin
        ffi(:ngt_search_index, @index, c_object(values), @dimension, size, epsilon, radius, results)
        Array.new(ffi(:ngt_get_result_size, results)) do |i|
          res = ffi(:ngt_get_result, results, i)
          { id: res[:id], distance: res[:distance] }
        end
      ensure
        # free the native result set when the binding for it is attached
        FFI.ngt_destroy_results(results) if FFI.respond_to?(:ngt_destroy_results)
      end
    end

    # Saves the index to +path+, defaulting to the path it was opened from.
    def save(path: nil)
      path ||= @path
      ffi(:ngt_save_index, @index, path)
    end

    # Closes the native index. Later calls are no-ops, so the handle is
    # never passed to ngt_close_index twice.
    def close
      return unless @index

      FFI.ngt_close_index(@index)
      @index = nil
      @object_space = nil
    end

    # Creates a new index at +path+ with the given properties, then reopens
    # it and returns the Index.
    #
    # Raises ArgumentError for an unknown +object_type+ or +distance_type+,
    # and Ngt::Error when NGT reports a failure.
    def self.create(path, dimension, edge_size_for_creation: 10,
        edge_size_for_search: 40, object_type: "Float", distance_type: "L2")

      error = FFI.ngt_create_error_object
      property = ffi(:ngt_create_property, error)
      ffi(:ngt_set_property_dimension, property, dimension, error)
      ffi(:ngt_set_property_edge_size_for_creation, property, edge_size_for_creation, error)
      ffi(:ngt_set_property_edge_size_for_search, property, edge_size_for_search, error)

      case object_type.to_s
      when "Float", "float"
        ffi(:ngt_set_property_object_type_float, property, error)
      when "Integer", "integer"
        ffi(:ngt_set_property_object_type_integer, property, error)
      else
        raise ArgumentError, "Unknown object type: #{object_type}"
      end

      case distance_type.to_s
      when "L1", "L2", "Angle", "Hamming", "Jaccard", "Cosine"
        # the setter names differ only by the lower-cased distance type
        ffi(:"ngt_set_property_distance_type_#{distance_type.to_s.downcase}", property, error)
      else
        raise ArgumentError, "Unknown distance type: #{distance_type}"
      end

      # the freshly created handle is closed straight away; the returned
      # Index reopens the files from disk
      index = ffi(:ngt_create_graph_and_tree, path, property, error)
      FFI.ngt_close_index(index)

      new(path)
    ensure
      FFI.ngt_destroy_error_object(error) if error
      FFI.ngt_destroy_property(property) if property
    end

    # Calls the named Ngt::FFI function with +args+ (the last one must be
    # the error object) and raises Ngt::Error if NGT left a message in it.
    #
    # Kept public because the instance-level helper calls it with an
    # explicit receiver.
    #
    # NOTE(review): the same error object is reused across calls and is
    # never cleared here - confirm that a past failure cannot make later
    # successful calls raise.
    def self.ffi(method, *args)
      res = FFI.public_send(method, *args)
      message = FFI.ngt_get_error_string(args.last)
      # to_s guards against a nil message
      raise Error, message unless message.to_s.empty?
      res
    end

    # Builds the finalizer that releases the native error object.
    def self.finalize(error)
      # must use proc instead of stabby lambda
      proc do
        # TODO clean-up more objects
        FFI.ngt_destroy_error_object(error)
      end
    end

    private

    # Instance-level shortcut that appends this index's error object.
    def ffi(*args)
      self.class.ffi(*args, @error)
    end

    def float?
      @float
    end

    # Reads dimension and object type from the index's property object and
    # always destroys that temporary property afterwards.
    def read_properties
      property = ffi(:ngt_create_property)
      begin
        ffi(:ngt_get_property, @index, property)
        @dimension = ffi(:ngt_get_property_dimension, property)
        object_type = ffi(:ngt_get_property_object_type, property)
        @float = FFI.ngt_is_property_object_type_float(object_type)
      ensure
        FFI.ngt_destroy_property(property)
      end
    end

    # Rejects vectors whose length differs from the index dimension before
    # they reach C, which reads exactly @dimension values.
    def check_dimension(size)
      return if size == @dimension

      raise ArgumentError, "Bad dimensions: expected #{@dimension}, got #{size}"
    end

    # Copies +values+ into a native array of doubles.
    def c_object(values)
      pointer = ::FFI::MemoryPointer.new(:double, values.size)
      pointer.write_array_of_double(values)
      pointer
    end
  end
end
@@ -0,0 +1,3 @@
1
module Ngt
  # Gem version; frozen so callers cannot mutate the shared string.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: numo-narray
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email: andrew@chartkick.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - CHANGELOG.md
90
+ - LICENSE.txt
91
+ - README.md
92
+ - lib/ngt.rb
93
+ - lib/ngt/ffi.rb
94
+ - lib/ngt/index.rb
95
+ - lib/ngt/version.rb
96
+ homepage: https://github.com/ankane/ngt
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '2.4'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubygems_version: 3.0.3
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: NGT - high-speed approximate nearest neighbors - for Ruby
119
+ test_files: []