ngt 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d97bd9eb8a0db31ece99f2ad749e00b9b46ae5929f4d4be4863aae7f68bc2167
4
+ data.tar.gz: 64bef298d58ed4e5c8c7215d7bc19e7b918b51a2f83113277da3a8c7d08080a4
5
+ SHA512:
6
+ metadata.gz: 1ef9696c68b051f1f225cdb2bcdce06859ea4d6321382a0702e4c32915d3d347caa4d4a697b54ada8f1956602b158f0f5bcb2006561052d0e9b0acd2962fc68c
7
+ data.tar.gz: f6f027ec3f216660482323bb44a822f8bcca59ff5e0bf35ce3f27dbb7331e933bf700de73a20a0094790f758fc5b3a946378ec8b8ad69444b472bbf6a9a9a21d
@@ -0,0 +1,7 @@
1
+ ## 0.1.1
2
+
3
+ - Fixed `unable to resolve type 'uint32_t'` error on Ubuntu
4
+
5
+ ## 0.1.0
6
+
7
+ - First release
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,142 @@
1
+ # NGT
2
+
3
+ [NGT](https://github.com/yahoojapan/NGT) - high-speed approximate nearest neighbors - for Ruby
4
+
5
+ [![Build Status](https://travis-ci.org/ankane/ngt.svg?branch=master)](https://travis-ci.org/ankane/ngt)
6
+
7
+ ## Installation
8
+
9
+ First, [install NGT](https://github.com/yahoojapan/NGT/blob/master/README.md#Installation). For Homebrew, use:
10
+
11
+ ```sh
12
+ brew install ngt
13
+ ```
14
+
15
+ Add this line to your application’s Gemfile:
16
+
17
+ ```ruby
18
+ gem 'ngt'
19
+ ```
20
+
21
+ ## Getting Started
22
+
23
+ Prep your data
24
+
25
+ ```ruby
26
+ objects = [
27
+ [1, 1, 2, 1],
28
+ [5, 4, 6, 5],
29
+ [1, 2, 1, 2]
30
+ ]
31
+ ```
32
+
33
+ Create an index
34
+
35
+ ```ruby
36
+ index = Ngt::Index.create(path, dimensions)
37
+ ```
38
+
39
+ Insert objects
40
+
41
+ ```ruby
42
+ index.batch_insert(objects)
43
+ ```
44
+
45
+ Search the index
46
+
47
+ ```ruby
48
+ index.search(query, size: 3)
49
+ ```
50
+
51
+ Save the index
52
+
53
+ ```ruby
54
+ index.save
55
+ ```
56
+
57
+ Load an index
58
+
59
+ ```ruby
60
+ index = Ngt::Index.new(path)
61
+ ```
62
+
63
+ Get an object by id
64
+
65
+ ```ruby
66
+ index.object(id)
67
+ ```
68
+
69
+ Insert a single object
70
+
71
+ ```ruby
72
+ index.insert(object)
73
+ ```
74
+
75
+ Remove an object by id
76
+
77
+ ```ruby
78
+ index.remove(id)
79
+ ```
80
+
81
+ Build the index
82
+
83
+ ```ruby
84
+ index.build_index
85
+ ```
86
+
87
+ ## Full Example
88
+
89
+ ```ruby
90
+ dim = 10
91
+ objects = []
92
+ 100.times do |i|
93
+ objects << dim.times.map { rand(100) }
94
+ end
95
+
96
+ index = Ngt::Index.create("tmp", dim)
97
+ index.batch_insert(objects)
98
+ index.save
99
+
100
+ query = objects[0]
101
+ result = index.search(query, size: 3)
102
+
103
+ result.each do |res|
104
+ puts "#{res[:id]}, #{res[:distance]}"
105
+ p index.object(res[:id])
106
+ end
107
+ ```
108
+
109
+ ## Data
110
+
111
+ Data can be an array of arrays
112
+
113
+ ```ruby
114
+ [[1, 2, 3], [4, 5, 6]]
115
+ ```
116
+
117
+ Or a Numo NArray
118
+
119
+ ```ruby
120
+ Numo::DFloat.new(3, 2).seq
121
+ ```
122
+
123
+ ## Resources
124
+
125
+ - [ANN Benchmarks](https://github.com/erikbern/ann-benchmarks)
126
+
127
+ ## Credits
128
+
129
+ This library is modeled after NGT’s [Python API](https://github.com/yahoojapan/NGT/blob/master/python/README-ngtpy.md).
130
+
131
+ ## History
132
+
133
+ View the [changelog](https://github.com/ankane/ngt/blob/master/CHANGELOG.md)
134
+
135
+ ## Contributing
136
+
137
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
138
+
139
+ - [Report bugs](https://github.com/ankane/ngt/issues)
140
+ - Fix bugs and [submit pull requests](https://github.com/ankane/ngt/pulls)
141
+ - Write, clarify, or fix documentation
142
+ - Suggest or add new features
@@ -0,0 +1,18 @@
1
+ # dependencies
2
+ require "ffi"
3
+
4
+ # modules
5
+ require "ngt/index"
6
+ require "ngt/version"
7
+
8
module Ngt
  # Error class for this library.
  class Error < StandardError
  end

  # Library name(s) passed to FFI's `ffi_lib` when the bindings are loaded.
  # Exposed as Ngt.ffi_lib / Ngt.ffi_lib= so it can be overridden.
  @ffi_lib = ["ngt"]
  singleton_class.send(:attr_accessor, :ffi_lib)

  # Load the bindings lazily: the native library is only looked up when
  # Ngt::FFI is first referenced, which is where the friendlier
  # "could not find" error message is produced.
  autoload :FFI, "ngt/ffi"
end
@@ -0,0 +1,60 @@
1
module Ngt
  # Raw FFI bindings to the NGT C API. Each `attach_function` below exposes
  # one C function as a module method of the same name.
  module FFI
    extend ::FFI::Library

    begin
      ffi_lib Ngt.ffi_lib
    rescue LoadError => e
      # set NGT_DEBUG to see the loader's original error instead of the short one
      raise e if ENV["NGT_DEBUG"]
      raise LoadError, "Could not find NGT"
    end

    # https://github.com/yahoojapan/NGT/blob/master/lib/NGT/Capi.h
    # keep same order

    # use FFI's built-in fixed-width names (int16, int32, uint32) instead of
    # the C typedef names (int16_t, int32_t, uint32_t)
    # to prevent "unable to resolve type" error on Ubuntu

    # One search result, returned by value from ngt_get_result.
    class ObjectDistance < ::FFI::Struct
      layout :id, :int,
        :distance, :float
    end

    attach_function :ngt_open_index, %i[string pointer], :pointer
    attach_function :ngt_create_graph_and_tree, %i[string pointer pointer], :pointer
    attach_function :ngt_create_property, %i[pointer], :pointer
    attach_function :ngt_save_index, %i[pointer string pointer], :bool
    attach_function :ngt_get_property, %i[pointer pointer pointer], :bool
    attach_function :ngt_get_property_dimension, %i[pointer pointer], :int32
    attach_function :ngt_set_property_dimension, %i[pointer int32 pointer], :bool
    attach_function :ngt_set_property_edge_size_for_creation, %i[pointer int16 pointer], :bool
    attach_function :ngt_set_property_edge_size_for_search, %i[pointer int16 pointer], :bool
    attach_function :ngt_is_property_object_type_float, %i[int32], :bool
    attach_function :ngt_get_property_object_type, %i[pointer pointer], :int32
    attach_function :ngt_set_property_object_type_float, %i[pointer pointer], :bool
    attach_function :ngt_set_property_object_type_integer, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_l1, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_l2, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_angle, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_hamming, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_jaccard, %i[pointer pointer], :bool
    attach_function :ngt_set_property_distance_type_cosine, %i[pointer pointer], :bool
    attach_function :ngt_batch_insert_index, %i[pointer pointer uint32 pointer pointer], :bool
    attach_function :ngt_create_index, %i[pointer uint32 pointer], :bool
    attach_function :ngt_remove_index, %i[pointer int pointer], :bool
    attach_function :ngt_insert_index, %i[pointer pointer uint32 pointer], :int
    attach_function :ngt_insert_index_as_float, %i[pointer pointer uint32 pointer], :int
    attach_function :ngt_create_empty_results, %i[pointer], :pointer
    attach_function :ngt_search_index, %i[pointer pointer int32 size_t float float pointer pointer], :bool
    attach_function :ngt_get_result_size, %i[pointer pointer], :uint32
    attach_function :ngt_get_result, %i[pointer uint32 pointer], ObjectDistance.by_value
    attach_function :ngt_get_object_space, %i[pointer pointer], :pointer
    attach_function :ngt_get_object_as_float, %i[pointer int pointer], :pointer
    attach_function :ngt_get_object_as_integer, %i[pointer int pointer], :pointer
    attach_function :ngt_destroy_property, %i[pointer], :void
    attach_function :ngt_close_index, %i[pointer], :void
    attach_function :ngt_create_error_object, %i[], :pointer
    attach_function :ngt_get_error_string, %i[pointer], :string
    attach_function :ngt_destroy_error_object, %i[pointer], :void
  end
end
@@ -0,0 +1,161 @@
1
module Ngt
  # Wrapper around an NGT index stored at a path on disk.
  #
  # Every C call is routed through #ffi / .ffi, which raises Ngt::Error when
  # NGT leaves a non-empty error string behind.
  class Index
    # Opens the index at +path+ and caches its dimension and object type.
    #
    # Raises Ngt::Error if any of the underlying NGT calls reports an error.
    def initialize(path)
      @path = path
      @error = FFI.ngt_create_error_object
      # register right away so the error object is released even when one of
      # the calls below raises
      ObjectSpace.define_finalizer(self, self.class.finalize(@error))

      @index = ffi(:ngt_open_index, path)

      property = ffi(:ngt_create_property)
      ffi(:ngt_get_property, @index, property)

      @dimension = ffi(:ngt_get_property_dimension, property)

      object_type = ffi(:ngt_get_property_object_type, property)
      @float = FFI.ngt_is_property_object_type_float(object_type)

      @object_space = ffi(:ngt_get_object_space, @index)
    ensure
      # the property is only needed to read the settings above
      FFI.ngt_destroy_property(property) if property
    end

    # Inserts a single object (anything responding to +to_a+).
    #
    # Returns whatever ngt_insert_index returns
    # (presumably the id of the new object - confirm against the C API).
    def insert(object)
      ffi(:ngt_insert_index, @index, c_object(object.to_a), @dimension)
    end

    # Inserts many objects in one call and then builds the index.
    #
    # objects     - array of arrays, or anything whose +to_a+ yields one
    # num_threads - forwarded to #build_index
    #
    # Returns an array with one uint32 per object, read back from the id
    # buffer handed to NGT.
    #
    # TODO make more performant for Numo
    def batch_insert(objects, num_threads: 8)
      objects = objects.to_a
      flat_objects = objects.flatten
      obj = ::FFI::MemoryPointer.new(:float, flat_objects.size)
      obj.write_array_of_float(flat_objects)

      ids = ::FFI::MemoryPointer.new(:uint32, objects.size)
      ffi(:ngt_batch_insert_index, @index, obj, objects.size, ids)

      build_index(num_threads: num_threads)

      ids.read_array_of_uint32(objects.size)
    end

    # Calls ngt_create_index on this index with +num_threads+.
    def build_index(num_threads: 8)
      ffi(:ngt_create_index, @index, num_threads)
    end

    # Fetches the stored object with the given +id+.
    #
    # Returns an array of @dimension floats for float indexes, or of
    # @dimension uint8 values otherwise.
    def object(id)
      if float?
        res = ffi(:ngt_get_object_as_float, @object_space, id)
        res.read_array_of_float(@dimension)
      else
        res = ffi(:ngt_get_object_as_integer, @object_space, id)
        res.read_array_of_uint8(@dimension)
      end
    end

    # Removes the object with the given +id+ from the index.
    def remove(id)
      ffi(:ngt_remove_index, @index, id)
    end

    # Searches the index for objects near +query+.
    #
    # query   - anything responding to +to_a+
    # size    - forwarded to ngt_search_index
    # epsilon - forwarded to ngt_search_index
    # radius  - forwarded to ngt_search_index; nil is sent as -1.0
    #
    # Returns an array of hashes with :id and :distance keys.
    def search(query, size: 20, epsilon: 0.1, radius: nil)
      radius ||= -1.0
      # NOTE(review): results is never released in this method - confirm
      # whether the C API offers a matching destroy call
      results = ffi(:ngt_create_empty_results)
      ffi(:ngt_search_index, @index, c_object(query.to_a), @dimension, size, epsilon, radius, results)
      result_size = ffi(:ngt_get_result_size, results)
      Array.new(result_size) do |i|
        res = ffi(:ngt_get_result, results, i)
        {
          id: res[:id],
          distance: res[:distance]
        }
      end
    end

    # Saves the index to +path+, defaulting to the path it was opened from.
    def save(path: nil)
      path ||= @path
      ffi(:ngt_save_index, @index, path)
    end

    # Closes the underlying index. Calling it again is a no-op, so the native
    # handle is never closed twice.
    def close
      return unless @index

      FFI.ngt_close_index(@index)
      # drop the handles so later calls pass NULL instead of a dangling pointer
      @index = nil
      @object_space = nil
    end

    # Creates a new index at +path+ and returns it opened as an Index.
    #
    # dimension              - forwarded to ngt_set_property_dimension
    # edge_size_for_creation - forwarded to the matching property setter
    # edge_size_for_search   - forwarded to the matching property setter
    # object_type            - "Float"/"float" or "Integer"/"integer"
    # distance_type          - "L1", "L2", "Angle", "Hamming", "Jaccard" or "Cosine"
    #
    # Raises ArgumentError for an unknown object or distance type and
    # Ngt::Error when NGT reports an error.
    def self.create(path, dimension, edge_size_for_creation: 10,
        edge_size_for_search: 40, object_type: "Float", distance_type: "L2")

      error = FFI.ngt_create_error_object
      property = ffi(:ngt_create_property, error)
      ffi(:ngt_set_property_dimension, property, dimension, error)
      ffi(:ngt_set_property_edge_size_for_creation, property, edge_size_for_creation, error)
      ffi(:ngt_set_property_edge_size_for_search, property, edge_size_for_search, error)

      case object_type.to_s
      when "Float", "float"
        ffi(:ngt_set_property_object_type_float, property, error)
      when "Integer", "integer"
        ffi(:ngt_set_property_object_type_integer, property, error)
      else
        raise ArgumentError, "Unknown object type: #{object_type}"
      end

      case distance_type.to_s
      when "L1"
        ffi(:ngt_set_property_distance_type_l1, property, error)
      when "L2"
        ffi(:ngt_set_property_distance_type_l2, property, error)
      when "Angle"
        ffi(:ngt_set_property_distance_type_angle, property, error)
      when "Hamming"
        ffi(:ngt_set_property_distance_type_hamming, property, error)
      when "Jaccard"
        ffi(:ngt_set_property_distance_type_jaccard, property, error)
      when "Cosine"
        ffi(:ngt_set_property_distance_type_cosine, property, error)
      else
        raise ArgumentError, "Unknown distance type: #{distance_type}"
      end

      index = ffi(:ngt_create_graph_and_tree, path, property, error)
      FFI.ngt_close_index(index)
      # already closed; stop the ensure below from closing it a second time
      index = nil

      Index.new(path)
    ensure
      FFI.ngt_destroy_error_object(error) if error
      FFI.ngt_destroy_property(property) if property
      FFI.ngt_close_index(index) if index
    end

    # private
    #
    # Calls FFI.<method>(*args), where the last argument is the NGT error
    # object, and raises Ngt::Error if that object holds a non-empty message.
    # Returns the C function's result otherwise.
    #
    # NOTE(review): the message is not reset here; confirm NGT clears it
    # between calls, otherwise an earlier failure could be reported again.
    def self.ffi(method, *args)
      res = FFI.send(method, *args)
      message = FFI.ngt_get_error_string(args.last)
      raise Error, message unless message.empty?
      res
    end

    # Builds the finalizer that destroys +error+. It is created in a class
    # method so the proc does not capture the instance being finalized.
    def self.finalize(error)
      # must use proc instead of stabby lambda
      proc do
        # TODO clean-up more objects
        FFI.ngt_destroy_error_object(error)
      end
    end

    private

    # Instance-level shortcut for .ffi that appends this index's error object.
    def ffi(*args)
      self.class.ffi(*args, @error)
    end

    # Whether the opened index reported a float object type.
    def float?
      @float
    end

    # Copies +object+ (an array of numbers) into a freshly allocated buffer
    # of doubles and returns the pointer.
    def c_object(object)
      c_object = ::FFI::MemoryPointer.new(:double, object.size)
      c_object.write_array_of_double(object)
      c_object
    end
  end
end
@@ -0,0 +1,3 @@
1
module Ngt
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: numo-narray
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email: andrew@chartkick.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - CHANGELOG.md
90
+ - LICENSE.txt
91
+ - README.md
92
+ - lib/ngt.rb
93
+ - lib/ngt/ffi.rb
94
+ - lib/ngt/index.rb
95
+ - lib/ngt/version.rb
96
+ homepage: https://github.com/ankane/ngt
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '2.4'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubygems_version: 3.0.3
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: NGT - high-speed approximate nearest neighbors - for Ruby
119
+ test_files: []