ngt 0.2.2 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 349ccec555d0066d9ccf024a40cf0ecaa478ac628d7cc1240ac2ffa288735a29
4
- data.tar.gz: 6242793a35080d8b52c650ebb3b24bee762b36bcf5035bd99263ac2657f3f12c
3
+ metadata.gz: 850afe334c78ec69bfb7c975629336a615d92bb2d03efade8ffe148ed4d1dfc9
4
+ data.tar.gz: 3bc13fbbc90c7323dd4322f3492d355363d16f28baf5bd0b7b88fc35e44b807b
5
5
  SHA512:
6
- metadata.gz: 9fe092180d1a68e7843089a2a6c1fda1475ca2ad05d302209db501704994b694a5e94ac34daa9898d26efdf739c775741b79e24bd5aa3fbcdd58fb28063c1bef
7
- data.tar.gz: 866b54645ab021c69b50967ae5952d159d4e45ff850f450500c550c154a5f9b224ca45e36ead4f4d3ed96d43a26d3ad601040ba91a31671d815789161a981a83
6
+ metadata.gz: '08cb7235b719ef9bffe6b0557538dd9464d6bbd0dea617d260de8cb57756f4388c65d1d83d5265f6c2036291769186cdb845a7b02e5838af0c964ab0b1dd0752'
7
+ data.tar.gz: ccfd9d4139d1297be48fa06b8da41055dfbd7d4802062bdad3730007d7d86e6a4262a731e0d27a793c79cce2fb01a60807a975b24619ae7b35c8edf8425b1b2c
@@ -1,3 +1,30 @@
1
+ ## 0.3.2 (2020-12-27)
2
+
3
+ - Updated NGT to 1.12.2
4
+
5
+ ## 0.3.1 (2020-05-17)
6
+
7
+ - Updated NGT to 1.11.5
8
+ - Improved error message when OpenMP not found on Mac
9
+
10
+ ## 0.3.0 (2020-03-25)
11
+
12
+ - Updated NGT to 1.10.0
13
+ - Added support for OpenMP on Mac
14
+ - Create index in memory if no path specified
15
+ - Added `normalized_angle` and `normalized_cosine`
16
+
17
+ ## 0.2.4 (2020-03-09)
18
+
19
+ - Updated NGT to 1.9.1
20
+ - Added support for passing an index to optimizers
21
+ - Added `dimensions`, `distance_type`, `edge_size_for_creation`, `edge_size_for_search`, and `object_type` methods
22
+
23
+ ## 0.2.3 (2020-03-08)
24
+
25
+ - Added `load` method
26
+ - Deprecated `create` and passing path to `new`
27
+
1
28
  ## 0.2.2 (2020-02-11)
2
29
 
3
30
  - Fixed `Could not find NGT` error on some Linux platforms
@@ -11,6 +38,7 @@
11
38
  - Changed to Apache 2.0 license to match NGT
12
39
  - Added shared libraries
13
40
  - Added optimizer
41
+ - Improved performance of `batch_insert` for Numo
14
42
 
15
43
  ## 0.1.1 (2019-10-27)
16
44
 
data/NOTICE.txt CHANGED
@@ -1,3 +1,4 @@
1
+ Copyright 2016-2020 Yahoo Japan Corporation
1
2
  Copyright 2019-2020 Andrew Kane
2
3
 
3
4
  Licensed under the Apache License, Version 2.0 (the "License");
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [NGT](https://github.com/yahoojapan/NGT) - high-speed approximate nearest neighbors - for Ruby
4
4
 
5
- [![Build Status](https://travis-ci.org/ankane/ngt.svg?branch=master)](https://travis-ci.org/ankane/ngt)
5
+ [![Build Status](https://github.com/ankane/ngt/workflows/build/badge.svg?branch=master)](https://github.com/ankane/ngt/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -12,7 +12,13 @@ Add this line to your application’s Gemfile:
12
12
  gem 'ngt'
13
13
  ```
14
14
 
15
- NGT is not available for Windows yet
15
+ On Mac, also install OpenMP:
16
+
17
+ ```sh
18
+ brew install libomp
19
+ ```
20
+
21
+ NGT is not available for Windows
16
22
 
17
23
  ## Getting Started
18
24
 
@@ -29,7 +35,7 @@ objects = [
29
35
  Create an index
30
36
 
31
37
  ```ruby
32
- index = Ngt::Index.create(path, dimensions)
38
+ index = Ngt::Index.new(dimensions)
33
39
  ```
34
40
 
35
41
  Insert objects
@@ -47,13 +53,13 @@ index.search(query, size: 3)
47
53
  Save the index
48
54
 
49
55
  ```ruby
50
- index.save
56
+ index.save(path)
51
57
  ```
52
58
 
53
59
  Load an index
54
60
 
55
61
  ```ruby
56
- index = Ngt::Index.new(path)
62
+ index = Ngt::Index.load(path)
57
63
  ```
58
64
 
59
65
  Get an object by id
@@ -84,7 +90,8 @@ Optimize the index
84
90
 
85
91
  ```ruby
86
92
  optimizer = Ngt::Optimizer.new(outgoing: 10, incoming: 120)
87
- optimizer.execute(path, new_path)
93
+ optimizer.adjust_search_coefficients(index)
94
+ optimizer.execute(index, new_path)
88
95
  ```
89
96
 
90
97
  ## Full Example
@@ -96,9 +103,8 @@ objects = []
96
103
  objects << dim.times.map { rand(100) }
97
104
  end
98
105
 
99
- index = Ngt::Index.create("tmp", dim)
106
+ index = Ngt::Index.new(dim)
100
107
  index.batch_insert(objects)
101
- index.save
102
108
 
103
109
  query = objects[0]
104
110
  result = index.search(query, size: 3)
@@ -109,6 +115,38 @@ result.each do |res|
109
115
  end
110
116
  ```
111
117
 
118
+ ## Index Options
119
+
120
+ Defaults shown below
121
+
122
+ ```ruby
123
+ Ngt::Index.new(dimensions,
124
+ edge_size_for_creation: 10,
125
+ edge_size_for_search: 40,
126
+ object_type: :float, # :float, :integer
127
+ distance_type: :l2, # :l1, :l2, :hamming, :angle, :cosine, :normalized_angle, :normalized_cosine, :jaccard
128
+ path: nil
129
+ )
130
+ ```
131
+
132
+ ## Optimizer Options
133
+
134
+ Defaults shown below
135
+
136
+ ```ruby
137
+ Ngt::Optimizer.new(
138
+ outgoing: 10,
139
+ incoming: 120,
140
+ queries: 100,
141
+ low_accuracy_from: 0.3,
142
+ low_accuracy_to: 0.5,
143
+ high_accuracy_from: 0.8,
144
+ high_accuracy_to: 0.9,
145
+ gt_epsilon: 0.1,
146
+ merge: 0.2
147
+ )
148
+ ```
149
+
112
150
  ## Data
113
151
 
114
152
  Data can be an array of arrays
data/lib/ngt.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  # dependencies
2
2
  require "ffi"
3
3
 
4
+ # stdlib
5
+ require "tmpdir"
6
+
4
7
  # modules
5
8
  require "ngt/utils"
6
9
  require "ngt/index"
@@ -5,7 +5,11 @@ module Ngt
5
5
  begin
6
6
  ffi_lib Ngt.ffi_lib
7
7
  rescue LoadError => e
8
- raise e
8
+ if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
9
+ raise LoadError, "OpenMP not found. Run `brew install libomp`"
10
+ else
11
+ raise e
12
+ end
9
13
  end
10
14
 
11
15
  # https://github.com/yahoojapan/NGT/blob/master/lib/NGT/Capi.h
@@ -19,8 +23,11 @@ module Ngt
19
23
  :distance, :float
20
24
  end
21
25
 
26
+ enum :distance_type, [:l1, :l2, :hamming, :angle, :cosine, :normalized_angle, :normalized_cosine, :jaccard]
27
+
22
28
  attach_function :ngt_open_index, %i[string pointer], :pointer
23
29
  attach_function :ngt_create_graph_and_tree, %i[string pointer pointer], :pointer
30
+ attach_function :ngt_create_graph_and_tree_in_memory, %i[pointer pointer], :pointer
24
31
  attach_function :ngt_create_property, %i[pointer], :pointer
25
32
  attach_function :ngt_save_index, %i[pointer string pointer], :bool
26
33
  attach_function :ngt_get_property, %i[pointer pointer pointer], :bool
@@ -38,20 +45,26 @@ module Ngt
38
45
  attach_function :ngt_set_property_distance_type_hamming, %i[pointer pointer], :bool
39
46
  attach_function :ngt_set_property_distance_type_jaccard, %i[pointer pointer], :bool
40
47
  attach_function :ngt_set_property_distance_type_cosine, %i[pointer pointer], :bool
41
- attach_function :ngt_batch_insert_index, %i[pointer pointer uint32 pointer pointer], :bool
42
- attach_function :ngt_create_index, %i[pointer uint32 pointer], :bool
43
- attach_function :ngt_remove_index, %i[pointer int pointer], :bool
48
+ attach_function :ngt_set_property_distance_type_normalized_angle, %i[pointer pointer], :bool
49
+ attach_function :ngt_set_property_distance_type_normalized_cosine, %i[pointer pointer], :bool
44
50
  attach_function :ngt_insert_index, %i[pointer pointer uint32 pointer], :int
45
51
  attach_function :ngt_insert_index_as_float, %i[pointer pointer uint32 pointer], :int
46
52
  attach_function :ngt_create_empty_results, %i[pointer], :pointer
47
53
  attach_function :ngt_search_index, %i[pointer pointer int32 size_t float float pointer pointer], :bool
48
54
  attach_function :ngt_get_result_size, %i[pointer pointer], :uint32
49
55
  attach_function :ngt_get_result, %i[pointer uint32 pointer], ObjectDistance.by_value
56
+ attach_function :ngt_batch_insert_index, %i[pointer pointer uint32 pointer pointer], :bool
57
+ attach_function :ngt_create_index, %i[pointer uint32 pointer], :bool
58
+ attach_function :ngt_remove_index, %i[pointer int pointer], :bool
50
59
  attach_function :ngt_get_object_space, %i[pointer pointer], :pointer
51
60
  attach_function :ngt_get_object_as_float, %i[pointer int pointer], :pointer
52
61
  attach_function :ngt_get_object_as_integer, %i[pointer int pointer], :pointer
62
+ attach_function :ngt_destroy_results, %i[pointer], :void
53
63
  attach_function :ngt_destroy_property, %i[pointer], :void
54
64
  attach_function :ngt_close_index, %i[pointer], :void
65
+ attach_function :ngt_get_property_edge_size_for_creation, %i[pointer pointer], :int16
66
+ attach_function :ngt_get_property_edge_size_for_search, %i[pointer pointer], :int16
67
+ attach_function :ngt_get_property_distance_type, %i[pointer pointer], :distance_type
55
68
  attach_function :ngt_create_error_object, %i[], :pointer
56
69
  attach_function :ngt_get_error_string, %i[pointer], :string
57
70
  attach_function :ngt_destroy_error_object, %i[pointer], :void
@@ -2,26 +2,44 @@ module Ngt
2
2
  class Index
3
3
  include Utils
4
4
 
5
- def initialize(path)
5
+ attr_reader :path
6
+
7
+ def initialize(index, path)
8
+ @index = index
6
9
  @path = path
10
+
7
11
  @error = FFI.ngt_create_error_object
8
- @index = ffi(:ngt_open_index, path)
12
+ @property = ffi(:ngt_create_property)
13
+ ffi(:ngt_get_property, @index, @property)
9
14
 
10
- property = ffi(:ngt_create_property)
11
- ffi(:ngt_get_property, @index, property)
15
+ ObjectSpace.define_finalizer(self, self.class.finalize(@error, @index, @property))
16
+ end
17
+
18
+ def dimensions
19
+ @dimensions ||= ffi(:ngt_get_property_dimension, @property)
20
+ end
12
21
 
13
- @dimension = ffi(:ngt_get_property_dimension, property)
22
+ def distance_type
23
+ @distance_type ||= ffi(:ngt_get_property_distance_type, @property)
24
+ end
14
25
 
15
- object_type = ffi(:ngt_get_property_object_type, property)
16
- @float = FFI.ngt_is_property_object_type_float(object_type)
26
+ def edge_size_for_creation
27
+ @edge_size_for_creation ||= ffi(:ngt_get_property_edge_size_for_creation, @property)
28
+ end
17
29
 
18
- @object_space = ffi(:ngt_get_object_space, @index)
30
+ def edge_size_for_search
31
+ @edge_size_for_search ||= ffi(:ngt_get_property_edge_size_for_search, @property)
32
+ end
19
33
 
20
- ObjectSpace.define_finalizer(self, self.class.finalize(@error))
34
+ def object_type
35
+ @object_type ||= begin
36
+ object_type = ffi(:ngt_get_property_object_type, @property)
37
+ FFI.ngt_is_property_object_type_float(object_type) ? :float : :integer
38
+ end
21
39
  end
22
40
 
23
41
  def insert(object)
24
- ffi(:ngt_insert_index, @index, c_object(object.to_a), @dimension)
42
+ ffi(:ngt_insert_index, @index, c_object(object.to_a), dimensions)
25
43
  end
26
44
 
27
45
  def batch_insert(objects, num_threads: 8)
@@ -51,12 +69,12 @@ module Ngt
51
69
  end
52
70
 
53
71
  def object(id)
54
- if float?
72
+ if object_type == :float
55
73
  res = ffi(:ngt_get_object_as_float, @object_space, id)
56
- res.read_array_of_float(@dimension)
74
+ res.read_array_of_float(dimensions)
57
75
  else
58
76
  res = ffi(:ngt_get_object_as_integer, @object_space, id)
59
- res.read_array_of_uint8(@dimension)
77
+ res.read_array_of_uint8(dimensions)
60
78
  end
61
79
  end
62
80
 
@@ -67,7 +85,7 @@ module Ngt
67
85
  def search(query, size: 20, epsilon: 0.1, radius: nil)
68
86
  radius ||= -1.0
69
87
  results = ffi(:ngt_create_empty_results)
70
- ffi(:ngt_search_index, @index, c_object(query.to_a), @dimension, size, epsilon, radius, results)
88
+ ffi(:ngt_search_index, @index, c_object(query.to_a), dimensions, size, epsilon, radius, results)
71
89
  result_size = ffi(:ngt_get_result_size, results)
72
90
  ret = []
73
91
  result_size.times do |i|
@@ -78,61 +96,91 @@ module Ngt
78
96
  }
79
97
  end
80
98
  ret
99
+ ensure
100
+ FFI.ngt_destroy_results(results) if results
81
101
  end
82
102
 
83
- def save(path: nil)
84
- path ||= @path
85
- ffi(:ngt_save_index, @index, path)
103
+ def save(path2 = nil, path: nil)
104
+ warn "[ngt] Passing path as an option is deprecated - use an argument instead" if path
105
+ @path = path || path2 || @path || Dir.mktmpdir
106
+ ffi(:ngt_save_index, @index, @path)
86
107
  end
87
108
 
88
109
  def close
89
110
  FFI.ngt_close_index(@index)
90
111
  end
91
112
 
92
- def self.create(path, dimension, edge_size_for_creation: 10,
93
- edge_size_for_search: 40, object_type: "Float", distance_type: "L2")
113
+ def self.new(dimensions, path: nil, edge_size_for_creation: 10,
114
+ edge_size_for_search: 40, object_type: :float, distance_type: :l2)
94
115
 
95
116
  error = FFI.ngt_create_error_object
96
- property = ffi(:ngt_create_property, error)
97
- ffi(:ngt_set_property_dimension, property, dimension, error)
98
- ffi(:ngt_set_property_edge_size_for_creation, property, edge_size_for_creation, error)
99
- ffi(:ngt_set_property_edge_size_for_search, property, edge_size_for_search, error)
100
-
101
- case object_type.to_s
102
- when "Float", "float"
103
- ffi(:ngt_set_property_object_type_float, property, error)
104
- when "Integer", "integer"
105
- ffi(:ngt_set_property_object_type_integer, property, error)
106
- else
107
- raise ArgumentError, "Unknown object type: #{object_type}"
117
+
118
+ # TODO remove in 0.4.0
119
+ if !dimensions.is_a?(Integer) && !path
120
+ warn "[ngt] Passing a path to new is deprecated - use load instead"
121
+ path = dimensions
122
+ dimensions = nil
108
123
  end
109
124
 
110
- case distance_type.to_s
111
- when "L1"
112
- ffi(:ngt_set_property_distance_type_l1, property, error)
113
- when "L2"
114
- ffi(:ngt_set_property_distance_type_l2, property, error)
115
- when "Angle"
116
- ffi(:ngt_set_property_distance_type_angle, property, error)
117
- when "Hamming"
118
- ffi(:ngt_set_property_distance_type_hamming, property, error)
119
- when "Jaccard"
120
- ffi(:ngt_set_property_distance_type_jaccard, property, error)
121
- when "Cosine"
122
- ffi(:ngt_set_property_distance_type_cosine, property, error)
125
+ if path && dimensions.nil?
126
+ index = ffi(:ngt_open_index, path, error)
123
127
  else
124
- raise ArgumentError, "Unknown distance type: #{distance_type}"
128
+ property = ffi(:ngt_create_property, error)
129
+ ffi(:ngt_set_property_dimension, property, dimensions, error)
130
+ ffi(:ngt_set_property_edge_size_for_creation, property, edge_size_for_creation, error)
131
+ ffi(:ngt_set_property_edge_size_for_search, property, edge_size_for_search, error)
132
+
133
+ case object_type.to_s.downcase
134
+ when "float"
135
+ ffi(:ngt_set_property_object_type_float, property, error)
136
+ when "integer"
137
+ ffi(:ngt_set_property_object_type_integer, property, error)
138
+ else
139
+ raise ArgumentError, "Unknown object type: #{object_type}"
140
+ end
141
+
142
+ case distance_type.to_s.downcase
143
+ when "l1"
144
+ ffi(:ngt_set_property_distance_type_l1, property, error)
145
+ when "l2"
146
+ ffi(:ngt_set_property_distance_type_l2, property, error)
147
+ when "angle"
148
+ ffi(:ngt_set_property_distance_type_angle, property, error)
149
+ when "hamming"
150
+ ffi(:ngt_set_property_distance_type_hamming, property, error)
151
+ when "jaccard"
152
+ ffi(:ngt_set_property_distance_type_jaccard, property, error)
153
+ when "cosine"
154
+ ffi(:ngt_set_property_distance_type_cosine, property, error)
155
+ when "normalized_angle"
156
+ ffi(:ngt_set_property_distance_type_normalized_angle, property, error)
157
+ when "normalized_cosine"
158
+ ffi(:ngt_set_property_distance_type_normalized_cosine, property, error)
159
+ else
160
+ raise ArgumentError, "Unknown distance type: #{distance_type}"
161
+ end
162
+
163
+ index =
164
+ if path
165
+ ffi(:ngt_create_graph_and_tree, path, property, error)
166
+ else
167
+ ffi(:ngt_create_graph_and_tree_in_memory, property, error)
168
+ end
125
169
  end
126
170
 
127
- index = ffi(:ngt_create_graph_and_tree, path, property, error)
128
- FFI.ngt_close_index(index)
129
- index = nil
130
-
131
- Index.new(path)
171
+ super(index, path)
132
172
  ensure
133
173
  FFI.ngt_destroy_error_object(error) if error
134
174
  FFI.ngt_destroy_property(property) if property
135
- FFI.ngt_close_index(index) if index
175
+ end
176
+
177
+ def self.load(path)
178
+ new(nil, path: path)
179
+ end
180
+
181
+ def self.create(path, dimensions, **options)
182
+ warn "[ngt] create is deprecated - use new instead"
183
+ new(dimensions, path: path, **options)
136
184
  end
137
185
 
138
186
  # private
@@ -140,11 +188,12 @@ module Ngt
140
188
  Utils.ffi(*args)
141
189
  end
142
190
 
143
- def self.finalize(error)
191
+ def self.finalize(error, index, property)
144
192
  # must use proc instead of stabby lambda
145
193
  proc do
146
- # TODO clean-up more objects
147
194
  FFI.ngt_destroy_error_object(error)
195
+ FFI.ngt_close_index(index)
196
+ FFI.ngt_destroy_property(property)
148
197
  end
149
198
  end
150
199
 
@@ -154,10 +203,6 @@ module Ngt
154
203
  defined?(Numo::NArray) && data.is_a?(Numo::NArray)
155
204
  end
156
205
 
157
- def float?
158
- @float
159
- end
160
-
161
206
  def c_object(object)
162
207
  c_object = ::FFI::MemoryPointer.new(:double, object.size)
163
208
  c_object.write_array_of_double(object)
@@ -15,11 +15,11 @@ module Ngt
15
15
  end
16
16
 
17
17
  def execute(in_index_path, out_index_path)
18
- ffi(:ngt_optimizer_execute, @optimizer, in_index_path, out_index_path)
18
+ ffi(:ngt_optimizer_execute, @optimizer, path(in_index_path), out_index_path)
19
19
  end
20
20
 
21
21
  def adjust_search_coefficients(index_path)
22
- ffi(:ngt_optimizer_adjust_search_coefficients, @optimizer, index_path)
22
+ ffi(:ngt_optimizer_adjust_search_coefficients, @optimizer, path(index_path))
23
23
  end
24
24
 
25
25
  def self.finalize(optimizer, error)
@@ -29,5 +29,16 @@ module Ngt
29
29
  FFI.ngt_destroy_error_object(error)
30
30
  end
31
31
  end
32
+
33
+ private
34
+
35
+ def path(obj)
36
+ if obj.is_a?(Ngt::Index)
37
+ raise ArgumentError, "Index not saved" unless obj.path
38
+ obj.path
39
+ else
40
+ obj
41
+ end
42
+ end
32
43
  end
33
44
  end
@@ -1,3 +1,3 @@
1
1
  module Ngt
2
- VERSION = "0.2.2"
2
+ VERSION = "0.3.2"
3
3
  end
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ngt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-12 00:00:00.000000000 Z
11
+ date: 2020-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -80,7 +80,7 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
- description:
83
+ description:
84
84
  email: andrew@chartkick.com
85
85
  executables: []
86
86
  extensions: []
@@ -103,7 +103,7 @@ homepage: https://github.com/ankane/ngt
103
103
  licenses:
104
104
  - Apache-2.0
105
105
  metadata: {}
106
- post_install_message:
106
+ post_install_message:
107
107
  rdoc_options: []
108
108
  require_paths:
109
109
  - lib
@@ -118,8 +118,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
118
  - !ruby/object:Gem::Version
119
119
  version: '0'
120
120
  requirements: []
121
- rubygems_version: 3.1.2
122
- signing_key:
121
+ rubygems_version: 3.2.3
122
+ signing_key:
123
123
  specification_version: 4
124
124
  summary: High-speed approximate nearest neighbors for Ruby
125
125
  test_files: []