redisbench-admin 0.11.63__py3-none-any.whl → 0.11.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. redisbench_admin/run/ann/pkg/.dockerignore +2 -0
  2. redisbench_admin/run/ann/pkg/.git +1 -0
  3. redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +100 -0
  4. redisbench_admin/run/ann/pkg/.gitignore +21 -0
  5. redisbench_admin/run/ann/pkg/LICENSE +21 -0
  6. redisbench_admin/run/ann/pkg/README.md +157 -0
  7. redisbench_admin/run/ann/pkg/algos.yaml +1294 -0
  8. redisbench_admin/run/ann/pkg/algosP.yaml +67 -0
  9. redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +2 -0
  10. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
  11. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +26 -0
  12. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +22 -0
  13. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +36 -0
  14. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +110 -0
  15. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +17 -0
  16. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +29 -0
  17. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +187 -0
  18. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +190 -0
  19. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +31 -0
  20. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +25 -0
  21. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +107 -0
  22. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +124 -0
  23. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +124 -0
  24. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +61 -0
  25. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +39 -0
  26. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +27 -0
  27. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +36 -0
  28. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +22 -0
  29. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +39 -0
  30. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +25 -0
  31. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +99 -0
  32. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +41 -0
  33. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +28 -0
  34. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +48 -0
  35. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +74 -0
  36. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +100 -0
  37. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +107 -0
  38. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +79 -0
  39. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +39 -0
  40. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +45 -0
  41. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +115 -0
  42. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +102 -0
  43. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +90 -0
  44. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +20 -0
  45. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +34 -0
  46. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +28 -0
  47. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +246 -0
  48. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +149 -0
  49. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +43 -0
  50. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +47 -0
  51. redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +1 -0
  52. redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +48 -0
  53. redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +620 -0
  54. redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +53 -0
  55. redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +325 -0
  56. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +2 -0
  57. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +183 -0
  58. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +17 -0
  59. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +165 -0
  60. redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +71 -0
  61. redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +333 -0
  62. redisbench_admin/run/ann/pkg/create_dataset.py +12 -0
  63. redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +147 -0
  64. redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +117 -0
  65. redisbench_admin/run/ann/pkg/create_website.py +272 -0
  66. redisbench_admin/run/ann/pkg/install/Dockerfile +11 -0
  67. redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +5 -0
  68. redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +4 -0
  69. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +29 -0
  70. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +31 -0
  71. redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +5 -0
  72. redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +45 -0
  73. redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +61 -0
  74. redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +18 -0
  75. redisbench_admin/run/ann/pkg/install/Dockerfile.flann +10 -0
  76. redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +10 -0
  77. redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +6 -0
  78. redisbench_admin/run/ann/pkg/install/Dockerfile.mih +4 -0
  79. redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +27 -0
  80. redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +4 -0
  81. redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +5 -0
  82. redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +5 -0
  83. redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +13 -0
  84. redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +10 -0
  85. redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +43 -0
  86. redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +6 -0
  87. redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +4 -0
  88. redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +18 -0
  89. redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +5 -0
  90. redisbench_admin/run/ann/pkg/install/Dockerfile.scann +5 -0
  91. redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +4 -0
  92. redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +4 -0
  93. redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +30 -0
  94. redisbench_admin/run/ann/pkg/install/Dockerfile.vald +8 -0
  95. redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +17 -0
  96. redisbench_admin/run/ann/pkg/install.py +70 -0
  97. redisbench_admin/run/ann/pkg/logging.conf +34 -0
  98. redisbench_admin/run/ann/pkg/multirun.py +298 -0
  99. redisbench_admin/run/ann/pkg/plot.py +159 -0
  100. redisbench_admin/run/ann/pkg/protocol/bf-runner +10 -0
  101. redisbench_admin/run/ann/pkg/protocol/bf-runner.py +204 -0
  102. redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +51 -0
  103. redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +77 -0
  104. redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +77 -0
  105. redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +47 -0
  106. redisbench_admin/run/ann/pkg/protocol/specification.md +194 -0
  107. redisbench_admin/run/ann/pkg/requirements.txt +14 -0
  108. redisbench_admin/run/ann/pkg/requirements_py38.txt +11 -0
  109. redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
  110. redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
  111. redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
  112. redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
  113. redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
  114. redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
  115. redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
  116. redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
  117. redisbench_admin/run/ann/pkg/run.py +12 -0
  118. redisbench_admin/run/ann/pkg/run_algorithm.py +3 -0
  119. redisbench_admin/run/ann/pkg/templates/chartjs.template +102 -0
  120. redisbench_admin/run/ann/pkg/templates/detail_page.html +23 -0
  121. redisbench_admin/run/ann/pkg/templates/general.html +58 -0
  122. redisbench_admin/run/ann/pkg/templates/latex.template +30 -0
  123. redisbench_admin/run/ann/pkg/templates/summary.html +60 -0
  124. redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
  125. redisbench_admin/run/ann/pkg/test/test-jaccard.py +19 -0
  126. redisbench_admin/run/ann/pkg/test/test-metrics.py +99 -0
  127. redisbench_admin/run_async/run_async.py +2 -2
  128. redisbench_admin/run_local/run_local.py +2 -2
  129. redisbench_admin/run_remote/run_remote.py +9 -5
  130. {redisbench_admin-0.11.63.dist-info → redisbench_admin-0.11.65.dist-info}/METADATA +2 -5
  131. redisbench_admin-0.11.65.dist-info/RECORD +243 -0
  132. {redisbench_admin-0.11.63.dist-info → redisbench_admin-0.11.65.dist-info}/WHEEL +1 -1
  133. redisbench_admin-0.11.63.dist-info/RECORD +0 -117
  134. {redisbench_admin-0.11.63.dist-info/licenses → redisbench_admin-0.11.65.dist-info}/LICENSE +0 -0
  135. {redisbench_admin-0.11.63.dist-info → redisbench_admin-0.11.65.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,246 @@
1
+ from __future__ import absolute_import
2
+ from os.path import basename
3
+ import shlex
4
+ from types import MethodType
5
+ import psutil
6
+ import subprocess
7
+ from ann_benchmarks.data import \
8
+ bit_unparse_entry, int_unparse_entry, float_unparse_entry
9
+ from ann_benchmarks.algorithms.base import BaseANN
10
+
11
+
12
class SubprocessStoppedError(Exception):
    """Raised when the wrapped benchmark subprocess is no longer running.

    Attributes:
        code: the return code reported for the stopped process.
    """

    def __init__(self, code):
        # Name this class (not Exception) in super() so the regular MRO is
        # followed instead of skipping Exception's own initialiser.
        super(SubprocessStoppedError, self).__init__(code)
        self.code = code
16
+
17
+
18
class Subprocess(BaseANN):
    """Drive an external program through a line-based text protocol.

    The program is started lazily on first use (see `_get_program_handle`).
    Requests are written to its stdin one line at a time; replies are read
    from its stdout, and only lines whose first token is "epbprtv0" are
    treated as protocol replies.

    Args:
        args: argument vector handed to `subprocess.Popen`; `args[0]` is the
            program path (its basename is used in `name`).
        encoder: callable turning one data point into the string that is
            sent to the program.
        params: mapping of option name to value, sent to the program as
            configuration right after it starts.
    """

    def __init__(self, args, encoder, params):
        self.name = "Subprocess(program = %s, %s)" % \
            (basename(args[0]), str(params))
        self._program = None
        self._args = args
        self._encoder = encoder
        self._params = params

    def _raw_line(self):
        """Read one line from the program and return it split into tokens.

        Raises:
            SubprocessStoppedError: if the program's stdout reached EOF.
        """
        raw = self._get_program_handle().stdout.readline()
        if raw == "":
            # readline() only returns "" at EOF (a blank line is "\n").
            # Without this check _line() would loop forever once the
            # program has closed its output.
            raise SubprocessStoppedError(self._program.poll())
        return shlex.split(raw.strip())

    def _line(self):
        """Return the tokens of the next protocol reply, marker removed.

        Lines that are empty or do not start with "epbprtv0" are skipped.
        """
        line = self._raw_line()
        while len(line) < 1 or line[0] != "epbprtv0":
            line = self._raw_line()
        return line[1:]

    @staticmethod
    def _quote(token):
        """Wrap `token` in single quotes so shlex-style parsing yields it back.

        An embedded single quote is emitted as '\\'' (close the quoted
        string, add an escaped quote, reopen). The previous replacement
        ('\\') never reopened the string and produced an unterminated quote.
        """
        return "'" + str(token).replace("'", "'\\''") + "'"

    def _write(self, string):
        """Send one line to the program's stdin."""
        self._get_program_handle().stdin.write(string + "\n")

    # Called once all options have been sent, immediately before the empty
    # line that switches the program to training mode. Subclasses override
    # it to enable protocol extensions.
    def _configuration_hook(self):
        pass

    def _get_program_handle(self):
        """Return the running program, starting and configuring it if needed.

        Raises:
            SubprocessStoppedError: if the program has already exited with a
                non-zero return code.
        """
        if self._program:
            self._program.poll()
            if self._program.returncode:
                raise SubprocessStoppedError(self._program.returncode)
        else:
            self._program = subprocess.Popen(
                self._args,
                bufsize=1,  # line buffering
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                universal_newlines=True)

            # Configuration phase: one "<key> <value>" line per option.
            for key, value in self._params.items():
                self._write("%s %s" %
                            (Subprocess._quote(key), Subprocess._quote(value)))
                assert self._line()[0] == "ok", \
                    "assigning value '%s' to option '%s' failed" % (value, key)
            self._configuration_hook()

            # An empty line ends configuration and enters training mode.
            self._write("")
            assert self._line()[0] == "ok", \
                "transitioning to training mode failed"
        return self._program

    def get_memory_usage(self):
        """Return the program's resident set size divided by 1024."""
        if not self._program:
            self._get_program_handle()
        return psutil.Process(pid=self._program.pid).memory_info().rss / 1024

    def fit(self, X):
        """Send every entry of `X` as a training point, then enter query mode."""
        for entry in X:
            d = Subprocess._quote(self._encoder(entry))
            self._write(d)
            assert self._line()[0] == "ok", \
                "encoded training point '%s' was rejected" % d
        # An empty line ends training and enters query mode.
        self._write("")
        assert self._line()[0] == "ok", \
            "transitioning to query mode failed"

    def query(self, v, n):
        """Ask the program for `n` neighbours of `v`; return their indices."""
        d = Subprocess._quote(self._encoder(v))
        self._write("%s %d" % (d, n))
        return self._handle_query_response()

    def _handle_query_response(self):
        """Parse an "ok <count>" / "fail" status line and collect the results."""
        status = self._line()
        if status[0] == "ok":
            return self._collect_query_response_lines(int(status[1]))
        assert status[0] == "fail", \
            "query neither succeeded nor failed"
        return []

    def _collect_query_response_lines(self, count):
        """Read `count` reply lines and return the integer heading each one."""
        return [int(self._line()[0]) for _ in range(count)]

    def done(self):
        """Terminate the program unless it reported a non-zero return code."""
        if self._program:
            self._program.poll()
            if not self._program.returncode:
                self._program.terminate()
119
+
120
+
121
class PreparedSubprocess(Subprocess):
    """Subprocess variant that uses the "prepared-queries" frontend mode.

    A query is first prepared (sent to the program), then executed with a
    separate "query" command, and its results are fetched afterwards.
    """

    def __init__(self, args, encoder, params):
        super(PreparedSubprocess, self).__init__(args, encoder, params)
        # Number of result lines still waiting to be read for the last query.
        self._result_count = None

    def _configuration_hook(self):
        """Switch the program into prepared-queries mode."""
        self._write("frontend prepared-queries 1")
        assert self._line()[0] == "ok", \
            "enabling prepared queries mode failed"

    def query(self, v, n):
        """Prepare, run and collect a single query in one call."""
        self.prepare_query(v, n)
        self.run_prepared_query()
        return self.get_prepared_query_results()

    def prepare_query(self, v, n):
        """Send the encoded query point and neighbour count to the program."""
        encoded = Subprocess._quote(self._encoder(v))
        self._write("%s %d" % (encoded, n))
        assert self._line()[0] == "ok", \
            "preparing the query '%s' failed" % encoded

    def run_prepared_query(self):
        """Execute the prepared query and remember how many results it has."""
        self._write("query")
        reply = self._line()
        if reply[0] != "ok":
            assert reply[0] == "fail", \
                "query neither succeeded nor failed"
            self._result_count = 0
            return
        self._result_count = int(reply[1])

    def get_prepared_query_results(self):
        """Return the pending results (if any) and reset the pending count."""
        if not self._result_count:
            return []
        try:
            return self._collect_query_response_lines(self._result_count)
        finally:
            self._result_count = 0
160
+
161
+
162
class BatchSubprocess(Subprocess):
    """Subprocess variant that uses the "batch-queries" frontend mode.

    Several query points are sent on one line, run with a single "query"
    command, and answered with one status line (plus results) per point.
    """

    def __init__(self, args, encoder, params):
        super(BatchSubprocess, self).__init__(args, encoder, params)
        # Number of query points in the most recently prepared batch.
        self._qp_count = None

    def _configuration_hook(self):
        """Switch the program into batch-queries mode."""
        self._write("frontend batch-queries 1")
        assert self._line()[0] == "ok", \
            "enabling batch queries mode failed"

    def query(self, v, n):
        """Run a batch containing only `v` and return its result list."""
        self.prepare_batch_query([v], n)
        self.run_batch_query()
        return self.get_batch_results()[0]

    def prepare_batch_query(self, X, n):
        """Send all points of `X` (space separated) followed by `n`."""
        encoded = " ".join(
            Subprocess._quote(self._encoder(point)) for point in X)
        self._qp_count = len(X)
        self._write("%s %d" % (encoded, n))
        assert self._line()[0] == "ok", \
            "preparing the batch query '%s' failed" % encoded

    def run_batch_query(self):
        """Execute the prepared batch."""
        self._write("query")
        reply = self._line()
        assert reply[0] == "ok", \
            "batch query failed completely"

    def get_batch_results(self):
        """Return one result list per query point of the last batch.

        A point whose status line is not "ok" contributes an empty list.
        """
        collected = []
        for _ in range(self._qp_count):
            reply = self._line()
            if reply[0] != "ok":
                collected.append([])
                continue
            collected.append(
                self._collect_query_response_lines(int(reply[1])))
        return collected
203
+
204
+
205
def BitSubprocess(args, params):
    """Build a Subprocess whose points are encoded with bit_unparse_entry."""
    return Subprocess(args, encoder=bit_unparse_entry, params=params)
207
+
208
+
209
def BitSubprocessPrepared(args, params):
    """Build a PreparedSubprocess that encodes points with bit_unparse_entry."""
    return PreparedSubprocess(
        args, encoder=bit_unparse_entry, params=params)
211
+
212
+
213
def FloatSubprocess(args, params):
    """Build a Subprocess whose points are encoded with float_unparse_entry."""
    return Subprocess(args, encoder=float_unparse_entry, params=params)
215
+
216
+
217
def FloatSubprocessPrepared(args, params):
    """Build a PreparedSubprocess that encodes points with float_unparse_entry."""
    return PreparedSubprocess(
        args, encoder=float_unparse_entry, params=params)
219
+
220
+
221
def FloatSubprocessBatch(args, params):
    """Build a BatchSubprocess that encodes points with float_unparse_entry."""
    return BatchSubprocess(
        args, encoder=float_unparse_entry, params=params)
223
+
224
+
225
def IntSubprocess(args, params):
    """Build a Subprocess whose points are encoded with int_unparse_entry."""
    return Subprocess(args, encoder=int_unparse_entry, params=params)
227
+
228
+
229
def QueryParamWrapper(constructor, args, params):
    """Create an algorithm via `constructor` and add query-parameter support.

    The returned object gets two instance-level overrides:
      * `_configuration_hook` runs the object's original hook and then
        enables the "query-parameters" frontend extension;
      * `set_query_arguments(*args)` sends the quoted arguments to the
        program as a "query-params ... set" command.

    Args:
        constructor: callable invoked as `constructor(args, params)`.
        args: forwarded to `constructor`.
        params: forwarded to `constructor`.

    Returns:
        The object produced by `constructor`, patched as described above.
    """
    wrapped = constructor(args, params)

    # Capture the hook that was bound before patching so it is still run.
    def _configure(self, original=wrapped._configuration_hook):
        original()
        self._write("frontend query-parameters 1")
        assert self._line()[0] == "ok", \
            "enabling query parameter support failed"

    def _set_query_arguments(self, *args):
        quoted = " ".join(Subprocess._quote(arg) for arg in args)
        self._write("query-params %s set" % quoted)
        assert self._line()[0] == "ok", \
            "reconfiguring query parameters failed"
        print(args)

    wrapped._configuration_hook = MethodType(_configure, wrapped)
    wrapped.set_query_arguments = MethodType(_set_query_arguments, wrapped)
    return wrapped
@@ -0,0 +1,149 @@
1
+ from __future__ import absolute_import
2
+
3
+ import atexit
4
+ import subprocess
5
+ import urllib.error
6
+ import urllib.request
7
+
8
+ import grpc
9
+ import yaml
10
+ from ann_benchmarks.algorithms.base import BaseANN
11
+
12
+ from vald.v1.vald import insert_pb2_grpc, search_pb2_grpc
13
+ from vald.v1.agent.core import agent_pb2_grpc
14
+ from vald.v1.payload import payload_pb2
15
+
16
+
17
# Baseline server configuration. Vald.fit() merges its NGT settings into the
# 'ngt' section and dumps the result to config.yaml for the /go/bin/ngt
# process it launches.
default_server_config = {
    'version': 'v0.0.0',
    'logging': {
        'logger': 'glg',
        'level': 'info',
        'format': 'raw'
    },
    'server_config': {
        'servers': [
            {
                'name': 'agent-grpc',
                'host': '127.0.0.1',
                'port': 8082,
                'mode': 'GRPC',
                'probe_wait_time': '3s',
                #'grpc': {
                #    'bidirectional_stream_concurrency': 1
                #},
                # Same socket path as the 'unix:///var/run/vald.sock'
                # address the Vald class connects to.
                "network": "unix",
                "socket_path": "/var/run/vald.sock"
            }
        ],
        'health_check_servers': [
            {
                # Vald.fit() polls http://localhost:3001/readiness, i.e.
                # this port, before inserting data.
                'name': 'readiness',
                'host': '127.0.0.1',
                'port': 3001,
                'mode': '',
                'probe_wait_time': '3s',
                'http': {
                    'shutdown_duration': '5s',
                    'handler_timeout': '',
                    'idle_timeout': '',
                    'read_header_timeout': '',
                    'read_timeout': '',
                    'write_timeout': ''
                }
            }
        ],
        'startup_strategy': ['agent-grpc', 'readiness'],
        'shutdown_strategy': ['readiness', 'agent-grpc'],
        'full_shutdown_duration': '600s',
        'tls': {
            'enabled': False,
        }
    },
    # Defaults for the 'ngt' section; Vald.fit() adds/overrides keys here.
    'ngt': {
        'enable_in_memory_mode': True,
        'default_pool_size': 10000,
        'default_epsilon': 0.01,
        'default_radius': -1.0,
        #'vqueue': {
        #    'insert_buffer_size': 100,
        #    'insert_buffer_pool_size': 1000,
        #    'delete_buffer_size': 100,
        #    'delete_buffer_pool_size': 1000
        #}
    }
}

# Channel options passed to every grpc.insecure_channel() call in this module.
grpc_opts = [
    ('grpc.keepalive_time_ms', 1000 * 10),
    ('grpc.keepalive_timeout_ms', 1000 * 10),
    ('grpc.max_connection_idle_ms', 1000 * 50)
]

# Maps the benchmark's metric name to the value written as the NGT
# 'distance_type'.
metrics = {'euclidean': 'l2', 'angular': 'cosine'}
84
+
85
+
86
class Vald(BaseANN):
    """Benchmark wrapper that runs a local NGT agent and talks to it over gRPC.

    Args:
        metric: key of the module-level `metrics` mapping
            ('euclidean' or 'angular').
        object_type: value stored as the NGT 'object_type'.
        params: mapping providing 'searchedge', 'edge' and 'bulk'; each is
            converted with int().
    """

    def __init__(self, metric, object_type, params):
        # Function-scope import: only this block needs `copy`.
        import copy

        # Deep-copy the defaults: fit() updates the 'ngt' section in place,
        # and aliasing the module-level dict would leak one instance's
        # settings into every later instance.
        self._param = copy.deepcopy(default_server_config)
        self._ngt_config = {
            'distance_type': metrics[metric],
            'object_type': object_type,
            'search_edge_size': int(params['searchedge']),
            'creation_edge_size': int(params['edge']),
            'bulk_insert_chunk_size': int(params['bulk'])
        }
        #self._address = 'localhost:8082'
        self._address = 'unix:///var/run/vald.sock'

    def fit(self, X):
        """Write config.yaml, start the agent, insert vectors, build the index."""
        # Function-scope import: only this block needs `time`.
        import time

        dim = len(X[0])
        self._ngt_config['dimension'] = dim
        self._param['ngt'].update(self._ngt_config)
        with open('config.yaml', 'w') as f:
            yaml.dump(self._param, f)

        cfg = payload_pb2.Insert.Config(skip_strict_exist_check=True)
        # NOTE(review): only the first 100 entries of X are inserted; this
        # looks like a leftover debugging limit -- confirm it is intended.
        vectors = [
            payload_pb2.Insert.Request(
                vector=payload_pb2.Object.Vector(id=str(i), vector=x.tolist()),
                config=cfg) for i, x in enumerate(X[:100])]

        p = subprocess.Popen(['/go/bin/ngt', '-f', 'config.yaml'])
        # Make sure the agent does not outlive the benchmark process.
        atexit.register(lambda: p.kill())

        # Wait until the readiness endpoint answers with HTTP 200.
        while True:
            try:
                with urllib.request.urlopen('http://localhost:3001/readiness') as response:
                    if response.getcode() == 200:
                        break
            except (urllib.error.HTTPError, urllib.error.URLError):
                pass
            # Brief pause so the probe does not busy-spin while the agent
            # is still starting up.
            time.sleep(0.1)

        channel = grpc.insecure_channel(self._address, grpc_opts)
        istub = insert_pb2_grpc.InsertStub(channel)
        # Drain the response stream so that every insert is processed.
        for _ in istub.StreamInsert(iter(vectors)):
            pass

        astub = agent_pb2_grpc.AgentStub(channel)
        astub.CreateIndex(
            payload_pb2.Control.CreateIndexRequest(
                pool_size=10000))

    def set_query_arguments(self, epsilon):
        """Store `epsilon - 1.0` for searches and open the search stub."""
        self._epsilon = epsilon - 1.0
        channel = grpc.insecure_channel(self._address, grpc_opts)
        self._stub = search_pb2_grpc.SearchStub(channel)

    def query(self, v, n):
        """Search for `n` results for vector `v`; return their ids as ints.

        Requires `set_query_arguments` to have been called first.
        """
        cfg = payload_pb2.Search.Config(num=n, radius=-1.0, epsilon=self._epsilon, timeout=3000000)
        response = self._stub.Search(payload_pb2.Search.Request(vector=v.tolist(), config=cfg))
        return [int(result.id) for result in response.results]

    def __str__(self):
        # Reports the epsilon as originally passed to set_query_arguments.
        return 'Vald(%d, %d, %d, %1.3f)' % (
            self._ngt_config['creation_edge_size'],
            self._ngt_config['search_edge_size'],
            self._ngt_config['bulk_insert_chunk_size'],
            self._epsilon + 1.0
        )
@@ -0,0 +1,43 @@
1
+ from __future__ import absolute_import
2
+ import os
3
+ from VecSim import *
4
+ import numpy as np
5
+ from ann_benchmarks.constants import INDEX_DIR
6
+ from ann_benchmarks.algorithms.base import BaseANN
7
+
8
+
9
class VecSimHnsw(BaseANN):
    """Benchmark wrapper around a VecSim HNSW index.

    Args:
        metric: 'angular' or 'euclidean'.
        method_param: mapping providing 'M' and 'efConstruction'.
    """

    def __init__(self, metric, method_param):
        supported_metrics = {
            'angular': VecSimMetric_Cosine,
            'euclidean': VecSimMetric_L2,
        }
        self.metric = supported_metrics[metric]
        self.method_param = method_param
        self.ef = None
        self.name = 'VecSim-hnsw (%s)' % (method_param)

    def fit(self, X):
        """Create the index sized for `X` and add every vector under its row number."""
        settings = HNSWParams()
        settings.M = self.method_param['M']
        settings.efConstruction = self.method_param['efConstruction']
        settings.initialCapacity = len(X)
        settings.dim = len(X[0])
        settings.type = VecSimType_FLOAT32
        settings.metric = self.metric
        settings.multi = False

        self.index = HNSWIndex(settings)

        for label, row in enumerate(X):
            self.index.add_vector(row, label)

    def set_query_arguments(self, ef):
        """Remember `ef` and apply it to the index."""
        self.ef = ef
        self.index.set_ef(ef)

    def query(self, v, n):
        """Run a k-NN query for `v` with k=`n` and return the first result row."""
        batch = np.expand_dims(v, axis=0)
        found = self.index.knn_query(batch, k=n)
        return found[0][0]

    def freeIndex(self):
        """Drop this object's reference to the index."""
        del self.index

    def __str__(self):
        return "{}, efRuntime: {}".format(self.name, self.ef)
@@ -0,0 +1,47 @@
1
+ from ann_benchmarks.algorithms.base import BaseANN
2
+ from vespa_ann_benchmark import DistanceMetric, HnswIndexParams, HnswIndex
3
+ import time
4
+
5
+ # Class using the Vespa implementation of an HNSW index for nearest neighbor
6
+ # search over data points in a high dimensional vector space.
7
+ #
8
+ # To use nearest neighbor search in a Vespa application,
9
+ # see https://docs.vespa.ai/en/approximate-nn-hnsw.html for more details.
10
class VespaHnswBase(BaseANN):
    """Benchmark wrapper around the Vespa HNSW index bindings.

    Args:
        enable_normalize: when true and the metric is angular, the index is
            created with normalisation on and the inner-product metric.
        metric: 'angular' or 'euclidean'.
        dimension: passed to HnswIndex as the vector dimension.
        param: mapping with optional 'efConstruction' (default 200) and
            'M' (default 8).

    Raises:
        NotImplementedError: for any other metric name.
    """

    def __init__(self, enable_normalize, metric, dimension, param):
        if metric not in ('angular', 'euclidean'):
            raise NotImplementedError(
                "VespaHnsw doesn't support metric %s" % metric)
        by_name = {
            'angular': DistanceMetric.Angular,
            'euclidean': DistanceMetric.Euclidean,
        }
        self.metric = by_name[metric]
        normalize = False
        if self.metric == DistanceMetric.Angular and enable_normalize:
            # Angular search is carried out as inner product with
            # normalisation enabled on the index.
            normalize = True
            self.metric = DistanceMetric.InnerProduct
        self.param = param
        self.neighbors_to_explore_at_insert = param.get("efConstruction", 200)
        self.max_links_per_node = param.get("M", 8)
        self.dimension = dimension
        self.neighbors_to_explore = 200
        self.name = 'VespaHnsw()'
        index_params = HnswIndexParams(
            self.max_links_per_node,
            self.neighbors_to_explore_at_insert,
            self.metric,
            False)
        self.index = HnswIndex(dimension, index_params, normalize)

    def fit(self, X):
        """Store every vector of `X` in the index under its row number."""
        for position, vector in enumerate(X):
            self.index.set_vector(position, vector)

    def set_query_arguments(self, ef):
        """Set how many neighbours later queries explore."""
        print("VespaHnsw: ef = " + str(ef))
        self.neighbors_to_explore = ef

    def query(self, v, n):
        """Return only the indices from `query_with_distances`."""
        hits = self.query_with_distances(v, n)
        return [hit_index for hit_index, _ in hits]

    def query_with_distances(self, v, n):
        """Return what the index's find_top_k reports for `v` and `n`."""
        return self.index.find_top_k(n, v, self.neighbors_to_explore)
41
+
42
class VespaHnsw(VespaHnswBase):
    """VespaHnswBase with normalisation enabled."""

    def __init__(self, metric, dimension, param):
        super().__init__(
            enable_normalize=True,
            metric=metric,
            dimension=dimension,
            param=param)

    def __str__(self):
        template = 'VespaHnsw ({}, ef: {})'
        return template.format(self.param, self.neighbors_to_explore)
@@ -0,0 +1 @@
1
# NOTE(review): presumably the directory name under which algorithm wrappers
# store index files -- confirm against the modules that import it.
INDEX_DIR = 'indices'
@@ -0,0 +1,48 @@
1
+ from __future__ import absolute_import
2
+ import numpy
3
+
4
+
5
def float_parse_entry(line):
    """Parse a whitespace-separated line into a list of floats."""
    return list(map(float, line.split()))
7
+
8
+
9
def float_unparse_entry(entry):
    """Render the items of `entry` with str(), separated by single spaces."""
    return " ".join(str(value) for value in entry)
11
+
12
+
13
def int_parse_entry(line):
    """Parse a whitespace-separated line into a frozenset of ints."""
    return frozenset(int(token) for token in line.split())
15
+
16
+
17
def int_unparse_entry(entry):
    """Render the items of `entry` as ints, separated by single spaces."""
    return " ".join(str(int(value)) for value in entry)
19
+
20
+
21
def bit_parse_entry(line):
    """Parse a line of 0/1 digits into a list of booleans.

    Surrounding whitespace is stripped and spaces and tabs between digits
    are ignored; every remaining character is converted with int().
    """
    digits = line.strip()
    return [bool(int(ch)) for ch in digits if ch not in (" ", "\t")]
25
+
26
+
27
def bit_unparse_entry(entry):
    """Render each item of `entry` as "1" (truthy) or "0", space separated."""
    return " ".join("1" if bit else "0" for bit in entry)
29
+
30
+
31
# Per point-type metadata: the element type plus the functions that parse an
# entry from text and render it back. Only "float" defines "finish_entries".
#
# The builtins `float` and `object` replace `numpy.float` / `numpy.object`:
# those names were plain aliases of the builtins and have been removed from
# NumPy, so referencing them fails at import time on current releases.
type_info = {
    "float": {
        "type": float,
        "parse_entry": float_parse_entry,
        "unparse_entry": float_unparse_entry,
        "finish_entries": numpy.vstack
    },
    "bit": {
        "type": numpy.bool_,
        "parse_entry": bit_parse_entry,
        "unparse_entry": bit_unparse_entry
    },
    "int": {
        "type": object,
        "parse_entry": int_parse_entry,
        "unparse_entry": int_unparse_entry,
    },
}