redisbench-admin 0.11.66__py3-none-any.whl → 0.11.68__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (141)
  1. redisbench_admin/run/args.py +1 -0
  2. redisbench_admin/run/cluster.py +1 -3
  3. redisbench_admin/run_remote/remote_db.py +3 -1
  4. redisbench_admin/run_remote/remote_helpers.py +27 -11
  5. redisbench_admin/run_remote/run_remote.py +11 -8
  6. redisbench_admin/run_remote/standalone.py +6 -2
  7. redisbench_admin/utils/benchmark_config.py +6 -2
  8. redisbench_admin/utils/local.py +4 -2
  9. redisbench_admin/utils/remote.py +81 -33
  10. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info}/METADATA +5 -2
  11. redisbench_admin-0.11.68.dist-info/RECORD +117 -0
  12. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info}/WHEEL +1 -1
  13. redisbench_admin/run/ann/pkg/.dockerignore +0 -2
  14. redisbench_admin/run/ann/pkg/.git +0 -1
  15. redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +0 -100
  16. redisbench_admin/run/ann/pkg/.gitignore +0 -21
  17. redisbench_admin/run/ann/pkg/LICENSE +0 -21
  18. redisbench_admin/run/ann/pkg/README.md +0 -157
  19. redisbench_admin/run/ann/pkg/algos.yaml +0 -1294
  20. redisbench_admin/run/ann/pkg/algosP.yaml +0 -67
  21. redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +0 -2
  22. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
  23. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +0 -26
  24. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +0 -22
  25. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +0 -36
  26. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +0 -110
  27. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +0 -17
  28. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +0 -29
  29. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +0 -187
  30. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +0 -190
  31. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +0 -31
  32. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +0 -25
  33. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +0 -107
  34. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +0 -124
  35. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +0 -124
  36. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +0 -61
  37. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +0 -39
  38. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +0 -27
  39. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +0 -36
  40. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +0 -22
  41. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +0 -39
  42. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +0 -25
  43. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +0 -99
  44. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +0 -41
  45. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +0 -28
  46. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +0 -48
  47. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +0 -74
  48. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +0 -100
  49. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +0 -107
  50. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +0 -79
  51. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +0 -39
  52. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +0 -45
  53. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +0 -115
  54. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +0 -102
  55. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +0 -90
  56. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +0 -20
  57. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +0 -34
  58. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +0 -28
  59. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +0 -246
  60. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +0 -149
  61. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +0 -43
  62. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +0 -47
  63. redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +0 -1
  64. redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +0 -48
  65. redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +0 -620
  66. redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +0 -53
  67. redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +0 -325
  68. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +0 -2
  69. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +0 -183
  70. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +0 -17
  71. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +0 -165
  72. redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +0 -71
  73. redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +0 -333
  74. redisbench_admin/run/ann/pkg/create_dataset.py +0 -12
  75. redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +0 -147
  76. redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +0 -117
  77. redisbench_admin/run/ann/pkg/create_website.py +0 -272
  78. redisbench_admin/run/ann/pkg/install/Dockerfile +0 -11
  79. redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +0 -5
  80. redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +0 -4
  81. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +0 -29
  82. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +0 -31
  83. redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +0 -5
  84. redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +0 -45
  85. redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +0 -61
  86. redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +0 -18
  87. redisbench_admin/run/ann/pkg/install/Dockerfile.flann +0 -10
  88. redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +0 -10
  89. redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +0 -6
  90. redisbench_admin/run/ann/pkg/install/Dockerfile.mih +0 -4
  91. redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +0 -27
  92. redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +0 -4
  93. redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +0 -5
  94. redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +0 -5
  95. redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +0 -13
  96. redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +0 -10
  97. redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +0 -43
  98. redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +0 -6
  99. redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +0 -4
  100. redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +0 -18
  101. redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +0 -5
  102. redisbench_admin/run/ann/pkg/install/Dockerfile.scann +0 -5
  103. redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +0 -4
  104. redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +0 -4
  105. redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +0 -30
  106. redisbench_admin/run/ann/pkg/install/Dockerfile.vald +0 -8
  107. redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +0 -17
  108. redisbench_admin/run/ann/pkg/install.py +0 -70
  109. redisbench_admin/run/ann/pkg/logging.conf +0 -34
  110. redisbench_admin/run/ann/pkg/multirun.py +0 -298
  111. redisbench_admin/run/ann/pkg/plot.py +0 -159
  112. redisbench_admin/run/ann/pkg/protocol/bf-runner +0 -10
  113. redisbench_admin/run/ann/pkg/protocol/bf-runner.py +0 -204
  114. redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +0 -51
  115. redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +0 -77
  116. redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +0 -77
  117. redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +0 -47
  118. redisbench_admin/run/ann/pkg/protocol/specification.md +0 -194
  119. redisbench_admin/run/ann/pkg/requirements.txt +0 -14
  120. redisbench_admin/run/ann/pkg/requirements_py38.txt +0 -11
  121. redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
  122. redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
  123. redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
  124. redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
  125. redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
  126. redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
  127. redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
  128. redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
  129. redisbench_admin/run/ann/pkg/run.py +0 -12
  130. redisbench_admin/run/ann/pkg/run_algorithm.py +0 -3
  131. redisbench_admin/run/ann/pkg/templates/chartjs.template +0 -102
  132. redisbench_admin/run/ann/pkg/templates/detail_page.html +0 -23
  133. redisbench_admin/run/ann/pkg/templates/general.html +0 -58
  134. redisbench_admin/run/ann/pkg/templates/latex.template +0 -30
  135. redisbench_admin/run/ann/pkg/templates/summary.html +0 -60
  136. redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
  137. redisbench_admin/run/ann/pkg/test/test-jaccard.py +0 -19
  138. redisbench_admin/run/ann/pkg/test/test-metrics.py +0 -99
  139. redisbench_admin-0.11.66.dist-info/RECORD +0 -243
  140. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info}/entry_points.txt +0 -0
  141. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info/licenses}/LICENSE +0 -0

redisbench_admin/run/ann/pkg/ann_benchmarks/main.py (deleted)
@@ -1,325 +0,0 @@
-from __future__ import absolute_import
-import argparse
-import logging
-import logging.config
-
-import docker
-import multiprocessing.pool
-import os
-import psutil
-import random
-import shutil
-import sys
-import traceback
-import time
-
-from ann_benchmarks.datasets import get_dataset, DATASETS
-from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.definitions import (get_definitions,
-                                                   list_algorithms,
-                                                   algorithm_status,
-                                                   InstantiationStatus)
-from ann_benchmarks.results import get_result_filename
-from ann_benchmarks.runner import run, run_docker
-
-
-def positive_int(s):
-    i = None
-    try:
-        i = int(s)
-    except ValueError:
-        pass
-    if not i or i < 1:
-        raise argparse.ArgumentTypeError("%r is not a positive integer" % s)
-    return i
-
-
-def run_worker(cpu, args, queue):
-    while not queue.empty():
-        definition = queue.get()
-        if args.local:
-            run(definition, args.dataset, args.count, args.runs, args.batch,
-                args.build_only, args.test_only, args.total_clients, args.client_id)
-        else:
-            memory_margin = 500e6  # reserve some extra memory for misc stuff
-            mem_limit = int((psutil.virtual_memory().available - memory_margin) / args.parallelism)
-            cpu_limit = str(cpu)
-            if args.batch:
-                cpu_limit = "0-%d" % (multiprocessing.cpu_count() - 1)
-
-            run_docker(definition, args.dataset, args.count,
-                       args.runs, args.timeout, args.batch, cpu_limit, mem_limit)
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        '--dataset',
-        metavar='NAME',
-        help='the dataset to load training points from',
-        default='glove-100-angular',
-        choices=DATASETS.keys())
-    parser.add_argument(
-        "-k", "--count",
-        default=10,
-        type=positive_int,
-        help="the number of near neighbours to search for")
-    parser.add_argument(
-        '--definitions',
-        metavar='FILE',
-        help='load algorithm definitions from FILE',
-        default='algos.yaml')
-    parser.add_argument(
-        '--algorithm',
-        metavar='NAME',
-        help='run only the named algorithm',
-        default=None)
-    parser.add_argument(
-        '--run-group',
-        metavar='NAME',
-        help='run only the named run group',
-        default=None)
-    parser.add_argument(
-        '--docker-tag',
-        metavar='NAME',
-        help='run only algorithms in a particular docker image',
-        default=None)
-    parser.add_argument(
-        '--list-algorithms',
-        help='print the names of all known algorithms and exit',
-        action='store_true')
-    parser.add_argument(
-        '--force',
-        help='re-run algorithms even if their results already exist',
-        action='store_true')
-    parser.add_argument(
-        '--runs',
-        metavar='COUNT',
-        type=positive_int,
-        help='run each algorithm instance %(metavar)s times and use only'
-             ' the best result',
-        default=5)
-    parser.add_argument(
-        '--timeout',
-        type=int,
-        help='Timeout (in seconds) for each individual algorithm run, or -1'
-             'if no timeout should be set',
-        default=2 * 3600)
-    parser.add_argument(
-        '--local',
-        action='store_true',
-        help='If set, then will run everything locally (inside the same '
-             'process) rather than using Docker')
-    parser.add_argument(
-        '--batch',
-        action='store_true',
-        help='If set, algorithms get all queries at once')
-    parser.add_argument(
-        '--max-n-algorithms',
-        type=int,
-        help='Max number of algorithms to run (just used for testing)',
-        default=-1)
-    parser.add_argument(
-        '--run-disabled',
-        help='run algorithms that are disabled in algos.yml',
-        action='store_true')
-    parser.add_argument(
-        '--parallelism',
-        type=positive_int,
-        help='Number of Docker containers in parallel',
-        default=1)
-    parser.add_argument(
-        '--build-only',
-        action='store_true',
-        help='building index only, not testing with queries')
-    parser.add_argument(
-        '--test-only',
-        action='store_true',
-        help='querying index only, not building it (should be built first)')
-    parser.add_argument(
-        '--cluster',
-        action='store_true',
-        help='working with a cluster')
-    parser.add_argument(
-        '--host',
-        metavar='NAME',
-        help='host name or IP',
-        default=None)
-    parser.add_argument(
-        '--port',
-        type=positive_int,
-        help='the port "host" is listening on',
-        default=None)
-    parser.add_argument(
-        '--auth', '-a',
-        metavar='PASSWORD',
-        help='password for connection',
-        default=None)
-    parser.add_argument(
-        '--user',
-        metavar='NAME',
-        help='user name for connection',
-        default=None)
-    parser.add_argument(
-        '--total-clients',
-        metavar='NUM',
-        type=positive_int,
-        help='total number of clients running in parallel',
-        default=1)
-    parser.add_argument(
-        '--client-id',
-        metavar='NUM',
-        type=positive_int,
-        help='specific client id (among the total clients)',
-        default=1)
-    parser.add_argument(
-        '--shards',
-        type=str,
-        metavar='NUM',
-        default="1",
-        help='specify number of shards')
-
-    args = parser.parse_args()
-    if args.timeout == -1:
-        args.timeout = None
-
-    if args.list_algorithms:
-        list_algorithms(args.definitions)
-        sys.exit(0)
-
-    if args.build_only and args.test_only:
-        raise Exception('Nothing to run (build only and test only was specified)')
-    if (args.build_only or args.test_only) and not args.local:
-        raise Exception('Can\'t run build or test only on docker')
-
-    conn_params = {'host': args.host, 'port': args.port, 'auth': args.auth, 'user': args.user, 'cluster': args.cluster, 'shards': args.shards}
-
-    if args.total_clients < args.client_id:
-        raise Exception('must satisfy 1 <= client_id <= total_clients')
-
-    logging.config.fileConfig("logging.conf")
-    logger = logging.getLogger("annb")
-
-    # Nmslib specific code
-    # Remove old indices stored on disk
-    if os.path.exists(INDEX_DIR):
-        shutil.rmtree(INDEX_DIR)
-
-    dataset, dimension = get_dataset(args.dataset)
-    point_type = dataset.attrs.get('point_type', 'float')
-    distance = dataset.attrs['distance']
-    definitions = get_definitions(
-        args.definitions, dimension, point_type, distance, args.count, conn_params)
-
-    # Filter out, from the loaded definitions, all those query argument groups
-    # that correspond to experiments that have already been run. (This might
-    # mean removing a definition altogether, so we can't just use a list
-    # comprehension.)
-    filtered_definitions = []
-    for definition in definitions:
-        query_argument_groups = definition.query_argument_groups
-        if not query_argument_groups:
-            query_argument_groups = [[]]
-        not_yet_run = []
-        for query_arguments in query_argument_groups:
-            fn = get_result_filename(args.dataset,
-                                     args.count, definition,
-                                     query_arguments, args.batch)
-            if args.force or not os.path.exists(fn):
-                not_yet_run.append(query_arguments)
-        if not_yet_run:
-            if definition.query_argument_groups:
-                definition = definition._replace(
-                    query_argument_groups=not_yet_run)
-            filtered_definitions.append(definition)
-    definitions = filtered_definitions
-
-    random.shuffle(definitions)
-
-    if args.algorithm:
-        logger.info(f'running only {args.algorithm} algorithms')
-        definitions = [d for d in definitions if d.algorithm == args.algorithm]
-
-    if args.run_group:
-        logger.info(f'running only {args.run_group} run groups')
-        definitions = [d for d in definitions if d.run_group == args.run_group]
-
-    if not args.local:
-        # See which Docker images we have available
-        docker_client = docker.from_env()
-        docker_tags = set()
-        for image in docker_client.images.list():
-            for tag in image.tags:
-                tag = tag.split(':')[0]
-                docker_tags.add(tag)
-
-        if args.docker_tag:
-            logger.info(f'running only {args.docker_tag}')
-            definitions = [
-                d for d in definitions if d.docker_tag == args.docker_tag]
-
-        if set(d.docker_tag for d in definitions).difference(docker_tags):
-            logger.info(f'not all docker images available, only: {set(docker_tags)}')
-            logger.info(f'missing docker images: '
-                        f'{str(set(d.docker_tag for d in definitions).difference(docker_tags))}')
-            definitions = [
-                d for d in definitions if d.docker_tag in docker_tags]
-    else:
-        def _test(df):
-            status = algorithm_status(df)
-            # If the module was loaded but doesn't actually have a constructor
-            # of the right name, then the definition is broken
-            if status == InstantiationStatus.NO_CONSTRUCTOR:
-                raise Exception("%s.%s(%s): error: the module '%s' does not"
-                                " expose the named constructor" % (
-                                    df.module, df.constructor,
-                                    df.arguments, df.module))
-
-            if status == InstantiationStatus.NO_MODULE:
-                # If the module couldn't be loaded (presumably because
-                # of a missing dependency), print a warning and remove
-                # this definition from the list of things to be run
-                logging.warning("%s.%s(%s): the module '%s' could not be "
-                                "loaded; skipping" % (df.module, df.constructor,
-                                                      df.arguments, df.module))
-                return False
-            else:
-                return True
-        definitions = [d for d in definitions if _test(d)]
-
-    if not args.run_disabled:
-        if len([d for d in definitions if d.disabled]):
-            logger.info(f'Not running disabled algorithms {[d for d in definitions if d.disabled]}')
-        definitions = [d for d in definitions if not d.disabled]
-
-    if args.max_n_algorithms >= 0:
-        definitions = definitions[:args.max_n_algorithms]
-
-    if len(definitions) == 0:
-        raise Exception('Nothing to run')
-    else:
-        logger.info(f'Order: {definitions}')
-
-    if args.parallelism > multiprocessing.cpu_count() - 1:
-        raise Exception('Parallelism larger than %d! (CPU count minus one)' % (multiprocessing.cpu_count() - 1))
-
-    # Multiprocessing magic to farm this out to all CPUs
-    queue = multiprocessing.Queue()
-    for definition in definitions:
-        queue.put(definition)
-
-    if args.parallelism == 1:
-        # Wait for some jobs to be inserted into the queue
-        while queue.empty(): time.sleep(0.01)
-        # If we're only running one worker, then we can just run it in the same process
-        run_worker(1, args, queue)
-    else:
-        if args.batch:
-            raise Exception(f"Batch mode uses all available CPU resources, --parallelism should be set to 1. (Was: {args.parallelism})")
-        workers = [multiprocessing.Process(target=run_worker, args=(i+1, args, queue))
-                   for i in range(args.parallelism)]
-        [worker.start() for worker in workers]
-        [worker.join() for worker in workers]
-
-    # TODO: need to figure out cleanup handling here
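
Note for readers skimming the hunk above: the removed main() distributes benchmark runs through a plain multiprocessing queue drained by N worker processes. A minimal, self-contained sketch of that pattern follows; the string work items and the hard-coded parallelism stand in for the real definition objects and args.parallelism.

import multiprocessing


def run_worker(worker_id, queue):
    # Each worker pulls items until the shared queue is drained,
    # mirroring the run_worker() loop in the removed file.
    while not queue.empty():
        item = queue.get()
        print("worker %d running %s" % (worker_id, item))


if __name__ == "__main__":
    parallelism = 2  # stand-in for args.parallelism
    queue = multiprocessing.Queue()
    for definition in ["algo-a", "algo-b", "algo-c"]:
        queue.put(definition)
    workers = [multiprocessing.Process(target=run_worker, args=(i + 1, queue))
               for i in range(parallelism)]
    [w.start() for w in workers]
    [w.join() for w in workers]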

redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py (deleted)
@@ -1,2 +0,0 @@
-from __future__ import absolute_import
-from ann_benchmarks.plotting import *

redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py (deleted)
@@ -1,183 +0,0 @@
-from __future__ import absolute_import
-import numpy as np
-
-
-def knn_threshold(data, count, epsilon):
-    return data[count - 1] + epsilon
-
-
-def epsilon_threshold(data, count, epsilon):
-    return data[count - 1] * (1 + epsilon)
-
-
-def get_recall_values(dataset_distances, run_distances, count, threshold,
-                      epsilon=1e-3):
-    recalls = np.zeros(len(run_distances))
-    for i in range(len(run_distances)):
-        t = threshold(dataset_distances[i], count, epsilon)
-        actual = 0
-        for d in run_distances[i][:count]:
-            if d <= t:
-                actual += 1
-        recalls[i] = actual
-    return (np.mean(recalls) / float(count),
-            np.std(recalls) / float(count),
-            recalls)
-
-
-def knn(dataset_distances, run_distances, count, metrics, epsilon=1e-3):
-    if 'knn' not in metrics:
-        print('Computing knn metrics')
-        knn_metrics = metrics.create_group('knn')
-        mean, std, recalls = get_recall_values(dataset_distances,
-                                               run_distances, count,
-                                               knn_threshold, epsilon)
-        knn_metrics.attrs['mean'] = mean
-        knn_metrics.attrs['std'] = std
-        knn_metrics['recalls'] = recalls
-    else:
-        print("Found cached result")
-    return metrics['knn']
-
-
-def epsilon(dataset_distances, run_distances, count, metrics, epsilon=0.01):
-    s = 'eps' + str(epsilon)
-    if s not in metrics:
-        print('Computing epsilon metrics')
-        epsilon_metrics = metrics.create_group(s)
-        mean, std, recalls = get_recall_values(dataset_distances,
-                                               run_distances, count,
-                                               epsilon_threshold, epsilon)
-        epsilon_metrics.attrs['mean'] = mean
-        epsilon_metrics.attrs['std'] = std
-        epsilon_metrics['recalls'] = recalls
-    else:
-        print("Found cached result")
-    return metrics[s]
-
-
-def rel(dataset_distances, run_distances, metrics):
-    if 'rel' not in metrics.attrs:
-        print('Computing rel metrics')
-        total_closest_distance = 0.0
-        total_candidate_distance = 0.0
-        for true_distances, found_distances in zip(dataset_distances,
-                                                   run_distances):
-            for rdist, cdist in zip(true_distances, found_distances):
-                total_closest_distance += rdist
-                total_candidate_distance += cdist
-        if total_closest_distance < 0.01:
-            metrics.attrs['rel'] = float("inf")
-        else:
-            metrics.attrs['rel'] = total_candidate_distance / \
-                total_closest_distance
-    else:
-        print("Found cached result")
-    return metrics.attrs['rel']
-
-
-def queries_per_second(queries, attrs):
-    return 1.0 / attrs["best_search_time"]
-
-def percentile_50(times):
-    return np.percentile(times, 50.0) * 1000.0
-
-def percentile_95(times):
-    return np.percentile(times, 95.0) * 1000.0
-
-def percentile_99(times):
-    return np.percentile(times, 99.0) * 1000.0
-
-def percentile_999(times):
-    return np.percentile(times, 99.9) * 1000.0
-
-def index_size(queries, attrs):
-    # TODO(erikbern): should replace this with peak memory usage or something
-    return attrs.get("index_size", 0)
-
-
-def build_time(queries, attrs):
-    return attrs["build_time"]
-
-
-def candidates(queries, attrs):
-    return attrs["candidates"]
-
-
-def dist_computations(queries, attrs):
-    return attrs.get("dist_comps", 0) / (attrs['run_count'] * len(queries))
-
-
-all_metrics = {
-    "k-nn": {
-        "description": "Recall",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: knn(true_distances, run_distances, run_attrs["count"], metrics).attrs['mean'], # noqa
-        "worst": float("-inf"),
-        "lim": [0.0, 1.03]
-    },
-    "epsilon": {
-        "description": "Epsilon 0.01 Recall",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], metrics).attrs['mean'], # noqa
-        "worst": float("-inf")
-    },
-    "largeepsilon": {
-        "description": "Epsilon 0.1 Recall",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], metrics, 0.1).attrs['mean'], # noqa
-        "worst": float("-inf")
-    },
-    "rel": {
-        "description": "Relative Error",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: rel(true_distances, run_distances, metrics), # noqa
-        "worst": float("inf")
-    },
-    "qps": {
-        "description": "Queries per second (1/s)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: queries_per_second(true_distances, run_attrs), # noqa
-        "worst": float("-inf")
-    },
-    "p50": {
-        "description": "Percentile 50 (millis)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_50(times), # noqa
-        "worst": float("inf")
-    },
-    "p95": {
-        "description": "Percentile 95 (millis)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_95(times), # noqa
-        "worst": float("inf")
-    },
-    "p99": {
-        "description": "Percentile 99 (millis)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_99(times), # noqa
-        "worst": float("inf")
-    },
-    "p999": {
-        "description": "Percentile 99.9 (millis)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_999(times), # noqa
-        "worst": float("inf")
-    },
-    "distcomps": {
-        "description": "Distance computations",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: dist_computations(true_distances, run_attrs), # noqa
-        "worst": float("inf")
-    },
-    "build": {
-        "description": "Build time (s)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: build_time(true_distances, run_attrs), # noqa
-        "worst": float("inf")
-    },
-    "candidates": {
-        "description": "Candidates generated",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: candidates(true_distances, run_attrs), # noqa
-        "worst": float("inf")
-    },
-    "indexsize": {
-        "description": "Index size (kB)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: index_size(true_distances, run_attrs), # noqa
-        "worst": float("inf")
-    },
-    "queriessize": {
-        "description": "Index size (kB)/Queries per second (s)",
-        "function": lambda true_distances, run_distances, metrics, times, run_attrs: index_size(true_distances, run_attrs) / queries_per_second(true_distances, run_attrs), # noqa
-        "worst": float("inf")
-    }
-}
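
To make the recall computation in this hunk concrete, here is a tiny worked example. knn_threshold and get_recall_values are taken from the removed file (the inner counting loop lightly condensed), while the two-query dataset is invented purely for illustration.

import numpy as np


def knn_threshold(data, count, epsilon):
    return data[count - 1] + epsilon


def get_recall_values(dataset_distances, run_distances, count, threshold,
                      epsilon=1e-3):
    recalls = np.zeros(len(run_distances))
    for i in range(len(run_distances)):
        t = threshold(dataset_distances[i], count, epsilon)
        # Count returned neighbours at least as close as the k-th true one.
        recalls[i] = sum(1 for d in run_distances[i][:count] if d <= t)
    return np.mean(recalls) / count, np.std(recalls) / count, recalls


truth = np.array([[0.1, 0.2], [0.3, 0.4]])  # distances to the true 2-NN per query (made up)
found = np.array([[0.1, 0.5], [0.3, 0.4]])  # distances the tested algorithm returned (made up)
mean, std, _ = get_recall_values(truth, found, 2, knn_threshold)
print(mean)  # 0.75: query 0 recovered 1 of 2 neighbours, query 1 recovered 2 of 2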

redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py (deleted)
@@ -1,17 +0,0 @@
-from ann_benchmarks.plotting.metrics import all_metrics as metrics
-
-all_plot_variants = {
-    "recall/time": ("k-nn", "qps"),
-    "recall/buildtime": ("k-nn", "build"),
-    "recall/indexsize": ("k-nn", "indexsize"),
-    "recall/distcomps": ("k-nn", "distcomps"),
-    "rel/time": ("rel", "qps"),
-    "recall/candidates": ("k-nn", "candidates"),
-    "recall/qpssize": ("k-nn", "queriessize"),
-    "eps/time": ("epsilon", "qps"),
-    "largeeps/time": ("largeepsilon", "qps"),
-    "recall/p50": ("k-nn", "p50"),
-    "recall/p95": ("k-nn", "p95"),
-    "recall/p99": ("k-nn", "p99"),
-    "recall/p999": ("k-nn", "p999"),
-}
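
Each entry in the removed table pairs an x-axis metric key with a y-axis metric key into all_metrics, so resolving a plot variant was just a dictionary lookup, along these lines:

# Hypothetical lookup against the removed table.
xn, yn = all_plot_variants["recall/time"]  # ("k-nn", "qps")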

redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py (deleted)
@@ -1,165 +0,0 @@
-from __future__ import absolute_import
-
-import itertools
-import numpy
-from ann_benchmarks.plotting.metrics import all_metrics as metrics
-
-
-def get_or_create_metrics(run):
-    if 'metrics' not in run:
-        run.create_group('metrics')
-    return run['metrics']
-
-
-def create_pointset(data, xn, yn):
-    xm, ym = (metrics[xn], metrics[yn])
-    rev_y = -1 if ym["worst"] < 0 else 1
-    rev_x = -1 if xm["worst"] < 0 else 1
-    data.sort(key=lambda t: (rev_y * t[-1], rev_x * t[-2]))
-
-    axs, ays, als = [], [], []
-    # Generate Pareto frontier
-    xs, ys, ls = [], [], []
-    last_x = xm["worst"]
-    comparator = ((lambda xv, lx: xv > lx)
-                  if last_x < 0 else (lambda xv, lx: xv < lx))
-    for algo, algo_name, xv, yv in data:
-        if not xv or not yv:
-            continue
-        axs.append(xv)
-        ays.append(yv)
-        als.append(algo_name)
-        if comparator(xv, last_x):
-            last_x = xv
-            xs.append(xv)
-            ys.append(yv)
-            ls.append(algo_name)
-    return xs, ys, ls, axs, ays, als
-
-
-def compute_metrics(true_nn_distances, res, metric_1, metric_2,
-                    recompute=False):
-    all_results = {}
-    for i, (properties, run) in enumerate(res):
-        algo = properties['algo']
-        algo_name = properties['name']
-        # cache distances to avoid access to hdf5 file
-        run_distances = numpy.array(run['distances'])
-        # cache times to avoid access to hdf5 file
-        times = numpy.array(run['times'])
-        if recompute and 'metrics' in run:
-            del run['metrics']
-        metrics_cache = get_or_create_metrics(run)
-
-        metric_1_value = metrics[metric_1]['function'](
-            true_nn_distances,
-            run_distances, metrics_cache, times, properties)
-        metric_2_value = metrics[metric_2]['function'](
-            true_nn_distances,
-            run_distances, metrics_cache, times, properties)
-
-        print('%3d: %80s %12.3f %12.3f' %
-              (i, algo_name, metric_1_value, metric_2_value))
-
-        all_results.setdefault(algo, []).append(
-            (algo, algo_name, metric_1_value, metric_2_value))
-
-    return all_results
-
-
-def compute_all_metrics(true_nn_distances, run, properties, recompute=False):
-    algo = properties["algo"]
-    algo_name = properties["name"]
-    print('--')
-    print(algo_name)
-    results = {}
-    # cache distances to avoid access to hdf5 file
-    run_distances = numpy.array(run["distances"])
-    # cache times to avoid access to hdf5 file
-    times = numpy.array(run['times'])
-    if recompute and 'metrics' in run:
-        del run['metrics']
-    metrics_cache = get_or_create_metrics(run)
-
-    for name, metric in metrics.items():
-        v = metric["function"](
-            true_nn_distances, run_distances, metrics_cache, times, properties)
-        results[name] = v
-        if v:
-            print('%s: %g' % (name, v))
-    return (algo, algo_name, results)
-
-def compute_metrics_all_runs(dataset, res, recompute=False):
-    true_nn_distances=list(dataset['distances'])
-    for i, (properties, run) in enumerate(res):
-        algo = properties['algo']
-        algo_name = properties['name']
-        # cache distances to avoid access to hdf5 file
-        # print('Load distances and times')
-        run_distances = numpy.array(run['distances'])
-        times = numpy.array(run['times'])
-        # print('... done')
-        if recompute and 'metrics' in run:
-            print('Recomputing metrics, clearing cache')
-            del run['metrics']
-        metrics_cache = get_or_create_metrics(run)
-
-        dataset = properties['dataset']
-
-        run_result = {
-            'algorithm': algo,
-            'parameters': algo_name,
-            'count': properties['count']
-        }
-        for name, metric in metrics.items():
-            v = metric["function"](true_nn_distances, run_distances, metrics_cache, times, properties)
-            run_result[name] = v
-        yield run_result
-
-
-def generate_n_colors(n):
-    vs = numpy.linspace(0.3, 0.9, 7)
-    colors = [(.9, .4, .4, 1.)]
-
-    def euclidean(a, b):
-        return sum((x - y)**2 for x, y in zip(a, b))
-    while len(colors) < n:
-        new_color = max(itertools.product(vs, vs, vs),
-                        key=lambda a: min(euclidean(a, b) for b in colors))
-        colors.append(new_color + (1.,))
-    return colors
-
-
-def create_linestyles(unique_algorithms):
-    colors = dict(
-        zip(unique_algorithms, generate_n_colors(len(unique_algorithms))))
-    linestyles = dict((algo, ['--', '-.', '-', ':'][i % 4])
-                      for i, algo in enumerate(unique_algorithms))
-    markerstyles = dict((algo, ['+', '<', 'o', '*', 'x'][i % 5])
-                        for i, algo in enumerate(unique_algorithms))
-    faded = dict((algo, (r, g, b, 0.3))
-                 for algo, (r, g, b, a) in colors.items())
-    return dict((algo, (colors[algo], faded[algo],
-                        linestyles[algo], markerstyles[algo]))
-                for algo in unique_algorithms)
-
-
-def get_up_down(metric):
-    if metric["worst"] == float("inf"):
-        return "down"
-    return "up"
-
-
-def get_left_right(metric):
-    if metric["worst"] == float("inf"):
-        return "left"
-    return "right"
-
-
-def get_plot_label(xm, ym):
-    template = ("%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and"
-                " to the %(leftright)s is better")
-    return template % {"xlabel": xm["description"],
-                       "ylabel": ym["description"],
-                       "updown": get_up_down(ym),
-                       "leftright": get_left_right(xm)}