redisbench-admin 0.11.55-py3-none-any.whl → 0.11.57-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
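To sanity-check a diff like this independently, the comparison can be approximated locally once both wheels have been downloaded (for example with pip download redisbench-admin==0.11.55 and ==0.11.57). The sketch below is a hypothetical reconstruction using only the Python standard library, not the registry's actual pipeline; the wheel file names and the binary-skip heuristic are assumptions.

    import difflib
    import zipfile

    OLD = "redisbench_admin-0.11.55-py3-none-any.whl"  # assumed local path
    NEW = "redisbench_admin-0.11.57-py3-none-any.whl"  # assumed local path

    def wheel_files(path):
        # Map archive member name -> list of text lines (None for binary members).
        out = {}
        with zipfile.ZipFile(path) as zf:
            for name in zf.namelist():
                try:
                    out[name] = zf.read(name).decode("utf-8").splitlines(keepends=True)
                except UnicodeDecodeError:
                    out[name] = None  # binary member, e.g. the .png results
        return out

    old, new = wheel_files(OLD), wheel_files(NEW)
    for name in sorted(set(old) | set(new)):
        a, b = old.get(name, []), new.get(name, [])
        if a is None or b is None or a == b:
            continue  # skip binary members and unchanged files
        # Members present in only one wheel show up as pure additions or deletions.
        for line in difflib.unified_diff(a, b, fromfile=name, tofile=name):
            print(line, end="")

Note that renamed files (such as the dist-info directories) appear in this sketch as a deletion plus an addition, rather than the { old → new } rename notation used in the list below.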
Files changed (137)
  1. redisbench_admin/environments/oss_cluster.py +9 -1
  2. redisbench_admin/run/asm.py +1 -1
  3. redisbench_admin/run_remote/remote_helpers.py +41 -11
  4. redisbench_admin/run_remote/standalone.py +2 -3
  5. redisbench_admin/utils/remote.py +2 -0
  6. {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info}/METADATA +7 -4
  7. redisbench_admin-0.11.57.dist-info/RECORD +117 -0
  8. {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info}/WHEEL +1 -1
  9. redisbench_admin/run/ann/pkg/.dockerignore +0 -2
  10. redisbench_admin/run/ann/pkg/.git +0 -1
  11. redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +0 -100
  12. redisbench_admin/run/ann/pkg/.gitignore +0 -21
  13. redisbench_admin/run/ann/pkg/LICENSE +0 -21
  14. redisbench_admin/run/ann/pkg/README.md +0 -157
  15. redisbench_admin/run/ann/pkg/algos.yaml +0 -1294
  16. redisbench_admin/run/ann/pkg/algosP.yaml +0 -67
  17. redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +0 -2
  18. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
  19. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +0 -26
  20. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +0 -22
  21. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +0 -36
  22. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +0 -110
  23. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +0 -17
  24. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +0 -29
  25. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +0 -187
  26. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +0 -190
  27. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +0 -31
  28. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +0 -25
  29. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +0 -107
  30. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +0 -124
  31. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +0 -124
  32. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +0 -61
  33. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +0 -39
  34. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +0 -27
  35. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +0 -36
  36. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +0 -22
  37. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +0 -39
  38. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +0 -25
  39. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +0 -99
  40. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +0 -41
  41. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +0 -28
  42. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +0 -48
  43. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +0 -74
  44. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +0 -100
  45. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +0 -107
  46. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +0 -79
  47. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +0 -39
  48. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +0 -45
  49. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +0 -115
  50. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +0 -102
  51. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +0 -90
  52. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +0 -20
  53. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +0 -34
  54. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +0 -28
  55. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +0 -246
  56. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +0 -149
  57. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +0 -43
  58. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +0 -47
  59. redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +0 -1
  60. redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +0 -48
  61. redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +0 -620
  62. redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +0 -53
  63. redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +0 -325
  64. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +0 -2
  65. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +0 -183
  66. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +0 -17
  67. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +0 -165
  68. redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +0 -71
  69. redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +0 -333
  70. redisbench_admin/run/ann/pkg/create_dataset.py +0 -12
  71. redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +0 -147
  72. redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +0 -117
  73. redisbench_admin/run/ann/pkg/create_website.py +0 -272
  74. redisbench_admin/run/ann/pkg/install/Dockerfile +0 -11
  75. redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +0 -5
  76. redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +0 -4
  77. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +0 -29
  78. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +0 -31
  79. redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +0 -5
  80. redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +0 -45
  81. redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +0 -61
  82. redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +0 -18
  83. redisbench_admin/run/ann/pkg/install/Dockerfile.flann +0 -10
  84. redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +0 -10
  85. redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +0 -6
  86. redisbench_admin/run/ann/pkg/install/Dockerfile.mih +0 -4
  87. redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +0 -27
  88. redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +0 -4
  89. redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +0 -5
  90. redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +0 -5
  91. redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +0 -13
  92. redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +0 -10
  93. redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +0 -43
  94. redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +0 -6
  95. redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +0 -4
  96. redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +0 -18
  97. redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +0 -5
  98. redisbench_admin/run/ann/pkg/install/Dockerfile.scann +0 -5
  99. redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +0 -4
  100. redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +0 -4
  101. redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +0 -30
  102. redisbench_admin/run/ann/pkg/install/Dockerfile.vald +0 -8
  103. redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +0 -17
  104. redisbench_admin/run/ann/pkg/install.py +0 -70
  105. redisbench_admin/run/ann/pkg/logging.conf +0 -34
  106. redisbench_admin/run/ann/pkg/multirun.py +0 -298
  107. redisbench_admin/run/ann/pkg/plot.py +0 -159
  108. redisbench_admin/run/ann/pkg/protocol/bf-runner +0 -10
  109. redisbench_admin/run/ann/pkg/protocol/bf-runner.py +0 -204
  110. redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +0 -51
  111. redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +0 -77
  112. redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +0 -77
  113. redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +0 -47
  114. redisbench_admin/run/ann/pkg/protocol/specification.md +0 -194
  115. redisbench_admin/run/ann/pkg/requirements.txt +0 -14
  116. redisbench_admin/run/ann/pkg/requirements_py38.txt +0 -11
  117. redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
  118. redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
  119. redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
  120. redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
  121. redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
  122. redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
  123. redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
  124. redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
  125. redisbench_admin/run/ann/pkg/run.py +0 -12
  126. redisbench_admin/run/ann/pkg/run_algorithm.py +0 -3
  127. redisbench_admin/run/ann/pkg/templates/chartjs.template +0 -102
  128. redisbench_admin/run/ann/pkg/templates/detail_page.html +0 -23
  129. redisbench_admin/run/ann/pkg/templates/general.html +0 -58
  130. redisbench_admin/run/ann/pkg/templates/latex.template +0 -30
  131. redisbench_admin/run/ann/pkg/templates/summary.html +0 -60
  132. redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
  133. redisbench_admin/run/ann/pkg/test/test-jaccard.py +0 -19
  134. redisbench_admin/run/ann/pkg/test/test-metrics.py +0 -99
  135. redisbench_admin-0.11.55.dist-info/RECORD +0 -243
  136. {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info}/entry_points.txt +0 -0
  137. {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info/licenses}/LICENSE +0 -0
redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch
@@ -1,18 +0,0 @@
- FROM redislabs/redisearch:feature-vecsim as redis
- FROM ann-benchmarks
- # COPY --from=redis /usr/local/ /usr/local/
-
- run git clone https://github.com/redis/redis.git
- run cd redis; \
-     git checkout 6.2.5; \
-     make install
-
-
- COPY --from=redis /usr/lib/redis /usr/lib/redis
-
-
- RUN pip3 install redisearch redis
-
- # CMD ["redis-server", "--loadmodule", "/usr/lib/redis/modules/redisearch.so"]
- RUN echo 'redis-server --daemonize yes --loadmodule /usr/lib/redis/modules/redisearch.so && python3 -u run_algorithm.py "$@"' > entrypoint.sh
- ENTRYPOINT ["/bin/bash", "/home/app/entrypoint.sh"]
redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest
@@ -1,5 +0,0 @@
- FROM ann-benchmarks
-
- RUN git clone https://github.com/lyst/rpforest
- RUN cd rpforest && python3 setup.py install
- RUN python3 -c 'import rpforest'
redisbench_admin/run/ann/pkg/install/Dockerfile.scann
@@ -1,5 +0,0 @@
- FROM ann-benchmarks
-
- RUN pip3 install --upgrade pip
- RUN pip3 install scann
- RUN python3 -c 'import scann'
redisbench_admin/run/ann/pkg/install/Dockerfile.scipy
@@ -1,4 +0,0 @@
- FROM ann-benchmarks
-
- RUN pip3 install scipy
- RUN python3 -c 'import scipy'
redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn
@@ -1,4 +0,0 @@
- FROM ann-benchmarks
-
- RUN pip3 install scikit-learn
- RUN python3 -c 'import sklearn'
redisbench_admin/run/ann/pkg/install/Dockerfile.sptag
@@ -1,30 +0,0 @@
- # Adopted from https://github.com/microsoft/SPTAG/blob/master/Dockerfile
-
- FROM ann-benchmarks
-
- RUN git clone https://github.com/microsoft/SPTAG
- RUN apt-get update && apt-get -y install wget build-essential libtbb-dev software-properties-common swig
-
- # cmake >= 3.12 is required
- RUN wget "https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.tar.gz" -q -O - \
-     | tar -xz --strip-components=1 -C /usr/local
-
- # specific version of boost
- RUN wget "https://boostorg.jfrog.io/artifactory/main/release/1.67.0/source/boost_1_67_0.tar.gz" -q -O - \
-     | tar -xz && \
-     cd boost_1_67_0 && \
-     ./bootstrap.sh && \
-     ./b2 install && \
-     # update ld cache so it finds boost in /usr/local/lib
-     ldconfig && \
-     cd .. && rm -rf boost_1_67_0
-
- # SPTAG defaults to Python 2 if it's found on the system, so as a hack, we remove it. See https://github.com/microsoft/SPTAG/blob/master/Wrappers/CMakeLists.txt
- RUN apt-get -y remove libpython2.7
-
- # Compile
- RUN cd SPTAG && mkdir build && cd build && cmake .. && make && cd ..
-
- # so python can find the SPTAG module
- ENV PYTHONPATH=/home/app/SPTAG/Release
- RUN python3 -c 'import SPTAG'
redisbench_admin/run/ann/pkg/install/Dockerfile.vald
@@ -1,8 +0,0 @@
- ARG VALD_VERSION=v1.3.1
- FROM vdaas/vald-agent-ngt:${VALD_VERSION} as vald
-
- FROM ann-benchmarks
- ARG VALD_CLIENT_VERSION=1.3.1
- COPY --from=vald /go/bin/ngt /go/bin/ngt
-
- RUN pip3 install vald-client-python==${VALD_CLIENT_VERSION}
redisbench_admin/run/ann/pkg/install/Dockerfile.vespa
@@ -1,17 +0,0 @@
- FROM centos:7
-
- RUN yum -y install epel-release && \
-     yum -y install centos-release-scl && \
-     yum -y --setopt=skip_missing_names_on_install=False install gcc make git python3-devel && \
-     python3 -m pip install --upgrade pip setuptools wheel && \
-     yum-config-manager --add-repo https://copr.fedorainfracloud.org/coprs/g/vespa/vespa/repo/epel-7/group_vespa-vespa-epel-7.repo && \
-     yum -y --setopt=skip_missing_names_on_install=False --enablerepo=epel-testing install vespa-ann-benchmark
-
- WORKDIR /home/app
-
- COPY requirements.txt run_algorithm.py ./
-
- RUN python3 -m pip install -r requirements.txt && \
-     python3 -m pip install /opt/vespa/libexec/vespa_ann_benchmark
-
- ENTRYPOINT ["python3", "-u", "run_algorithm.py"]
redisbench_admin/run/ann/pkg/install.py
@@ -1,70 +0,0 @@
- import json
- import os
- import argparse
- import subprocess
- from multiprocessing import Pool
- from ann_benchmarks.main import positive_int
-
-
- def build(library, args):
-     print('Building %s...' % library)
-     if args is not None and len(args) != 0:
-         q = " ".join(["--build-arg " + x.replace(" ", "\\ ") for x in args])
-     else:
-         q = ""
-
-     try:
-         subprocess.check_call(
-             'docker build %s --rm -t ann-benchmarks-%s -f'
-             ' install/Dockerfile.%s .' % (q, library, library), shell=True)
-         return {library: 'success'}
-     except subprocess.CalledProcessError:
-         return {library: 'fail'}
-
-
- def build_multiprocess(args):
-     return build(*args)
-
-
- if __name__ == "__main__":
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-     parser.add_argument(
-         "--proc",
-         default=1,
-         type=positive_int,
-         help="the number of process to build docker images")
-     parser.add_argument(
-         '--algorithm',
-         metavar='NAME',
-         help='build only the named algorithm image',
-         default=None)
-     parser.add_argument(
-         '--build-arg',
-         help='pass given args to all docker builds',
-         nargs="+")
-     args = parser.parse_args()
-
-     print('Building base image...')
-     subprocess.check_call(
-         'docker build \
-         --rm -t ann-benchmarks -f install/Dockerfile .', shell=True)
-
-     if args.algorithm:
-         tags = [args.algorithm]
-     elif os.getenv('LIBRARY'):
-         tags = [os.getenv('LIBRARY')]
-     else:
-         tags = [fn.split('.')[-1] for fn in os.listdir('install') if fn.startswith('Dockerfile.')]
-
-     print('Building algorithm images... with (%d) processes' % args.proc)
-
-     if args.proc == 1:
-         install_status = [build(tag, args.build_arg) for tag in tags]
-     else:
-         pool = Pool(processes=args.proc)
-         install_status = pool.map(build_multiprocess, [(tag, args.build_arg) for tag in tags])
-         pool.close()
-         pool.join()
-
-     print('\n\nInstall Status:\n' + '\n'.join(str(algo) for algo in install_status))
redisbench_admin/run/ann/pkg/logging.conf
@@ -1,34 +0,0 @@
- [loggers]
- keys=root,annb
-
- [handlers]
- keys=consoleHandler,fileHandler
-
- [formatters]
- keys=simpleFormatter
-
- [formatter_simpleFormatter]
- format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
- datefmt=
-
- [handler_consoleHandler]
- class=StreamHandler
- level=INFO
- formatter=simpleFormatter
- args=(sys.stdout,)
-
- [handler_fileHandler]
- class=FileHandler
- level=INFO
- formatter=simpleFormatter
- args=('annb.log','w')
-
- [logger_root]
- level=WARN
- handlers=consoleHandler
-
- [logger_annb]
- level=INFO
- handlers=consoleHandler,fileHandler
- qualname=annb
- propagate=0
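Side note on the removed logging.conf above: an INI-style config in this format is the kind consumed by the standard library's logging.config.fileConfig. A minimal sketch of how such a file would be loaded (the file path and logger usage here are illustrative assumptions, not code from the package):

    import logging
    import logging.config

    # Parse the INI-style config; disable_existing_loggers=False keeps loggers
    # created by modules at import time from being silenced.
    logging.config.fileConfig("logging.conf", disable_existing_loggers=False)

    log = logging.getLogger("annb")  # matches qualname=annb in [logger_annb]
    log.info("run starting")  # INFO goes to stdout and to annb.log via the two handlers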
redisbench_admin/run/ann/pkg/multirun.py
@@ -1,298 +0,0 @@
- from multiprocessing import Process
- import argparse
- import time
- import json
- from numpy import average
- import h5py
- import os
- from watchdog.observers import Observer
- from watchdog.events import PatternMatchingEventHandler
- import pathlib
- from ann_benchmarks.results import get_result_filename
- from ann_benchmarks.algorithms.definitions import get_run_groups
-
-
- def aggregate_outputs(files, clients):
-     different_attrs = set([f.split('client')[0] for f in files])
-     groups = [[f + f'client_{i}.hdf5' for i in range(1, clients + 1)] for f in different_attrs]
-
-     if len(different_attrs) * clients > len(files):
-         print(f'missing files! got {len(files)} but expected {len(different_attrs) * clients}')
-         print('got files:')
-         [print('\t' + f) for f in files]
-         print('probably missing files:')
-         [[print('\t' + f) for f in g if f not in files] for g in groups]
-         assert False
-     elif len(different_attrs) * clients < len(files):
-         print(f'too many files! got {len(files)} but expected {len(different_attrs) * clients}')
-         print('got files:')
-         [print('\t' + f) for f in files]
-         print('probably unnecessary files:')
-         [print('\t' + f) for f in files if len([g for g in groups if f in g]) == 0]
-         raise False
-
-     for group in groups:
-         fn = group[0].split('client')[0][:-1] + '.hdf5'
-         f = h5py.File(fn, 'w')
-
-         fs = [h5py.File(fi, 'r') for fi in group]
-         for k, v in fs[0].attrs.items():
-             f.attrs[k] = v
-         f.attrs["best_search_time"] = average([fi.attrs["best_search_time"] for fi in fs])
-         f.attrs["candidates"] = average([fi.attrs["candidates"] for fi in fs])
-
-         # As we split the test work between the clients, wee should concatenate their results
-         f['times'] = [t for fi in fs for t in fi['times']]
-         f['neighbors'] = [n for fi in fs for n in fi['neighbors']]
-         f['distances'] = [d for fi in fs for d in fi['distances']]
-
-         [fi.close() for fi in fs]
-         [os.remove(fi) for fi in group]
-         f.close()
-
-
- if __name__ == "__main__":
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-     parser.add_argument(
-         '--dataset',
-         metavar='NAME',
-         help='the dataset to load training points from',
-         default='glove-100-angular')
-     parser.add_argument(
-         '--json-output',
-         help='Path to the output file. If defined will store the results in json format.',
-         default=""
-     )
-     parser.add_argument(
-         "-k", "--count",
-         default="10",
-         type=str,
-         help="the number of near neighbours to search for")
-     parser.add_argument(
-         '--host',
-         type=str,
-         help='host name or IP',
-         default=None)
-     parser.add_argument(
-         '--port',
-         type=str,
-         help='the port "host" is listening on',
-         default=None)
-     parser.add_argument(
-         '--auth', '-a',
-         type=str,
-         metavar='PASS',
-         help='password for connection',
-         default=None)
-     parser.add_argument(
-         '--user',
-         type=str,
-         metavar='NAME',
-         help='user name for connection',
-         default=None)
-     parser.add_argument(
-         '--build-clients',
-         type=str,
-         metavar='NUM',
-         help='total number of clients running in parallel to build the index (could be 0)',
-         default="1")
-     parser.add_argument(
-         '--test-clients',
-         type=str,
-         metavar='NUM',
-         help='total number of clients running in parallel to test the index (could be 0)',
-         default="1")
-     parser.add_argument(
-         '--force',
-         help='re-run algorithms even if their results already exist',
-         action='store_true')
-     parser.add_argument(
-         '--algorithm',
-         metavar='ALGO',
-         help='run redisearch with this algorithm',
-         default="redisearch-hnsw")
-     parser.add_argument(
-         '--run-group',
-         type=str,
-         metavar='NAME',
-         help='run only the named run group',
-         default=None)
-     parser.add_argument(
-         '--runs',
-         type=str,
-         help='run each algorithm instance %(metavar)s times and use only'
-              ' the best result',
-         default="3")
-     parser.add_argument(
-         '--cluster',
-         action='store_true',
-         help='working with a cluster')
-     parser.add_argument(
-         '--shards',
-         type=str,
-         metavar='NUM',
-         default="1",
-         help='specify number of shards')
-
-     args = parser.parse_args()
-
-     # we should change to the proper workdir as soon we parse the args
-     # given some functions bellow require on relative path to the project
-     workdir = pathlib.Path(__file__).parent.absolute()
-     print("Changing the workdir to {}".format(workdir))
-     os.chdir(workdir)
-
-     # All supported algorithms that need spacial stuff
-     isredis = ismilvus = ispinecone = iselastic = False
-
-     if 'redisearch' in args.algorithm:
-         from redis import Redis
-         from redis.cluster import RedisCluster
-         isredis = True
-
-     elif 'milvus' in args.algorithm:
-         from pymilvus import utility, connections
-         ismilvus = True
-
-     elif 'pinecone' in args.algorithm:
-         import pinecone
-         ispinecone = True
-
-     elif 'elasticsearch' in args.algorithm:
-         from elasticsearch import Elasticsearch
-         from elastic_transport.client_utils import DEFAULT
-         iselastic = True
-
-     if args.host is None:
-         args.host = 'localhost'
-     if args.port is None:
-         if isredis: args.port = '6379'
-         elif ismilvus: args.port = '19530'
-         elif iselastic: args.port = '9200'
-
-     if isredis:
-         redis = RedisCluster if args.cluster else Redis
-         redis = redis(host=args.host, port=int(args.port), password=args.auth, username=args.user)
-     elif ismilvus:
-         connections.connect(host=args.host, port=args.port)
-     elif ispinecone:
-         pinecone.init(api_key=args.auth)
-     elif iselastic:
-         args.user = args.user if args.user is not None else 'elastic'
-         args.auth = args.auth if args.auth is not None else os.environ.get('ELASTIC_PASSWORD', '')
-         try:
-             es = Elasticsearch([f'http://{args.host}:{args.port}'], request_timeout=3600, basic_auth=(args.user, args.auth))
-             es.info()
-         except Exception:
-             es = Elasticsearch([f'https://{args.host}:{args.port}'], request_timeout=3600, basic_auth=(args.user, args.auth), ca_certs=os.environ.get('ELASTIC_CA', DEFAULT))
-
-     if args.run_group is not None:
-         run_groups = [args.run_group]
-     else:
-         run_groups = get_run_groups('algos.yaml', args.algorithm)
-
-     base = 'python3 run.py --local --algorithm ' + args.algorithm + ' -k ' + args.count + ' --dataset ' + args.dataset
-
-     if args.host: base += ' --host ' + args.host
-     if args.port: base += ' --port ' + args.port
-     if args.user: base += ' --user ' + args.user
-     if args.auth: base += ' --auth ' + args.auth
-     if args.force: base += ' --force'
-     if args.cluster: base += ' --cluster'
-     if args.shards: base += ' --shards ' + args.shards
-
-     base_build = base + ' --build-only --total-clients ' + args.build_clients
-     base_test = base + ' --test-only --runs {} --total-clients {}'.format(args.runs, args.test_clients)
-     outputsdir = "{}/{}".format(workdir, get_result_filename(args.dataset, args.count))
-     outputsdir = os.path.join(outputsdir, args.algorithm)
-     if not os.path.isdir(outputsdir):
-         os.makedirs(outputsdir)
-     results_dicts = []
-
-     # skipping aggregation if using one tester
-     if int(args.test_clients) > 1:
-         test_stats_files = set()
-         watcher = PatternMatchingEventHandler(["*.hdf5"], ignore_directories=True)
-
-
-         def on_created_or_modified(event):
-             test_stats_files.add(event.src_path)
-
-
-         watcher.on_created = on_created_or_modified
-         watcher.on_modified = on_created_or_modified
-         observer = Observer()
-         observer.schedule(watcher, outputsdir)
-         observer.start()
-
-     for run_group in run_groups:
-         results_dict = {}
-         curr_base_build = base_build + ' --run-group ' + run_group
-         curr_base_test = base_test + ' --run-group ' + run_group
-
-         if int(args.build_clients) > 0:
-             if isredis:
-                 redis.flushall()
-             elif ismilvus:
-                 if utility.has_collection('milvus'):
-                     utility.drop_collection('milvus')
-             elif ispinecone:
-                 for idx in pinecone.list_indexes():
-                     pinecone.delete_index(idx)
-             elif iselastic:
-                 for idx in es.indices.stats()['indices']:
-                     es.indices.delete(index=idx)
-
-             clients = [Process(target=os.system, args=(curr_base_build + ' --client-id ' + str(i),)) for i in
-                        range(1, int(args.build_clients) + 1)]
-
-             t0 = time.time()
-             for client in clients: client.start()
-             for client in clients: client.join()
-             total_time = time.time() - t0
-             print(f'total build time: {total_time}\n\n')
-
-             fn = os.path.join(outputsdir, 'build_stats')
-             f = h5py.File(fn, 'w')
-             f.attrs["build_time"] = total_time
-             print(fn)
-             index_size = -1
-             if isredis:
-                 if not args.cluster:  # TODO: get total size from all the shards
-                     index_size = float(redis.ft('ann_benchmark').info()['vector_index_sz_mb']) * 1024
-                 f.attrs["index_size"] = index_size
-             elif iselastic:
-                 f.attrs["index_size"] = es.indices.stats(index='ann_benchmark')['indices']['ann_benchmark']['total']['store']['size_in_bytes']
-             f.close()
-             results_dict["build"] = {"total_clients": args.build_clients, "build_time": total_time,
-                                      "vector_index_sz_mb": index_size}
-
-         if int(args.test_clients) > 0:
-             queriers = [Process(target=os.system, args=(curr_base_test + ' --client-id ' + str(i),)) for i in
-                         range(1, int(args.test_clients) + 1)]
-             t0 = time.time()
-             for querier in queriers: querier.start()
-             for querier in queriers: querier.join()
-             query_time = time.time() - t0
-             print(f'total test time: {query_time}')
-             results_dict["query"] = {"total_clients": args.test_clients, "test_time": query_time}
-
-         results_dicts.append(results_dict)
-
-     # skipping aggregation if using one tester
-     if int(args.test_clients) > 1:
-         observer.stop()
-         observer.join()
-         print(
-             f'summarizing {int(args.test_clients)} clients data ({len(test_stats_files)} files into {len(test_stats_files) // int(args.test_clients)})...')
-         # ls = os.listdir(outputsdir)
-         # ls.remove('build_stats')
-         # aggregate_outputs(ls, int(args.test_clients))
-         aggregate_outputs(test_stats_files, int(args.test_clients))
-         print('done!')
-
-     if args.json_output != "":
-         with open(args.json_output, "w") as json_out_file:
-             print(f'storing json result into: {args.json_output}')
-             json.dump(results_dict, json_out_file)
redisbench_admin/run/ann/pkg/plot.py
@@ -1,159 +0,0 @@
- import os
- import matplotlib as mpl
- mpl.use('Agg') # noqa
- import matplotlib.pyplot as plt
- import numpy as np
- import argparse
-
- from ann_benchmarks.datasets import get_dataset
- from ann_benchmarks.algorithms.definitions import get_definitions
- from ann_benchmarks.plotting.metrics import all_metrics as metrics
- from ann_benchmarks.plotting.utils import (get_plot_label, compute_metrics,
-                                            create_linestyles, create_pointset)
- from ann_benchmarks.results import (store_results, load_all_results,
-                                     get_unique_algorithms)
-
-
- def create_plot(all_data, raw, x_scale, y_scale, xn, yn, fn_out, linestyles,
-                 batch):
-     xm, ym = (metrics[xn], metrics[yn])
-     # Now generate each plot
-     handles = []
-     labels = []
-     plt.figure(figsize=(12, 9))
-
-     # Sorting by mean y-value helps aligning plots with labels
-     def mean_y(algo):
-         xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn)
-         return -np.log(np.array(ys)).mean()
-     # Find range for logit x-scale
-     min_x, max_x = 1, 0
-     for algo in sorted(all_data.keys(), key=mean_y):
-         xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn)
-         min_x = min([min_x]+[x for x in xs if x > 0])
-         max_x = max([max_x]+[x for x in xs if x < 1])
-         color, faded, linestyle, marker = linestyles[algo]
-         handle, = plt.plot(xs, ys, '-', label=algo, color=color,
-                            ms=7, mew=3, lw=3, linestyle=linestyle,
-                            marker=marker)
-         handles.append(handle)
-         if raw:
-             handle2, = plt.plot(axs, ays, '-', label=algo, color=faded,
-                                 ms=5, mew=2, lw=2, linestyle=linestyle,
-                                 marker=marker)
-         labels.append(algo)
-
-     ax = plt.gca()
-     ax.set_ylabel(ym['description'])
-     ax.set_xlabel(xm['description'])
-     # Custom scales of the type --x-scale a3
-     if x_scale[0] == 'a':
-         alpha = float(x_scale[1:])
-         fun = lambda x: 1-(1-x)**(1/alpha)
-         inv_fun = lambda x: 1-(1-x)**alpha
-         ax.set_xscale('function', functions=(fun, inv_fun))
-         if alpha <= 3:
-             ticks = [inv_fun(x) for x in np.arange(0,1.2,.2)]
-             plt.xticks(ticks)
-         if alpha > 3:
-             from matplotlib import ticker
-             ax.xaxis.set_major_formatter(ticker.LogitFormatter())
-             #plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x))
-             plt.xticks([0, 1/2, 1-1e-1, 1-1e-2, 1-1e-3, 1-1e-4, 1])
-     # Other x-scales
-     else:
-         ax.set_xscale(x_scale)
-     ax.set_yscale(y_scale)
-     ax.set_title(get_plot_label(xm, ym))
-     box = plt.gca().get_position()
-     # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height])
-     ax.legend(handles, labels, loc='center left',
-               bbox_to_anchor=(1, 0.5), prop={'size': 9})
-     plt.grid(b=True, which='major', color='0.65', linestyle='-')
-     plt.setp(ax.get_xminorticklabels(), visible=True)
-
-     # Logit scale has to be a subset of (0,1)
-     if 'lim' in xm and x_scale != 'logit':
-         x0, x1 = xm['lim']
-         plt.xlim(max(x0,0), min(x1,1))
-     elif x_scale == 'logit':
-         plt.xlim(min_x, max_x)
-     if 'lim' in ym:
-         plt.ylim(ym['lim'])
-
-     # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789
-     ax.spines['bottom']._adjust_location()
-
-     plt.savefig(fn_out, bbox_inches='tight')
-     plt.close()
-
-
- if __name__ == "__main__":
-     parser = argparse.ArgumentParser()
-     parser.add_argument(
-         '--dataset',
-         metavar="DATASET",
-         default='glove-100-angular')
-     parser.add_argument(
-         '--count',
-         default=10)
-     parser.add_argument(
-         '--definitions',
-         metavar='FILE',
-         help='load algorithm definitions from FILE',
-         default='algos.yaml')
-     parser.add_argument(
-         '--limit',
-         default=-1)
-     parser.add_argument(
-         '-o', '--output')
-     parser.add_argument(
-         '-x', '--x-axis',
-         help='Which metric to use on the X-axis',
-         choices=metrics.keys(),
-         default="k-nn")
-     parser.add_argument(
-         '-y', '--y-axis',
-         help='Which metric to use on the Y-axis',
-         choices=metrics.keys(),
-         default="qps")
-     parser.add_argument(
-         '-X', '--x-scale',
-         help='Scale to use when drawing the X-axis. Typically linear, logit or a2',
-         default='linear')
-     parser.add_argument(
-         '-Y', '--y-scale',
-         help='Scale to use when drawing the Y-axis',
-         choices=["linear", "log", "symlog", "logit"],
-         default='linear')
-     parser.add_argument(
-         '--raw',
-         help='Show raw results (not just Pareto frontier) in faded colours',
-         action='store_true')
-     parser.add_argument(
-         '--batch',
-         help='Plot runs in batch mode',
-         action='store_true')
-     parser.add_argument(
-         '--recompute',
-         help='Clears the cache and recomputes the metrics',
-         action='store_true')
-     args = parser.parse_args()
-
-     if not args.output:
-         args.output = 'results/%s.png' % (args.dataset + ('-batch' if args.batch else ''))
-         print('writing output to %s' % args.output)
-
-     dataset, _ = get_dataset(args.dataset)
-     count = int(args.count)
-     unique_algorithms = get_unique_algorithms()
-     results = load_all_results(args.dataset, count, args.batch)
-     linestyles = create_linestyles(sorted(unique_algorithms))
-     runs = compute_metrics(np.array(dataset["distances"]),
-                            results, args.x_axis, args.y_axis, args.recompute)
-     if not runs:
-         raise Exception('Nothing to plot')
-
-     create_plot(runs, args.raw, args.x_scale,
-                 args.y_scale, args.x_axis, args.y_axis, args.output,
-                 linestyles, args.batch)
redisbench_admin/run/ann/pkg/protocol/bf-runner
@@ -1,10 +0,0 @@
- #!/bin/sh
-
- where="$(dirname "$0")"
- if [ "$1" = "2" -o "$1" = "-2" -o "$1" = "--2" ]; then
-     PYTHON=python2
- else
-     PYTHON=python3
- fi
- export PYTHONPATH="$where/..:$PYTHONPATH"
- exec $PYTHON "$where/bf-runner.py"