redisbench-admin 0.11.54__py3-none-any.whl → 0.11.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- redisbench_admin/environments/oss_cluster.py +9 -1
- redisbench_admin/run/aibench_run_inference_redisai_vision/aibench_run_inference_redisai_vision.py +4 -16
- redisbench_admin/run/asm.py +426 -0
- redisbench_admin/run/common.py +3 -0
- redisbench_admin/run/ftsb/ftsb.py +4 -16
- redisbench_admin/run/tsbs_run_queries_redistimeseries/tsbs_run_queries_redistimeseries.py +4 -16
- redisbench_admin/run_remote/standalone.py +2 -3
- redisbench_admin/utils/benchmark_config.py +11 -13
- redisbench_admin/utils/utils.py +0 -21
- {redisbench_admin-0.11.54.dist-info → redisbench_admin-0.11.56.dist-info}/METADATA +7 -4
- redisbench_admin-0.11.56.dist-info/RECORD +117 -0
- {redisbench_admin-0.11.54.dist-info → redisbench_admin-0.11.56.dist-info}/WHEEL +1 -1
- redisbench_admin/run/ann/pkg/.dockerignore +0 -2
- redisbench_admin/run/ann/pkg/.git +0 -1
- redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +0 -100
- redisbench_admin/run/ann/pkg/.gitignore +0 -21
- redisbench_admin/run/ann/pkg/LICENSE +0 -21
- redisbench_admin/run/ann/pkg/README.md +0 -157
- redisbench_admin/run/ann/pkg/algos.yaml +0 -1294
- redisbench_admin/run/ann/pkg/algosP.yaml +0 -67
- redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +0 -2
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +0 -26
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +0 -22
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +0 -36
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +0 -110
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +0 -17
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +0 -29
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +0 -187
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +0 -190
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +0 -31
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +0 -25
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +0 -107
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +0 -124
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +0 -124
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +0 -61
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +0 -27
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +0 -36
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +0 -22
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +0 -25
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +0 -99
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +0 -41
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +0 -28
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +0 -48
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +0 -74
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +0 -100
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +0 -107
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +0 -79
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +0 -45
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +0 -115
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +0 -102
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +0 -90
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +0 -20
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +0 -34
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +0 -28
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +0 -246
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +0 -149
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +0 -43
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +0 -47
- redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +0 -1
- redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +0 -48
- redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +0 -620
- redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +0 -53
- redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +0 -325
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +0 -2
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +0 -183
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +0 -17
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +0 -165
- redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +0 -71
- redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +0 -333
- redisbench_admin/run/ann/pkg/create_dataset.py +0 -12
- redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +0 -147
- redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +0 -117
- redisbench_admin/run/ann/pkg/create_website.py +0 -272
- redisbench_admin/run/ann/pkg/install/Dockerfile +0 -11
- redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +0 -29
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +0 -31
- redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +0 -45
- redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +0 -61
- redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +0 -18
- redisbench_admin/run/ann/pkg/install/Dockerfile.flann +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +0 -6
- redisbench_admin/run/ann/pkg/install/Dockerfile.mih +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +0 -27
- redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +0 -13
- redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +0 -43
- redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +0 -6
- redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +0 -18
- redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.scann +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +0 -30
- redisbench_admin/run/ann/pkg/install/Dockerfile.vald +0 -8
- redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +0 -17
- redisbench_admin/run/ann/pkg/install.py +0 -70
- redisbench_admin/run/ann/pkg/logging.conf +0 -34
- redisbench_admin/run/ann/pkg/multirun.py +0 -298
- redisbench_admin/run/ann/pkg/plot.py +0 -159
- redisbench_admin/run/ann/pkg/protocol/bf-runner +0 -10
- redisbench_admin/run/ann/pkg/protocol/bf-runner.py +0 -204
- redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +0 -51
- redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +0 -77
- redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +0 -77
- redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +0 -47
- redisbench_admin/run/ann/pkg/protocol/specification.md +0 -194
- redisbench_admin/run/ann/pkg/requirements.txt +0 -14
- redisbench_admin/run/ann/pkg/requirements_py38.txt +0 -11
- redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
- redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/run.py +0 -12
- redisbench_admin/run/ann/pkg/run_algorithm.py +0 -3
- redisbench_admin/run/ann/pkg/templates/chartjs.template +0 -102
- redisbench_admin/run/ann/pkg/templates/detail_page.html +0 -23
- redisbench_admin/run/ann/pkg/templates/general.html +0 -58
- redisbench_admin/run/ann/pkg/templates/latex.template +0 -30
- redisbench_admin/run/ann/pkg/templates/summary.html +0 -60
- redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/test/test-jaccard.py +0 -19
- redisbench_admin/run/ann/pkg/test/test-metrics.py +0 -99
- redisbench_admin-0.11.54.dist-info/RECORD +0 -242
- {redisbench_admin-0.11.54.dist-info → redisbench_admin-0.11.56.dist-info}/entry_points.txt +0 -0
- {redisbench_admin-0.11.54.dist-info → redisbench_admin-0.11.56.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
FROM redislabs/redisearch:feature-vecsim as redis
|
|
2
|
-
FROM ann-benchmarks
|
|
3
|
-
# COPY --from=redis /usr/local/ /usr/local/
|
|
4
|
-
|
|
5
|
-
run git clone https://github.com/redis/redis.git
|
|
6
|
-
run cd redis; \
|
|
7
|
-
git checkout 6.2.5; \
|
|
8
|
-
make install
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
COPY --from=redis /usr/lib/redis /usr/lib/redis
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
RUN pip3 install redisearch redis
|
|
15
|
-
|
|
16
|
-
# CMD ["redis-server", "--loadmodule", "/usr/lib/redis/modules/redisearch.so"]
|
|
17
|
-
RUN echo 'redis-server --daemonize yes --loadmodule /usr/lib/redis/modules/redisearch.so && python3 -u run_algorithm.py "$@"' > entrypoint.sh
|
|
18
|
-
ENTRYPOINT ["/bin/bash", "/home/app/entrypoint.sh"]
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
# Adapted from https://github.com/microsoft/SPTAG/blob/master/Dockerfile
|
|
2
|
-
|
|
3
|
-
FROM ann-benchmarks
|
|
4
|
-
|
|
5
|
-
RUN git clone https://github.com/microsoft/SPTAG
|
|
6
|
-
RUN apt-get update && apt-get -y install wget build-essential libtbb-dev software-properties-common swig
|
|
7
|
-
|
|
8
|
-
# cmake >= 3.12 is required
|
|
9
|
-
RUN wget "https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.tar.gz" -q -O - \
|
|
10
|
-
| tar -xz --strip-components=1 -C /usr/local
|
|
11
|
-
|
|
12
|
-
# specific version of boost
|
|
13
|
-
RUN wget "https://boostorg.jfrog.io/artifactory/main/release/1.67.0/source/boost_1_67_0.tar.gz" -q -O - \
|
|
14
|
-
| tar -xz && \
|
|
15
|
-
cd boost_1_67_0 && \
|
|
16
|
-
./bootstrap.sh && \
|
|
17
|
-
./b2 install && \
|
|
18
|
-
# update ld cache so it finds boost in /usr/local/lib
|
|
19
|
-
ldconfig && \
|
|
20
|
-
cd .. && rm -rf boost_1_67_0
|
|
21
|
-
|
|
22
|
-
# SPTAG defaults to Python 2 if it's found on the system, so as a hack, we remove it. See https://github.com/microsoft/SPTAG/blob/master/Wrappers/CMakeLists.txt
|
|
23
|
-
RUN apt-get -y remove libpython2.7
|
|
24
|
-
|
|
25
|
-
# Compile
|
|
26
|
-
RUN cd SPTAG && mkdir build && cd build && cmake .. && make && cd ..
|
|
27
|
-
|
|
28
|
-
# so python can find the SPTAG module
|
|
29
|
-
ENV PYTHONPATH=/home/app/SPTAG/Release
|
|
30
|
-
RUN python3 -c 'import SPTAG'
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
FROM centos:7
|
|
2
|
-
|
|
3
|
-
RUN yum -y install epel-release && \
|
|
4
|
-
yum -y install centos-release-scl && \
|
|
5
|
-
yum -y --setopt=skip_missing_names_on_install=False install gcc make git python3-devel && \
|
|
6
|
-
python3 -m pip install --upgrade pip setuptools wheel && \
|
|
7
|
-
yum-config-manager --add-repo https://copr.fedorainfracloud.org/coprs/g/vespa/vespa/repo/epel-7/group_vespa-vespa-epel-7.repo && \
|
|
8
|
-
yum -y --setopt=skip_missing_names_on_install=False --enablerepo=epel-testing install vespa-ann-benchmark
|
|
9
|
-
|
|
10
|
-
WORKDIR /home/app
|
|
11
|
-
|
|
12
|
-
COPY requirements.txt run_algorithm.py ./
|
|
13
|
-
|
|
14
|
-
RUN python3 -m pip install -r requirements.txt && \
|
|
15
|
-
python3 -m pip install /opt/vespa/libexec/vespa_ann_benchmark
|
|
16
|
-
|
|
17
|
-
ENTRYPOINT ["python3", "-u", "run_algorithm.py"]
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
import argparse
|
|
4
|
-
import subprocess
|
|
5
|
-
from multiprocessing import Pool
|
|
6
|
-
from ann_benchmarks.main import positive_int
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def build(library, args):
    """Build the Docker image for a single algorithm library.

    Runs ``docker build`` with ``install/Dockerfile.<library>`` from the
    current working directory and tags the image ``ann-benchmarks-<library>``.

    Args:
        library: Suffix of the Dockerfile to build; also used as the image
            tag suffix.
        args: Optional iterable of strings; each one is forwarded to docker
            as the value of a separate ``--build-arg`` option. ``None`` or an
            empty iterable adds no build arguments.

    Returns:
        ``{library: 'success'}`` when the build command exits with status 0,
        otherwise ``{library: 'fail'}``.
    """
    print('Building %s...' % library)

    # Pass an argv list instead of a shell string: build-arg values that
    # contain spaces, quotes or shell metacharacters reach docker verbatim
    # and cannot be interpreted by a shell.
    command = ['docker', 'build']
    for build_arg in args or ():
        command += ['--build-arg', build_arg]
    command += [
        '--rm',
        '-t', 'ann-benchmarks-%s' % library,
        '-f', 'install/Dockerfile.%s' % library,
        '.',
    ]

    try:
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing/unrunnable docker binary, which the
        # shell-based invocation used to report as a non-zero exit status.
        return {library: 'fail'}
    return {library: 'success'}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def build_multiprocess(args):
    """Adapter for ``Pool.map``: unpack one argument tuple and call ``build``.

    Args:
        args: Sequence whose items are passed positionally to ``build``.

    Returns:
        Whatever ``build`` returns for those arguments.
    """
    outcome = build(*args)
    return outcome
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
if __name__ == "__main__":
    # Script entry point: build the base image, then one image per algorithm,
    # optionally in parallel, and print a per-library status summary.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--proc",
        default=1,
        type=positive_int,
        help="the number of process to build docker images")
    parser.add_argument(
        '--algorithm',
        metavar='NAME',
        help='build only the named algorithm image',
        default=None)
    parser.add_argument(
        '--build-arg',
        help='pass given args to all docker builds',
        nargs="+")
    args = parser.parse_args()

    print('Building base image...')
    # A failure here raises CalledProcessError and aborts the script: every
    # algorithm image is built on top of this base image.
    subprocess.check_call(
        'docker build --rm -t ann-benchmarks -f install/Dockerfile .',
        shell=True)

    # Selection priority: --algorithm, then the LIBRARY environment variable,
    # then every install/Dockerfile.<name> found on disk.
    if args.algorithm:
        tags = [args.algorithm]
    elif os.getenv('LIBRARY'):
        tags = [os.getenv('LIBRARY')]
    else:
        tags = [fn.split('.')[-1] for fn in os.listdir('install')
                if fn.startswith('Dockerfile.')]

    print('Building algorithm images... with (%d) processes' % args.proc)

    if args.proc == 1:
        install_status = [build(tag, args.build_arg) for tag in tags]
    else:
        pool = Pool(processes=args.proc)
        try:
            install_status = pool.map(
                build_multiprocess, [(tag, args.build_arg) for tag in tags])
        finally:
            # Always release the worker processes, even if map() raised.
            pool.close()
            pool.join()

    print('\n\nInstall Status:\n'
          + '\n'.join(str(algo) for algo in install_status))
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
[loggers]
|
|
2
|
-
keys=root,annb
|
|
3
|
-
|
|
4
|
-
[handlers]
|
|
5
|
-
keys=consoleHandler,fileHandler
|
|
6
|
-
|
|
7
|
-
[formatters]
|
|
8
|
-
keys=simpleFormatter
|
|
9
|
-
|
|
10
|
-
[formatter_simpleFormatter]
|
|
11
|
-
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
|
|
12
|
-
datefmt=
|
|
13
|
-
|
|
14
|
-
[handler_consoleHandler]
|
|
15
|
-
class=StreamHandler
|
|
16
|
-
level=INFO
|
|
17
|
-
formatter=simpleFormatter
|
|
18
|
-
args=(sys.stdout,)
|
|
19
|
-
|
|
20
|
-
[handler_fileHandler]
|
|
21
|
-
class=FileHandler
|
|
22
|
-
level=INFO
|
|
23
|
-
formatter=simpleFormatter
|
|
24
|
-
args=('annb.log','w')
|
|
25
|
-
|
|
26
|
-
[logger_root]
|
|
27
|
-
level=WARN
|
|
28
|
-
handlers=consoleHandler
|
|
29
|
-
|
|
30
|
-
[logger_annb]
|
|
31
|
-
level=INFO
|
|
32
|
-
handlers=consoleHandler,fileHandler
|
|
33
|
-
qualname=annb
|
|
34
|
-
propagate=0
|
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
from multiprocessing import Process
|
|
2
|
-
import argparse
|
|
3
|
-
import time
|
|
4
|
-
import json
|
|
5
|
-
from numpy import average
|
|
6
|
-
import h5py
|
|
7
|
-
import os
|
|
8
|
-
from watchdog.observers import Observer
|
|
9
|
-
from watchdog.events import PatternMatchingEventHandler
|
|
10
|
-
import pathlib
|
|
11
|
-
from ann_benchmarks.results import get_result_filename
|
|
12
|
-
from ann_benchmarks.algorithms.definitions import get_run_groups
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def aggregate_outputs(files, clients):
    """Merge per-client HDF5 result files into one file per run.

    File names are grouped by the text that precedes the first occurrence of
    ``'client'``. For every such prefix the files
    ``<prefix>client_1.hdf5`` .. ``<prefix>client_<clients>.hdf5`` are
    expected. Each group is written to ``<prefix minus its last character>.hdf5``:
    attributes are copied from the first client file, ``best_search_time`` and
    ``candidates`` are averaged over the clients, and the ``times``,
    ``neighbors`` and ``distances`` datasets are concatenated. The per-client
    files are deleted after a group has been merged.

    Args:
        files: Collection of per-client result file paths.
        clients: Number of clients expected per prefix.

    Raises:
        AssertionError: If the number of files does not equal
            ``number of prefixes * clients``. A diagnostic listing is printed
            before raising.
    """
    prefixes = {name.split('client')[0] for name in files}
    groups = [[prefix + f'client_{i}.hdf5' for i in range(1, clients + 1)]
              for prefix in prefixes]
    expected = len(prefixes) * clients

    if expected > len(files):
        print(f'missing files! got {len(files)} but expected {expected}')
        print('got files:')
        for name in files:
            print('\t' + name)
        print('probably missing files:')
        for group in groups:
            for name in group:
                if name not in files:
                    print('\t' + name)
        # Explicit raise instead of ``assert False`` so the check survives
        # running under ``python -O``.
        raise AssertionError(
            f'missing files! got {len(files)} but expected {expected}')

    if expected < len(files):
        print(f'too many files! got {len(files)} but expected {expected}')
        print('got files:')
        for name in files:
            print('\t' + name)
        print('probably unnecessary files:')
        wanted = {name for group in groups for name in group}
        for name in files:
            if name not in wanted:
                print('\t' + name)
        # Previously ``raise False``, which surfaced as an unrelated
        # TypeError; now fails the same way as the "missing files" branch.
        raise AssertionError(
            f'too many files! got {len(files)} but expected {expected}')

    for group in groups:
        # Drop the separator character that precedes 'client' in the name.
        merged_name = group[0].split('client')[0][:-1] + '.hdf5'
        sources = []
        try:
            for name in group:
                sources.append(h5py.File(name, 'r'))
            with h5py.File(merged_name, 'w') as merged:
                for key, value in sources[0].attrs.items():
                    merged.attrs[key] = value
                merged.attrs["best_search_time"] = average(
                    [src.attrs["best_search_time"] for src in sources])
                merged.attrs["candidates"] = average(
                    [src.attrs["candidates"] for src in sources])

                # The test work is split between the clients, so their
                # results are concatenated rather than averaged.
                merged['times'] = [t for src in sources for t in src['times']]
                merged['neighbors'] = [n for src in sources
                                       for n in src['neighbors']]
                merged['distances'] = [d for src in sources
                                       for d in src['distances']]
        finally:
            # Close every input handle even if merging failed part-way.
            for src in sources:
                src.close()

        # Only reached after a successful merge: the inputs are now redundant.
        for name in group:
            os.remove(name)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if __name__ == "__main__":
    # Script entry point: for every run group, optionally (re)build the index
    # with N parallel `run.py --build-only` clients, then query it with M
    # parallel `run.py --test-only` clients, record timings, and merge the
    # per-client result files when more than one test client was used.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--dataset',
        metavar='NAME',
        help='the dataset to load training points from',
        default='glove-100-angular')
    parser.add_argument(
        '--json-output',
        help='Path to the output file. If defined will store the results in json format.',
        default=""
    )
    parser.add_argument(
        "-k", "--count",
        default="10",
        type=str,
        help="the number of near neighbours to search for")
    parser.add_argument(
        '--host',
        type=str,
        help='host name or IP',
        default=None)
    parser.add_argument(
        '--port',
        type=str,
        help='the port "host" is listening on',
        default=None)
    parser.add_argument(
        '--auth', '-a',
        type=str,
        metavar='PASS',
        help='password for connection',
        default=None)
    parser.add_argument(
        '--user',
        type=str,
        metavar='NAME',
        help='user name for connection',
        default=None)
    parser.add_argument(
        '--build-clients',
        type=str,
        metavar='NUM',
        help='total number of clients running in parallel to build the index (could be 0)',
        default="1")
    parser.add_argument(
        '--test-clients',
        type=str,
        metavar='NUM',
        help='total number of clients running in parallel to test the index (could be 0)',
        default="1")
    parser.add_argument(
        '--force',
        help='re-run algorithms even if their results already exist',
        action='store_true')
    parser.add_argument(
        '--algorithm',
        metavar='ALGO',
        help='run redisearch with this algorithm',
        default="redisearch-hnsw")
    parser.add_argument(
        '--run-group',
        type=str,
        metavar='NAME',
        help='run only the named run group',
        default=None)
    parser.add_argument(
        '--runs',
        type=str,
        # The help text interpolates %(metavar)s; without a metavar argparse
        # renders it as "None".
        metavar='NUM',
        help='run each algorithm instance %(metavar)s times and use only'
             ' the best result',
        default="3")
    parser.add_argument(
        '--cluster',
        action='store_true',
        help='working with a cluster')
    parser.add_argument(
        '--shards',
        type=str,
        metavar='NUM',
        default="1",
        help='specify number of shards')

    args = parser.parse_args()

    # Change to the project directory as soon as the arguments are parsed:
    # the code below relies on paths relative to it (run.py, algos.yaml).
    workdir = pathlib.Path(__file__).parent.absolute()
    print("Changing the workdir to {}".format(workdir))
    os.chdir(workdir)

    # Back ends that need special handling (client import, default port,
    # cleanup before a build).
    isredis = ismilvus = ispinecone = iselastic = False

    if 'redisearch' in args.algorithm:
        from redis import Redis
        from redis.cluster import RedisCluster
        isredis = True

    elif 'milvus' in args.algorithm:
        from pymilvus import utility, connections
        ismilvus = True

    elif 'pinecone' in args.algorithm:
        import pinecone
        ispinecone = True

    elif 'elasticsearch' in args.algorithm:
        from elasticsearch import Elasticsearch
        from elastic_transport.client_utils import DEFAULT
        iselastic = True

    if args.host is None:
        args.host = 'localhost'
    if args.port is None:
        if isredis:
            args.port = '6379'
        elif ismilvus:
            args.port = '19530'
        elif iselastic:
            args.port = '9200'

    if isredis:
        redis = RedisCluster if args.cluster else Redis
        redis = redis(host=args.host, port=int(args.port), password=args.auth, username=args.user)
    elif ismilvus:
        connections.connect(host=args.host, port=args.port)
    elif ispinecone:
        pinecone.init(api_key=args.auth)
    elif iselastic:
        args.user = args.user if args.user is not None else 'elastic'
        args.auth = args.auth if args.auth is not None else os.environ.get('ELASTIC_PASSWORD', '')
        try:
            # Try plain HTTP first; es.info() forces a round trip so that a
            # TLS-only server fails here rather than later.
            es = Elasticsearch([f'http://{args.host}:{args.port}'], request_timeout=3600, basic_auth=(args.user, args.auth))
            es.info()
        except Exception:
            es = Elasticsearch([f'https://{args.host}:{args.port}'], request_timeout=3600, basic_auth=(args.user, args.auth), ca_certs=os.environ.get('ELASTIC_CA', DEFAULT))

    if args.run_group is not None:
        run_groups = [args.run_group]
    else:
        run_groups = get_run_groups('algos.yaml', args.algorithm)

    # NOTE(review): the command line is assembled by plain concatenation and
    # executed through os.system, so values containing spaces or shell
    # metacharacters (e.g. a password) are not quoted.
    base = 'python3 run.py --local --algorithm ' + args.algorithm + ' -k ' + args.count + ' --dataset ' + args.dataset

    if args.host:
        base += ' --host ' + args.host
    if args.port:
        base += ' --port ' + args.port
    if args.user:
        base += ' --user ' + args.user
    if args.auth:
        base += ' --auth ' + args.auth
    if args.force:
        base += ' --force'
    if args.cluster:
        base += ' --cluster'
    if args.shards:
        base += ' --shards ' + args.shards

    base_build = base + ' --build-only --total-clients ' + args.build_clients
    base_test = base + ' --test-only --runs {} --total-clients {}'.format(args.runs, args.test_clients)
    outputsdir = "{}/{}".format(workdir, get_result_filename(args.dataset, args.count))
    outputsdir = os.path.join(outputsdir, args.algorithm)
    if not os.path.isdir(outputsdir):
        os.makedirs(outputsdir)
    results_dicts = []
    # Defined up front so the JSON dump at the end cannot hit an undefined
    # name when there are no run groups to iterate over.
    results_dict = {}

    # skipping aggregation if using one tester
    if int(args.test_clients) > 1:
        # Collect the paths of every .hdf5 file the test clients create or
        # modify in the output directory; they are merged after the runs.
        test_stats_files = set()
        watcher = PatternMatchingEventHandler(["*.hdf5"], ignore_directories=True)

        def on_created_or_modified(event):
            test_stats_files.add(event.src_path)

        watcher.on_created = on_created_or_modified
        watcher.on_modified = on_created_or_modified
        observer = Observer()
        observer.schedule(watcher, outputsdir)
        observer.start()

    for run_group in run_groups:
        results_dict = {}
        curr_base_build = base_build + ' --run-group ' + run_group
        curr_base_test = base_test + ' --run-group ' + run_group

        if int(args.build_clients) > 0:
            # Start every build from an empty back end.
            if isredis:
                redis.flushall()
            elif ismilvus:
                if utility.has_collection('milvus'):
                    utility.drop_collection('milvus')
            elif ispinecone:
                for idx in pinecone.list_indexes():
                    pinecone.delete_index(idx)
            elif iselastic:
                for idx in es.indices.stats()['indices']:
                    es.indices.delete(index=idx)

            clients = [Process(target=os.system, args=(curr_base_build + ' --client-id ' + str(i),)) for i in
                       range(1, int(args.build_clients) + 1)]

            t0 = time.time()
            for client in clients:
                client.start()
            for client in clients:
                client.join()
            total_time = time.time() - t0
            print(f'total build time: {total_time}\n\n')

            fn = os.path.join(outputsdir, 'build_stats')
            # Context manager: the stats file is closed even if querying the
            # back end for the index size raises.
            with h5py.File(fn, 'w') as f:
                f.attrs["build_time"] = total_time
                print(fn)
                index_size = -1
                if isredis:
                    if not args.cluster:  # TODO: get total size from all the shards
                        index_size = float(redis.ft('ann_benchmark').info()['vector_index_sz_mb']) * 1024
                    f.attrs["index_size"] = index_size
                elif iselastic:
                    f.attrs["index_size"] = es.indices.stats(index='ann_benchmark')['indices']['ann_benchmark']['total']['store']['size_in_bytes']
            results_dict["build"] = {"total_clients": args.build_clients, "build_time": total_time,
                                     "vector_index_sz_mb": index_size}

        if int(args.test_clients) > 0:
            queriers = [Process(target=os.system, args=(curr_base_test + ' --client-id ' + str(i),)) for i in
                        range(1, int(args.test_clients) + 1)]
            t0 = time.time()
            for querier in queriers:
                querier.start()
            for querier in queriers:
                querier.join()
            query_time = time.time() - t0
            print(f'total test time: {query_time}')
            results_dict["query"] = {"total_clients": args.test_clients, "test_time": query_time}

        results_dicts.append(results_dict)

    # skipping aggregation if using one tester
    if int(args.test_clients) > 1:
        observer.stop()
        observer.join()
        print(
            f'summarizing {int(args.test_clients)} clients data ({len(test_stats_files)} files into {len(test_stats_files) // int(args.test_clients)})...')
        aggregate_outputs(test_stats_files, int(args.test_clients))
        print('done!')

    if args.json_output != "":
        with open(args.json_output, "w") as json_out_file:
            print(f'storing json result into: {args.json_output}')
            # NOTE(review): only the last run group's results are written;
            # results_dicts holds all of them but is not emitted. Left as-is
            # because consumers may depend on this JSON shape — confirm.
            json.dump(results_dict, json_out_file)
|
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import matplotlib as mpl
|
|
3
|
-
mpl.use('Agg') # noqa
|
|
4
|
-
import matplotlib.pyplot as plt
|
|
5
|
-
import numpy as np
|
|
6
|
-
import argparse
|
|
7
|
-
|
|
8
|
-
from ann_benchmarks.datasets import get_dataset
|
|
9
|
-
from ann_benchmarks.algorithms.definitions import get_definitions
|
|
10
|
-
from ann_benchmarks.plotting.metrics import all_metrics as metrics
|
|
11
|
-
from ann_benchmarks.plotting.utils import (get_plot_label, compute_metrics,
|
|
12
|
-
create_linestyles, create_pointset)
|
|
13
|
-
from ann_benchmarks.results import (store_results, load_all_results,
|
|
14
|
-
get_unique_algorithms)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def create_plot(all_data, raw, x_scale, y_scale, xn, yn, fn_out, linestyles,
                batch):
    """Draw one line per algorithm for metric ``yn`` against ``xn`` and save it.

    Args:
        all_data: Mapping of algorithm name to the data accepted by
            ``create_pointset``.
        raw: When true, additionally plot the second point set returned by
            ``create_pointset`` using each algorithm's faded colour.
        x_scale: X-axis scale name. A value starting with ``'a'`` (for
            example ``'a3'``) selects a custom function scale whose exponent
            is the number after the ``'a'``; anything else is passed to
            ``set_xscale``.
        y_scale: Y-axis scale name passed to ``set_yscale``.
        xn: Key into ``metrics`` for the x-axis metric.
        yn: Key into ``metrics`` for the y-axis metric.
        fn_out: Path the figure is written to.
        linestyles: Mapping of algorithm name to a
            ``(color, faded, linestyle, marker)`` tuple.
        batch: Accepted for interface compatibility; not used here.
    """
    xm, ym = (metrics[xn], metrics[yn])
    handles = []
    labels = []
    plt.figure(figsize=(12, 9))

    # Sorting by mean y-value helps aligning plots with labels
    def mean_y(algo):
        xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn)
        return -np.log(np.array(ys)).mean()

    # Track the range of x values strictly inside (0, 1) for the logit scale.
    min_x, max_x = 1, 0
    for algo in sorted(all_data.keys(), key=mean_y):
        xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn)
        min_x = min([min_x] + [x for x in xs if x > 0])
        max_x = max([max_x] + [x for x in xs if x < 1])
        color, faded, linestyle, marker = linestyles[algo]
        # The line style comes from the keyword only; also passing a '-'
        # format string is redundant and triggers a matplotlib warning.
        handle, = plt.plot(xs, ys, label=algo, color=color,
                           ms=7, mew=3, lw=3, linestyle=linestyle,
                           marker=marker)
        handles.append(handle)
        if raw:
            plt.plot(axs, ays, label=algo, color=faded,
                     ms=5, mew=2, lw=2, linestyle=linestyle,
                     marker=marker)
        labels.append(algo)

    ax = plt.gca()
    ax.set_ylabel(ym['description'])
    ax.set_xlabel(xm['description'])
    # Custom scales of the type --x-scale a3
    if x_scale[0] == 'a':
        alpha = float(x_scale[1:])
        fun = lambda x: 1 - (1 - x) ** (1 / alpha)
        inv_fun = lambda x: 1 - (1 - x) ** alpha
        ax.set_xscale('function', functions=(fun, inv_fun))
        if alpha <= 3:
            ticks = [inv_fun(x) for x in np.arange(0, 1.2, .2)]
            plt.xticks(ticks)
        if alpha > 3:
            from matplotlib import ticker
            ax.xaxis.set_major_formatter(ticker.LogitFormatter())
            plt.xticks([0, 1 / 2, 1 - 1e-1, 1 - 1e-2, 1 - 1e-3, 1 - 1e-4, 1])
    # Other x-scales
    else:
        ax.set_xscale(x_scale)
    ax.set_yscale(y_scale)
    ax.set_title(get_plot_label(xm, ym))
    ax.legend(handles, labels, loc='center left',
              bbox_to_anchor=(1, 0.5), prop={'size': 9})
    # Positional flag: the keyword was renamed from ``b`` to ``visible`` in
    # newer matplotlib, the positional form works with both.
    plt.grid(True, which='major', color='0.65', linestyle='-')
    plt.setp(ax.get_xminorticklabels(), visible=True)

    # Logit scale has to be a subset of (0,1)
    if 'lim' in xm and x_scale != 'logit':
        x0, x1 = xm['lim']
        plt.xlim(max(x0, 0), min(x1, 1))
    elif x_scale == 'logit':
        plt.xlim(min_x, max_x)
    if 'lim' in ym:
        plt.ylim(ym['lim'])

    # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789
    # ``_adjust_location`` is a private method, so only call it when the
    # installed matplotlib still provides it.
    adjust_location = getattr(ax.spines['bottom'], '_adjust_location', None)
    if adjust_location is not None:
        adjust_location()

    plt.savefig(fn_out, bbox_inches='tight')
    plt.close()
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if __name__ == "__main__":
    # Script entry point: parse the plotting options, load the stored results
    # for the chosen dataset, compute the requested metrics and render them
    # to an image file.
    cli = argparse.ArgumentParser()
    cli.add_argument('--dataset', metavar="DATASET",
                     default='glove-100-angular')
    cli.add_argument('--count', default=10)
    cli.add_argument('--definitions', metavar='FILE',
                     help='load algorithm definitions from FILE',
                     default='algos.yaml')
    cli.add_argument('--limit', default=-1)
    cli.add_argument('-o', '--output')
    cli.add_argument('-x', '--x-axis',
                     help='Which metric to use on the X-axis',
                     choices=metrics.keys(),
                     default="k-nn")
    cli.add_argument('-y', '--y-axis',
                     help='Which metric to use on the Y-axis',
                     choices=metrics.keys(),
                     default="qps")
    cli.add_argument('-X', '--x-scale',
                     help='Scale to use when drawing the X-axis. Typically linear, logit or a2',
                     default='linear')
    cli.add_argument('-Y', '--y-scale',
                     help='Scale to use when drawing the Y-axis',
                     choices=["linear", "log", "symlog", "logit"],
                     default='linear')
    cli.add_argument('--raw',
                     help='Show raw results (not just Pareto frontier) in faded colours',
                     action='store_true')
    cli.add_argument('--batch',
                     help='Plot runs in batch mode',
                     action='store_true')
    cli.add_argument('--recompute',
                     help='Clears the cache and recomputes the metrics',
                     action='store_true')
    args = cli.parse_args()

    # Derive a default output path from the dataset name when none is given.
    if not args.output:
        suffix = '-batch' if args.batch else ''
        args.output = 'results/%s.png' % (args.dataset + suffix)
        print('writing output to %s' % args.output)

    dataset, _ = get_dataset(args.dataset)
    count = int(args.count)
    unique_algorithms = get_unique_algorithms()
    results = load_all_results(args.dataset, count, args.batch)
    linestyles = create_linestyles(sorted(unique_algorithms))
    true_distances = np.array(dataset["distances"])
    runs = compute_metrics(true_distances, results,
                           args.x_axis, args.y_axis, args.recompute)
    if not runs:
        raise Exception('Nothing to plot')

    create_plot(runs, args.raw, args.x_scale, args.y_scale,
                args.x_axis, args.y_axis, args.output,
                linestyles, args.batch)
|