redisbench-admin 0.11.64__py3-none-any.whl → 0.11.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- redisbench_admin/run/ann/pkg/.dockerignore +2 -0
- redisbench_admin/run/ann/pkg/.git +1 -0
- redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +100 -0
- redisbench_admin/run/ann/pkg/.gitignore +21 -0
- redisbench_admin/run/ann/pkg/LICENSE +21 -0
- redisbench_admin/run/ann/pkg/README.md +157 -0
- redisbench_admin/run/ann/pkg/algos.yaml +1294 -0
- redisbench_admin/run/ann/pkg/algosP.yaml +67 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +2 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +26 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +22 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +36 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +110 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +17 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +29 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +187 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +190 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +31 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +25 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +107 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +124 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +124 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +61 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +39 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +27 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +36 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +22 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +39 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +25 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +99 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +41 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +28 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +48 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +74 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +100 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +107 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +79 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +39 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +45 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +115 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +102 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +90 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +20 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +34 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +28 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +246 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +149 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +43 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +47 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +1 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +48 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +620 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +53 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +325 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +2 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +183 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +17 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +165 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +71 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +333 -0
- redisbench_admin/run/ann/pkg/create_dataset.py +12 -0
- redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +147 -0
- redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +117 -0
- redisbench_admin/run/ann/pkg/create_website.py +272 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile +11 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +5 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +4 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +29 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +31 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +5 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +45 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +61 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +18 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.flann +10 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +10 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +6 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.mih +4 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +27 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +4 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +5 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +5 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +13 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +10 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +43 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +6 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +4 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +18 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +5 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.scann +5 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +4 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +4 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +30 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.vald +8 -0
- redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +17 -0
- redisbench_admin/run/ann/pkg/install.py +70 -0
- redisbench_admin/run/ann/pkg/logging.conf +34 -0
- redisbench_admin/run/ann/pkg/multirun.py +298 -0
- redisbench_admin/run/ann/pkg/plot.py +159 -0
- redisbench_admin/run/ann/pkg/protocol/bf-runner +10 -0
- redisbench_admin/run/ann/pkg/protocol/bf-runner.py +204 -0
- redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +51 -0
- redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +77 -0
- redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +77 -0
- redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +47 -0
- redisbench_admin/run/ann/pkg/protocol/specification.md +194 -0
- redisbench_admin/run/ann/pkg/requirements.txt +14 -0
- redisbench_admin/run/ann/pkg/requirements_py38.txt +11 -0
- redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
- redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/run.py +12 -0
- redisbench_admin/run/ann/pkg/run_algorithm.py +3 -0
- redisbench_admin/run/ann/pkg/templates/chartjs.template +102 -0
- redisbench_admin/run/ann/pkg/templates/detail_page.html +23 -0
- redisbench_admin/run/ann/pkg/templates/general.html +58 -0
- redisbench_admin/run/ann/pkg/templates/latex.template +30 -0
- redisbench_admin/run/ann/pkg/templates/summary.html +60 -0
- redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/test/test-jaccard.py +19 -0
- redisbench_admin/run/ann/pkg/test/test-metrics.py +99 -0
- redisbench_admin/run_remote/run_remote.py +1 -1
- {redisbench_admin-0.11.64.dist-info → redisbench_admin-0.11.65.dist-info}/METADATA +2 -5
- redisbench_admin-0.11.65.dist-info/RECORD +243 -0
- {redisbench_admin-0.11.64.dist-info → redisbench_admin-0.11.65.dist-info}/WHEEL +1 -1
- redisbench_admin-0.11.64.dist-info/RECORD +0 -117
- {redisbench_admin-0.11.64.dist-info/licenses → redisbench_admin-0.11.65.dist-info}/LICENSE +0 -0
- {redisbench_admin-0.11.64.dist-info → redisbench_admin-0.11.65.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
float:
|
|
2
|
+
any:
|
|
3
|
+
bruteforce:
|
|
4
|
+
docker-tag: ann-benchmarks-sklearn
|
|
5
|
+
module: ann_benchmarks.algorithms.bruteforce
|
|
6
|
+
constructor: BruteForce
|
|
7
|
+
base-args: ["@metric"]
|
|
8
|
+
run-groups:
|
|
9
|
+
empty:
|
|
10
|
+
args: []
|
|
11
|
+
bruteforce-blas:
|
|
12
|
+
docker-tag: ann-benchmarks-sklearn
|
|
13
|
+
module: ann_benchmarks.algorithms.bruteforce
|
|
14
|
+
constructor: BruteForceBLAS
|
|
15
|
+
base-args: ["@metric"]
|
|
16
|
+
run-groups:
|
|
17
|
+
empty:
|
|
18
|
+
args: []
|
|
19
|
+
angular:
|
|
20
|
+
pp-bruteforce-lo:
|
|
21
|
+
module: ann_benchmarks.algorithms.subprocess
|
|
22
|
+
docker-tag: ann-benchmarks-subprocess
|
|
23
|
+
constructor: FloatSubprocess
|
|
24
|
+
base-args: [["protocol/bf-runner"]]
|
|
25
|
+
run-groups:
|
|
26
|
+
jf-linear:
|
|
27
|
+
args: {"point-type": "float", "distance": "angular"}
|
|
28
|
+
pp-bruteforce-hi:
|
|
29
|
+
module: ann_benchmarks.algorithms.subprocess
|
|
30
|
+
docker-tag: ann-benchmarks-subprocess
|
|
31
|
+
constructor: FloatSubprocessPrepared
|
|
32
|
+
base-args: [["protocol/bf-runner"]]
|
|
33
|
+
run-groups:
|
|
34
|
+
jf-linear:
|
|
35
|
+
args: {"point-type": "float", "distance": "angular"}
|
|
36
|
+
pp-bruteforce-blas-lo:
|
|
37
|
+
module: ann_benchmarks.algorithms.subprocess
|
|
38
|
+
docker-tag: ann-benchmarks-subprocess
|
|
39
|
+
constructor: FloatSubprocess
|
|
40
|
+
base-args: [["protocol/bf-runner"]]
|
|
41
|
+
run-groups:
|
|
42
|
+
jf-linear:
|
|
43
|
+
args: {"point-type": "float", "distance": "angular", "fast": 1}
|
|
44
|
+
pp-bruteforce-blas-hi:
|
|
45
|
+
module: ann_benchmarks.algorithms.subprocess
|
|
46
|
+
docker-tag: ann-benchmarks-subprocess
|
|
47
|
+
constructor: FloatSubprocessPrepared
|
|
48
|
+
base-args: [["protocol/bf-runner"]]
|
|
49
|
+
run-groups:
|
|
50
|
+
jf-linear:
|
|
51
|
+
args: {"point-type": "float", "distance": "angular", "fast": 1}
|
|
52
|
+
pp-bruteforce-batch:
|
|
53
|
+
module: ann_benchmarks.algorithms.subprocess
|
|
54
|
+
docker-tag: ann-benchmarks-subprocess
|
|
55
|
+
constructor: FloatSubprocessBatch
|
|
56
|
+
base-args: [["protocol/bf-runner"]]
|
|
57
|
+
run-groups:
|
|
58
|
+
jf-linear:
|
|
59
|
+
args: {"point-type": "float", "distance": "angular"}
|
|
60
|
+
pp-bruteforce-blas-batch:
|
|
61
|
+
module: ann_benchmarks.algorithms.subprocess
|
|
62
|
+
docker-tag: ann-benchmarks-subprocess
|
|
63
|
+
constructor: FloatSubprocessBatch
|
|
64
|
+
base-args: [["protocol/bf-runner"]]
|
|
65
|
+
run-groups:
|
|
66
|
+
jf-linear:
|
|
67
|
+
args: {"point-type": "float", "distance": "angular", "fast": 1}
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
import annoy
|
|
3
|
+
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Annoy(BaseANN):
    """Benchmark wrapper around an ``annoy.AnnoyIndex``."""

    def __init__(self, metric, n_trees):
        """Store the build parameters; the index itself is created in fit().

        metric is forwarded unchanged to ``annoy.AnnoyIndex`` and n_trees to
        ``AnnoyIndex.build``.
        """
        self._n_trees = n_trees
        # Stays None until set_query_arguments() is called.
        self._search_k = None
        self._metric = metric

    def fit(self, X):
        """Create an index of width ``X.shape[1]``, add every row, build it."""
        self._annoy = annoy.AnnoyIndex(X.shape[1], metric=self._metric)
        for i, x in enumerate(X):
            self._annoy.add_item(i, x.tolist())
        self._annoy.build(self._n_trees)

    def set_query_arguments(self, search_k):
        """Set the search_k value passed to every subsequent query."""
        self._search_k = search_k

    def query(self, v, n):
        """Return the result of ``get_nns_by_vector`` for vector v and n."""
        # -1 is Annoy's own default for search_k; use it when no query
        # argument was configured instead of forwarding None.
        search_k = -1 if self._search_k is None else self._search_k
        return self._annoy.get_nns_by_vector(v.tolist(), n, search_k)

    def __str__(self):
        # search_k is formatted with %s (not %d) so that describing an
        # instance before set_query_arguments() does not raise TypeError.
        return 'Annoy(n_trees=%d, search_k=%s)' % (self._n_trees,
                                                   self._search_k)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
import sklearn.neighbors
|
|
3
|
+
import sklearn.preprocessing
|
|
4
|
+
from ann_benchmarks.algorithms.base import BaseANN
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BallTree(BaseANN):
    """Nearest-neighbour search backed by ``sklearn.neighbors.BallTree``."""

    def __init__(self, metric, leaf_size=20):
        self._metric = metric
        self._leaf_size = leaf_size
        self.name = 'BallTree(leaf_size=%d)' % self._leaf_size

    def fit(self, X):
        """Build the tree over X (rows are L2-normalised for 'angular')."""
        points = X
        if self._metric == 'angular':
            points = sklearn.preprocessing.normalize(
                points, axis=1, norm='l2')
        self._tree = sklearn.neighbors.BallTree(
            points, leaf_size=self._leaf_size)

    def query(self, v, n):
        """Return the indices of the n nearest neighbours of v."""
        target = v
        if self._metric == 'angular':
            # Normalise the query the same way the indexed rows were.
            normalised = sklearn.preprocessing.normalize(
                [target], axis=1, norm='l2')
            target = normalised[0]
        _, neighbours = self._tree.query([target], k=n)
        return neighbours[0]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
from multiprocessing.pool import ThreadPool
|
|
3
|
+
import psutil
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseANN(object):
    """Base class for benchmarked algorithms.

    Subclasses override fit() and query(); the remaining methods provide
    default behaviour shared by all algorithms.
    """

    def done(self):
        """Hook with no default behaviour."""
        pass

    def get_memory_usage(self):
        """Return the current memory usage of this algorithm instance
        (in kilobytes), or None if this information is not available."""
        # return in kB for backwards compatibility
        return psutil.Process().memory_info().rss / 1024

    def fit(self, X):
        """Build the index from X; the base implementation does nothing."""
        pass

    def query(self, q, n):
        """Return candidate indices for query q; empty by default."""
        return []  # array of candidate indices

    def batch_query(self, X, n):
        """Provide all queries at once and let algorithm figure out
        how to handle it. Default implementation uses a ThreadPool
        to parallelize query processing.

        The per-query results are stored on ``self.res`` (in the order of X)
        and are retrieved with get_batch_results().
        """
        pool = ThreadPool()
        try:
            self.res = pool.map(lambda q: self.query(q, n), X)
        finally:
            # Release the worker threads deterministically instead of
            # leaving an unclosed pool behind on every call.
            pool.close()
            pool.join()

    def get_batch_results(self):
        """Return the results stored by the last batch_query() call."""
        return self.res

    def get_additional(self):
        """Return extra result attributes; none by default."""
        return {}

    def __str__(self):
        # Subclasses are expected to set ``self.name``.
        return self.name
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
import numpy
|
|
3
|
+
import sklearn.neighbors
|
|
4
|
+
from ann_benchmarks.distance import metrics as pd
|
|
5
|
+
from ann_benchmarks.algorithms.base import BaseANN
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BruteForce(BaseANN):
    """Exact search using sklearn's brute-force ``NearestNeighbors``."""

    def __init__(self, metric):
        supported = ('angular', 'euclidean', 'hamming')
        if metric not in supported:
            message = "BruteForce doesn't support metric %s" % metric
            raise NotImplementedError(message)
        self._metric = metric
        self.name = 'BruteForce()'

    def fit(self, X):
        """Fit a brute-force NearestNeighbors model on X."""
        # Translate the benchmark metric name into sklearn's name.
        sklearn_names = {
            'angular': 'cosine',
            'euclidean': 'l2',
            'hamming': 'hamming',
        }
        self._nbrs = sklearn.neighbors.NearestNeighbors(
            algorithm='brute', metric=sklearn_names[self._metric])
        self._nbrs.fit(X)

    def query(self, v, n):
        """Return a list with the indices of the n neighbours of v."""
        neighbours = self._nbrs.kneighbors(
            [v], return_distance=False, n_neighbors=n)
        return list(neighbours[0])

    def query_with_distances(self, v, n):
        """Return an iterator of (index, distance) pairs for v."""
        distances, positions = self._nbrs.kneighbors(
            [v], return_distance=True, n_neighbors=n)
        found = list(positions[0])
        how_far = list(distances[0])
        return zip(found, how_far)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BruteForceBLAS(BaseANN):
    """kNN search that uses a linear scan = brute force."""

    def __init__(self, metric, precision=numpy.float32):
        """Validate the metric/precision pair and remember both.

        Raises NotImplementedError for an unsupported metric, or for the
        'hamming' metric combined with a non-boolean precision.
        """
        if metric not in ('angular', 'euclidean', 'hamming', 'jaccard'):
            raise NotImplementedError(
                "BruteForceBLAS doesn't support metric %s" % metric)
        # The ``numpy.bool`` alias of the builtin bool was removed in
        # NumPy 1.24, so compare against names that always exist; both the
        # builtin and numpy.bool_ are accepted.
        if metric == 'hamming' and precision not in (bool, numpy.bool_):
            raise NotImplementedError(
                "BruteForceBLAS doesn't support precision"
                " %s with Hamming distances" % precision)
        self._metric = metric
        self._precision = precision
        self.name = 'BruteForceBLAS()'

    def fit(self, X):
        """Initialize the search index."""
        if self._metric == 'angular':
            # precompute (squared) length of each vector
            lens = (X ** 2).sum(-1)
            # normalize index vectors to unit length
            # NOTE: this division is in place and modifies the caller's X.
            X /= numpy.sqrt(lens)[..., numpy.newaxis]
            self.index = numpy.ascontiguousarray(X, dtype=self._precision)
        elif self._metric == 'hamming':
            # Regarding bitvectors as vectors in l_2 is faster for blas
            X = X.astype(numpy.float32)
            # precompute (squared) length of each vector
            lens = (X ** 2).sum(-1)
            self.index = numpy.ascontiguousarray(X, dtype=numpy.float32)
            self.lengths = numpy.ascontiguousarray(lens, dtype=numpy.float32)
        elif self._metric == 'euclidean':
            # precompute (squared) length of each vector
            lens = (X ** 2).sum(-1)
            self.index = numpy.ascontiguousarray(X, dtype=self._precision)
            self.lengths = numpy.ascontiguousarray(lens, dtype=self._precision)
        elif self._metric == 'jaccard':
            self.index = X
        else:
            # shouldn't get past the constructor!
            # Raised explicitly so the guard survives ``python -O``.
            raise AssertionError("invalid metric")

    def query(self, v, n):
        """Return the indices produced by query_with_distances(v, n)."""
        return [index for index, _ in self.query_with_distances(v, n)]

    def query_with_distances(self, v, n):
        """Find indices of `n` most similar vectors from the index to query
        vector `v`.

        Returns an iterator of (index, distance) pairs; the pairs are not
        sorted by distance. When n is at least the number of indexed
        vectors, every indexed vector is a candidate.
        """

        if self._metric != 'jaccard':
            # use same precision for query as for index
            v = numpy.ascontiguousarray(v, dtype=self.index.dtype)

        # HACK we ignore query length as that's a constant
        # not affecting the final ordering
        if self._metric == 'angular':
            # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b) # noqa
            dists = -numpy.dot(self.index, v)
        elif self._metric == 'euclidean':
            # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab # noqa
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'hamming':
            # Just compute hamming distance using euclidean distance
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'jaccard':
            dists = [pd[self._metric]['distance'](v, e) for e in self.index]
        else:
            # shouldn't get past the constructor!
            raise AssertionError("invalid metric")

        # partition-sort by distance, get `n` closest
        total = len(dists)
        if n < total:
            nearest_indices = numpy.argpartition(dists, n)[:n]
        else:
            # argpartition rejects kth >= len(dists); with that few indexed
            # vectors all of them are among the n closest.
            nearest_indices = numpy.arange(total)
        indices = [idx for idx in nearest_indices if pd[self._metric]
                   ["distance_valid"](dists[idx])]

        def fix(index):
            # Replace the ordering-only score by the real metric distance.
            ep = self.index[index]
            ev = v
            return (index, pd[self._metric]['distance'](ep, ev))
        return map(fix, indices)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
from scipy.spatial import cKDTree
|
|
3
|
+
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CKDTree(BaseANN):
    """Nearest-neighbour search backed by ``scipy.spatial.cKDTree``."""

    def __init__(self, metric, leaf_size=20):
        self._metric = metric
        self._leaf_size = leaf_size
        self.name = 'CKDTree(leaf_size=%d)' % self._leaf_size

    def fit(self, X):
        """Build the tree over X using the configured leaf size."""
        tree = cKDTree(X, leafsize=self._leaf_size)
        self._tree = tree

    def query(self, v, n):
        """Return the tree's neighbour indices for the single query v."""
        _, neighbours = self._tree.query([v], k=n)
        return neighbours[0]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
from datasketch import MinHashLSHForest, MinHash
|
|
3
|
+
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DataSketch(BaseANN):
    """Jaccard search using datasketch's ``MinHashLSHForest``."""

    def __init__(self, metric, n_perm, n_rep):
        """Remember the MinHash parameters.

        Raises NotImplementedError for any metric other than 'jaccard'.
        """
        # Compare for equality: ``metric not in ('jaccard')`` was a substring
        # test on a plain string and let values such as '' or 'jac' through.
        if metric != 'jaccard':
            raise NotImplementedError(
                "Datasketch doesn't support metric %s" % metric)
        self._n_perm = n_perm
        self._n_rep = n_rep
        self._metric = metric
        self.name = 'Datasketch(n_perm=%d, n_rep=%d)' % (n_perm, n_rep)

    def _minhash(self, elements):
        """Return a MinHash updated with the UTF-8 str() of each element."""
        m = MinHash(num_perm=self._n_perm)
        for e in elements:
            m.update(str(e).encode('utf8'))
        return m

    def fit(self, X):
        """Add one MinHash per item of X, keyed by its position, and index."""
        self._index = MinHashLSHForest(num_perm=self._n_perm, l=self._n_rep)
        for i, x in enumerate(X):
            self._index.add(str(i), self._minhash(x))
        self._index.index()

    def query(self, v, n):
        """Return an iterator over the integer keys found for v."""
        # Keys were stored as str(position) in fit(); convert them back.
        return map(int, self._index.query(self._minhash(v), n))
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
from os import sep as pathsep
|
|
3
|
+
import collections
|
|
4
|
+
import importlib
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import traceback
|
|
8
|
+
import yaml
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from itertools import product
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Record describing one concrete algorithm configuration, as produced by
# get_definitions().
Definition = collections.namedtuple(
    'Definition',
    [
        'algorithm',
        'run_group',
        'constructor',
        'module',
        'docker_tag',
        'arguments',
        'query_argument_groups',
        'disabled',
    ],
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def instantiate_algorithm(definition):
    """Import ``definition.module`` and call its constructor.

    The attribute named ``definition.constructor`` is looked up on the
    imported module and called with ``definition.arguments`` unpacked as
    positional arguments; its return value is returned.
    """
    description = (
        definition.module, definition.constructor, definition.arguments)
    print('Trying to instantiate %s.%s(%s)' % description)
    loaded = importlib.import_module(definition.module)
    factory = getattr(loaded, definition.constructor)
    return factory(*definition.arguments)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class InstantiationStatus(Enum):
    """Outcome of checking whether an algorithm can be instantiated."""

    # The module imported and exposes the constructor.
    AVAILABLE = 0
    # The module imported but has no attribute with the constructor's name.
    NO_CONSTRUCTOR = 1
    # Importing the module raised ImportError.
    NO_MODULE = 2
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def algorithm_status(definition):
    """Report whether the definition's module and constructor can be found.

    Returns InstantiationStatus.NO_MODULE when an ImportError is raised,
    NO_CONSTRUCTOR when the module lacks the named attribute, and
    AVAILABLE otherwise.
    """
    try:
        loaded = importlib.import_module(definition.module)
        found = hasattr(loaded, definition.constructor)
    except ImportError:
        return InstantiationStatus.NO_MODULE
    if found:
        return InstantiationStatus.AVAILABLE
    return InstantiationStatus.NO_CONSTRUCTOR
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _generate_combinations(args):
|
|
45
|
+
if isinstance(args, list):
|
|
46
|
+
args = [el if isinstance(el, list) else [el] for el in args]
|
|
47
|
+
return [list(x) for x in product(*args)]
|
|
48
|
+
elif isinstance(args, dict):
|
|
49
|
+
flat = []
|
|
50
|
+
for k, v in args.items():
|
|
51
|
+
if isinstance(v, list):
|
|
52
|
+
flat.append([(k, el) for el in v])
|
|
53
|
+
else:
|
|
54
|
+
flat.append([(k, v)])
|
|
55
|
+
return [dict(x) for x in product(*flat)]
|
|
56
|
+
else:
|
|
57
|
+
raise TypeError("No args handling exists for %s" % type(args).__name__)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _substitute_variables(arg, vs):
|
|
61
|
+
if isinstance(arg, dict):
|
|
62
|
+
return dict([(k, _substitute_variables(v, vs))
|
|
63
|
+
for k, v in arg.items()])
|
|
64
|
+
elif isinstance(arg, list):
|
|
65
|
+
return [_substitute_variables(a, vs) for a in arg]
|
|
66
|
+
elif isinstance(arg, str) and arg in vs:
|
|
67
|
+
return vs[arg]
|
|
68
|
+
else:
|
|
69
|
+
return arg
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _get_definitions(definition_file):
    """Parse the YAML file at definition_file with the safe loader."""
    with open(definition_file, "r") as handle:
        parsed = yaml.load(handle, Loader=yaml.SafeLoader)
    return parsed
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def list_algorithms(definition_file):
    """Print every algorithm name, grouped by point type and metric."""
    definitions = _get_definitions(definition_file)

    print('The following algorithms are supported...')
    for point in definitions:
        print('\t... for the point type "%s"...' % point)
        by_metric = definitions[point]
        for metric in by_metric:
            print('\t\t... and the distance metric "%s":' % metric)
            for algorithm in by_metric[metric]:
                print('\t\t\t%s' % algorithm)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_unique_algorithms(definition_file):
    """Return the sorted list of distinct algorithm names in the file."""
    definitions = _get_definitions(definition_file)
    names = {
        algorithm
        for point in definitions
        for metric in definitions[point]
        for algorithm in definitions[point][metric]
    }
    return sorted(names)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_run_groups(definition_file, algo=None):
    """Return the sorted list of distinct run-group names in the file.

    When algo is given, only the run groups of algorithms with that name
    are collected; with the default None every algorithm contributes.
    """
    definitions = _get_definitions(definition_file)
    run_groups = set()
    for point in definitions:
        for metric in definitions[point]:
            for algorithm in definitions[point][metric]:
                # Identity test for None instead of ``== None``.
                if algo is None or algo == algorithm:
                    run_groups.update(
                        definitions[point][metric][algorithm]['run-groups'].keys())
    return sorted(run_groups)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _expand_arg_groups(arg_groups):
    """Return every combination described by a list of argument groups."""
    # Dictionaries need to be expanded into lists in order for the
    # subsequent call to _generate_combinations to do the right thing.
    groups = [
        _generate_combinations(group) if isinstance(group, dict) else group
        for group in arg_groups
    ]
    return _generate_combinations(groups)


def get_definitions(definition_file, dimension, point_type="float",
                    distance_metric="euclidean", count=10, conn_params=None):
    """Build the list of Definition records for one point type and metric.

    Algorithms listed under the "any" metric of point_type are combined
    with those listed under distance_metric (the latter win on name
    clashes). For every run group of every algorithm, one Definition is
    produced per constructor-argument combination. The placeholder strings
    "@count", "@metric", "@dimension" and "@connection" in the arguments
    are replaced by count, distance_metric, dimension and conn_params.

    conn_params defaults to a fresh dictionary with the keys host, port,
    auth, user (all None), cluster (False) and shards (1).

    Raises Exception when an algorithm lacks a "docker-tag", "module" or
    "constructor" property, and AssertionError when a run group defines
    neither "arg-groups" nor "args".
    """
    if conn_params is None:
        # Built per call: a dict literal in the signature would be a single
        # object shared by every call and by every returned Definition.
        conn_params = {'host': None, 'port': None, 'auth': None,
                       'user': None, 'cluster': False, 'shards': 1}

    definitions = _get_definitions(definition_file)

    algorithm_definitions = {}
    if "any" in definitions[point_type]:
        algorithm_definitions.update(definitions[point_type]["any"])
    algorithm_definitions.update(definitions[point_type][distance_metric])

    # Placeholder values are the same for every definition.
    vs = {
        "@count": count,
        "@metric": distance_metric,
        "@dimension": dimension,
        "@connection": conn_params
    }

    result = []
    for (name, algo) in algorithm_definitions.items():
        for k in ['docker-tag', 'module', 'constructor']:
            if k not in algo:
                raise Exception(
                    'algorithm %s does not define a "%s" property' % (name, k))

        base_args = algo.get("base-args", [])

        for run_group_name, run_group in algo["run-groups"].items():
            if "arg-groups" in run_group:
                args = _expand_arg_groups(run_group["arg-groups"])
            elif "args" in run_group:
                args = _generate_combinations(run_group["args"])
            else:
                # Raised explicitly (not via ``assert``) so the check is
                # still performed under ``python -O``.
                raise AssertionError("? what? %s" % run_group)

            if "query-arg-groups" in run_group:
                query_args = _expand_arg_groups(run_group["query-arg-groups"])
            elif "query-args" in run_group:
                query_args = _generate_combinations(run_group["query-args"])
            else:
                query_args = []

            for arg_group in args:
                aargs = []
                aargs.extend(base_args)
                if isinstance(arg_group, list):
                    aargs.extend(arg_group)
                else:
                    aargs.append(arg_group)

                aargs = [_substitute_variables(arg, vs) for arg in aargs]
                result.append(Definition(
                    algorithm=name,
                    run_group=run_group_name,
                    docker_tag=algo['docker-tag'],
                    module=algo['module'],
                    constructor=algo['constructor'],
                    arguments=aargs,
                    query_argument_groups=query_args,
                    disabled=algo.get('disabled', False)
                ))

    return result
|