redisbench-admin 0.11.66__py3-none-any.whl → 0.11.68__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. redisbench_admin/run/args.py +1 -0
  2. redisbench_admin/run/cluster.py +1 -3
  3. redisbench_admin/run_remote/remote_db.py +3 -1
  4. redisbench_admin/run_remote/remote_helpers.py +27 -11
  5. redisbench_admin/run_remote/run_remote.py +11 -8
  6. redisbench_admin/run_remote/standalone.py +6 -2
  7. redisbench_admin/utils/benchmark_config.py +6 -2
  8. redisbench_admin/utils/local.py +4 -2
  9. redisbench_admin/utils/remote.py +81 -33
  10. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info}/METADATA +5 -2
  11. redisbench_admin-0.11.68.dist-info/RECORD +117 -0
  12. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info}/WHEEL +1 -1
  13. redisbench_admin/run/ann/pkg/.dockerignore +0 -2
  14. redisbench_admin/run/ann/pkg/.git +0 -1
  15. redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +0 -100
  16. redisbench_admin/run/ann/pkg/.gitignore +0 -21
  17. redisbench_admin/run/ann/pkg/LICENSE +0 -21
  18. redisbench_admin/run/ann/pkg/README.md +0 -157
  19. redisbench_admin/run/ann/pkg/algos.yaml +0 -1294
  20. redisbench_admin/run/ann/pkg/algosP.yaml +0 -67
  21. redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +0 -2
  22. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
  23. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +0 -26
  24. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +0 -22
  25. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +0 -36
  26. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +0 -110
  27. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +0 -17
  28. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +0 -29
  29. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +0 -187
  30. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +0 -190
  31. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +0 -31
  32. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +0 -25
  33. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +0 -107
  34. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +0 -124
  35. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +0 -124
  36. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +0 -61
  37. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +0 -39
  38. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +0 -27
  39. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +0 -36
  40. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +0 -22
  41. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +0 -39
  42. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +0 -25
  43. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +0 -99
  44. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +0 -41
  45. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +0 -28
  46. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +0 -48
  47. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +0 -74
  48. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +0 -100
  49. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +0 -107
  50. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +0 -79
  51. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +0 -39
  52. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +0 -45
  53. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +0 -115
  54. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +0 -102
  55. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +0 -90
  56. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +0 -20
  57. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +0 -34
  58. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +0 -28
  59. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +0 -246
  60. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +0 -149
  61. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +0 -43
  62. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +0 -47
  63. redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +0 -1
  64. redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +0 -48
  65. redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +0 -620
  66. redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +0 -53
  67. redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +0 -325
  68. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +0 -2
  69. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +0 -183
  70. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +0 -17
  71. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +0 -165
  72. redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +0 -71
  73. redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +0 -333
  74. redisbench_admin/run/ann/pkg/create_dataset.py +0 -12
  75. redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +0 -147
  76. redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +0 -117
  77. redisbench_admin/run/ann/pkg/create_website.py +0 -272
  78. redisbench_admin/run/ann/pkg/install/Dockerfile +0 -11
  79. redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +0 -5
  80. redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +0 -4
  81. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +0 -29
  82. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +0 -31
  83. redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +0 -5
  84. redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +0 -45
  85. redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +0 -61
  86. redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +0 -18
  87. redisbench_admin/run/ann/pkg/install/Dockerfile.flann +0 -10
  88. redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +0 -10
  89. redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +0 -6
  90. redisbench_admin/run/ann/pkg/install/Dockerfile.mih +0 -4
  91. redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +0 -27
  92. redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +0 -4
  93. redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +0 -5
  94. redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +0 -5
  95. redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +0 -13
  96. redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +0 -10
  97. redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +0 -43
  98. redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +0 -6
  99. redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +0 -4
  100. redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +0 -18
  101. redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +0 -5
  102. redisbench_admin/run/ann/pkg/install/Dockerfile.scann +0 -5
  103. redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +0 -4
  104. redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +0 -4
  105. redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +0 -30
  106. redisbench_admin/run/ann/pkg/install/Dockerfile.vald +0 -8
  107. redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +0 -17
  108. redisbench_admin/run/ann/pkg/install.py +0 -70
  109. redisbench_admin/run/ann/pkg/logging.conf +0 -34
  110. redisbench_admin/run/ann/pkg/multirun.py +0 -298
  111. redisbench_admin/run/ann/pkg/plot.py +0 -159
  112. redisbench_admin/run/ann/pkg/protocol/bf-runner +0 -10
  113. redisbench_admin/run/ann/pkg/protocol/bf-runner.py +0 -204
  114. redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +0 -51
  115. redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +0 -77
  116. redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +0 -77
  117. redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +0 -47
  118. redisbench_admin/run/ann/pkg/protocol/specification.md +0 -194
  119. redisbench_admin/run/ann/pkg/requirements.txt +0 -14
  120. redisbench_admin/run/ann/pkg/requirements_py38.txt +0 -11
  121. redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
  122. redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
  123. redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
  124. redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
  125. redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
  126. redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
  127. redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
  128. redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
  129. redisbench_admin/run/ann/pkg/run.py +0 -12
  130. redisbench_admin/run/ann/pkg/run_algorithm.py +0 -3
  131. redisbench_admin/run/ann/pkg/templates/chartjs.template +0 -102
  132. redisbench_admin/run/ann/pkg/templates/detail_page.html +0 -23
  133. redisbench_admin/run/ann/pkg/templates/general.html +0 -58
  134. redisbench_admin/run/ann/pkg/templates/latex.template +0 -30
  135. redisbench_admin/run/ann/pkg/templates/summary.html +0 -60
  136. redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
  137. redisbench_admin/run/ann/pkg/test/test-jaccard.py +0 -19
  138. redisbench_admin/run/ann/pkg/test/test-metrics.py +0 -99
  139. redisbench_admin-0.11.66.dist-info/RECORD +0 -243
  140. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info}/entry_points.txt +0 -0
  141. {redisbench_admin-0.11.66.dist-info → redisbench_admin-0.11.68.dist-info/licenses}/LICENSE +0 -0
@@ -1,67 +0,0 @@
1
- float:
2
- any:
3
- bruteforce:
4
- docker-tag: ann-benchmarks-sklearn
5
- module: ann_benchmarks.algorithms.bruteforce
6
- constructor: BruteForce
7
- base-args: ["@metric"]
8
- run-groups:
9
- empty:
10
- args: []
11
- bruteforce-blas:
12
- docker-tag: ann-benchmarks-sklearn
13
- module: ann_benchmarks.algorithms.bruteforce
14
- constructor: BruteForceBLAS
15
- base-args: ["@metric"]
16
- run-groups:
17
- empty:
18
- args: []
19
- angular:
20
- pp-bruteforce-lo:
21
- module: ann_benchmarks.algorithms.subprocess
22
- docker-tag: ann-benchmarks-subprocess
23
- constructor: FloatSubprocess
24
- base-args: [["protocol/bf-runner"]]
25
- run-groups:
26
- jf-linear:
27
- args: {"point-type": "float", "distance": "angular"}
28
- pp-bruteforce-hi:
29
- module: ann_benchmarks.algorithms.subprocess
30
- docker-tag: ann-benchmarks-subprocess
31
- constructor: FloatSubprocessPrepared
32
- base-args: [["protocol/bf-runner"]]
33
- run-groups:
34
- jf-linear:
35
- args: {"point-type": "float", "distance": "angular"}
36
- pp-bruteforce-blas-lo:
37
- module: ann_benchmarks.algorithms.subprocess
38
- docker-tag: ann-benchmarks-subprocess
39
- constructor: FloatSubprocess
40
- base-args: [["protocol/bf-runner"]]
41
- run-groups:
42
- jf-linear:
43
- args: {"point-type": "float", "distance": "angular", "fast": 1}
44
- pp-bruteforce-blas-hi:
45
- module: ann_benchmarks.algorithms.subprocess
46
- docker-tag: ann-benchmarks-subprocess
47
- constructor: FloatSubprocessPrepared
48
- base-args: [["protocol/bf-runner"]]
49
- run-groups:
50
- jf-linear:
51
- args: {"point-type": "float", "distance": "angular", "fast": 1}
52
- pp-bruteforce-batch:
53
- module: ann_benchmarks.algorithms.subprocess
54
- docker-tag: ann-benchmarks-subprocess
55
- constructor: FloatSubprocessBatch
56
- base-args: [["protocol/bf-runner"]]
57
- run-groups:
58
- jf-linear:
59
- args: {"point-type": "float", "distance": "angular"}
60
- pp-bruteforce-blas-batch:
61
- module: ann_benchmarks.algorithms.subprocess
62
- docker-tag: ann-benchmarks-subprocess
63
- constructor: FloatSubprocessBatch
64
- base-args: [["protocol/bf-runner"]]
65
- run-groups:
66
- jf-linear:
67
- args: {"point-type": "float", "distance": "angular", "fast": 1}
@@ -1,2 +0,0 @@
1
- from __future__ import absolute_import
2
- # from ann_benchmarks.main import *
@@ -1,26 +0,0 @@
1
- from __future__ import absolute_import
2
- import annoy
3
- from ann_benchmarks.algorithms.base import BaseANN
4
-
5
-
6
class Annoy(BaseANN):
    """Benchmark wrapper around an ``annoy.AnnoyIndex``.

    ``n_trees`` is used when the index is built in ``fit``; ``search_k`` is a
    query-time parameter supplied later through ``set_query_arguments``.
    """

    def __init__(self, metric, n_trees):
        self._n_trees = n_trees
        # Unknown until set_query_arguments() is called.
        self._search_k = None
        self._metric = metric

    def fit(self, X):
        """Add every row of ``X`` to a fresh index and build it."""
        self._annoy = annoy.AnnoyIndex(X.shape[1], metric=self._metric)
        for i, x in enumerate(X):
            self._annoy.add_item(i, x.tolist())
        self._annoy.build(self._n_trees)

    def set_query_arguments(self, search_k):
        """Store the ``search_k`` value used by subsequent queries."""
        self._search_k = search_k

    def query(self, v, n):
        """Return the ids of the ``n`` nearest neighbours of ``v``."""
        # Annoy takes an integer search_k; -1 is its own default value, so
        # use it when no query arguments have been set yet instead of
        # passing None.
        search_k = -1 if self._search_k is None else self._search_k
        return self._annoy.get_nns_by_vector(v.tolist(), n, search_k)

    def __str__(self):
        # '%d' cannot format None, which is the value of search_k until
        # set_query_arguments() has been called.
        if self._search_k is None:
            return 'Annoy(n_trees=%d, search_k=None)' % self._n_trees
        return 'Annoy(n_trees=%d, search_k=%d)' % (self._n_trees,
                                                   self._search_k)
@@ -1,22 +0,0 @@
1
- from __future__ import absolute_import
2
- import sklearn.neighbors
3
- import sklearn.preprocessing
4
- from ann_benchmarks.algorithms.base import BaseANN
5
-
6
-
7
class BallTree(BaseANN):
    """Nearest-neighbour search backed by ``sklearn.neighbors.BallTree``.

    With the 'angular' metric, both the indexed points and each query
    vector are L2-normalised before they reach the tree.
    """

    def __init__(self, metric, leaf_size=20):
        self._metric = metric
        self._leaf_size = leaf_size
        self.name = 'BallTree(leaf_size=%d)' % self._leaf_size

    def fit(self, X):
        """Build the tree over the rows of ``X``."""
        points = X
        if self._metric == 'angular':
            points = sklearn.preprocessing.normalize(
                points, axis=1, norm='l2')
        self._tree = sklearn.neighbors.BallTree(
            points, leaf_size=self._leaf_size)

    def query(self, v, n):
        """Return the indices of the ``n`` nearest neighbours of ``v``."""
        target = v
        if self._metric == 'angular':
            target = sklearn.preprocessing.normalize(
                [target], axis=1, norm='l2')[0]
        # Only the index array is needed; distances are discarded.
        _, neighbour_rows = self._tree.query([target], k=n)
        return neighbour_rows[0]
@@ -1,36 +0,0 @@
1
- from __future__ import absolute_import
2
- from multiprocessing.pool import ThreadPool
3
- import psutil
4
-
5
-
6
class BaseANN(object):
    """Base class for the benchmarked algorithms.

    Every method has a do-nothing or trivial default; subclasses override
    the ones they support.
    """

    def done(self):
        """Hook with no default behaviour."""
        pass

    def get_memory_usage(self):
        """Return the current memory usage of this algorithm instance
        (in kilobytes), or None if this information is not available."""
        # return in kB for backwards compatibility
        return psutil.Process().memory_info().rss / 1024

    def fit(self, X):
        """Build the index from ``X``; the default does nothing."""
        pass

    def query(self, q, n):
        """Return candidate indices for query ``q``; the default is empty."""
        return []  # array of candidate indices

    def batch_query(self, X, n):
        """Provide all queries at once and let algorithm figure out
        how to handle it. Default implementation uses a ThreadPool
        to parallelize query processing.

        The results are stored on ``self.res`` and retrieved with
        ``get_batch_results``.
        """
        pool = ThreadPool()
        try:
            self.res = pool.map(lambda q: self.query(q, n), X)
        finally:
            # Release the worker threads; otherwise every call leaves a
            # pool of idle threads behind.
            pool.close()
            pool.join()

    def get_batch_results(self):
        """Return the results stored by the last ``batch_query`` call."""
        return self.res

    def get_additional(self):
        """Return extra per-run attributes; the default is an empty dict."""
        return {}

    def __str__(self):
        # ``name`` is not set here; subclasses are expected to assign it
        # or to override __str__.
        return self.name
@@ -1,110 +0,0 @@
1
- from __future__ import absolute_import
2
- import numpy
3
- import sklearn.neighbors
4
- from ann_benchmarks.distance import metrics as pd
5
- from ann_benchmarks.algorithms.base import BaseANN
6
-
7
-
8
class BruteForce(BaseANN):
    """Exact search using scikit-learn's brute-force ``NearestNeighbors``."""

    def __init__(self, metric):
        supported = ('angular', 'euclidean', 'hamming')
        if metric not in supported:
            raise NotImplementedError(
                "BruteForce doesn't support metric %s" % metric)
        self._metric = metric
        self.name = 'BruteForce()'

    def fit(self, X):
        """Fit the estimator on ``X`` with the matching sklearn metric."""
        # Translate the benchmark's metric name into scikit-learn's.
        sklearn_metric = {
            'angular': 'cosine',
            'euclidean': 'l2',
            'hamming': 'hamming',
        }[self._metric]
        self._nbrs = sklearn.neighbors.NearestNeighbors(
            algorithm='brute', metric=sklearn_metric)
        self._nbrs.fit(X)

    def query(self, v, n):
        """Return the indices of the ``n`` nearest neighbours of ``v``."""
        neighbours = self._nbrs.kneighbors(
            [v], return_distance=False, n_neighbors=n)
        return list(neighbours[0])

    def query_with_distances(self, v, n):
        """Return an iterator of ``(index, distance)`` pairs for ``v``."""
        distances, positions = self._nbrs.kneighbors(
            [v], return_distance=True, n_neighbors=n)
        return zip(list(positions[0]), list(distances[0]))
31
-
32
-
33
class BruteForceBLAS(BaseANN):
    """kNN search that uses a linear scan = brute force."""

    def __init__(self, metric, precision=numpy.float32):
        """Validate ``metric`` and remember the dtype used for the index.

        Raises:
            NotImplementedError: for an unsupported metric, or for the
                'hamming' metric with a non-boolean ``precision``.
        """
        if metric not in ('angular', 'euclidean', 'hamming', 'jaccard'):
            raise NotImplementedError(
                "BruteForceBLAS doesn't support metric %s" % metric)
        # Compare as dtypes: the ``numpy.bool`` alias does not exist in
        # every NumPy release, whereas numpy.dtype() accepts ``bool``,
        # ``numpy.bool_`` and dtype objects alike.
        if (metric == 'hamming'
                and numpy.dtype(precision) != numpy.dtype(bool)):
            raise NotImplementedError(
                "BruteForceBLAS doesn't support precision"
                " %s with Hamming distances" % precision)
        self._metric = metric
        self._precision = precision
        self.name = 'BruteForceBLAS()'

    def fit(self, X):
        """Initialize the search index."""
        if self._metric == 'angular':
            # precompute (squared) length of each vector
            lens = (X ** 2).sum(-1)
            # normalize index vectors to unit length
            # (in place: the caller's array is modified)
            X /= numpy.sqrt(lens)[..., numpy.newaxis]
            self.index = numpy.ascontiguousarray(X, dtype=self._precision)
        elif self._metric == 'hamming':
            # Regarding bitvectors as vectors in l_2 is faster for blas
            X = X.astype(numpy.float32)
            # precompute (squared) length of each vector
            lens = (X ** 2).sum(-1)
            self.index = numpy.ascontiguousarray(X, dtype=numpy.float32)
            self.lengths = numpy.ascontiguousarray(lens, dtype=numpy.float32)
        elif self._metric == 'euclidean':
            # precompute (squared) length of each vector
            lens = (X ** 2).sum(-1)
            self.index = numpy.ascontiguousarray(X, dtype=self._precision)
            self.lengths = numpy.ascontiguousarray(lens, dtype=self._precision)
        elif self._metric == 'jaccard':
            self.index = X
        else:
            # shouldn't get past the constructor!
            # Raised explicitly so the guard survives ``python -O``.
            raise AssertionError("invalid metric")

    def query(self, v, n):
        """Return the indices of the ``n`` entries closest to ``v``."""
        return [index for index, _ in self.query_with_distances(v, n)]

    def query_with_distances(self, v, n):
        """Find indices of `n` most similar vectors from the index to query
        vector `v`.

        Returns an iterator of ``(index, distance)`` pairs; the distance is
        recomputed with the metric's ``distance`` function from
        ``ann_benchmarks.distance.metrics``.
        """

        if self._metric != 'jaccard':
            # use same precision for query as for index
            v = numpy.ascontiguousarray(v, dtype=self.index.dtype)

        # HACK we ignore query length as that's a constant
        # not affecting the final ordering
        if self._metric == 'angular':
            # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b) # noqa
            dists = -numpy.dot(self.index, v)
        elif self._metric == 'euclidean':
            # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab # noqa
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'hamming':
            # Just compute hamming distance using euclidean distance
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'jaccard':
            dists = [pd[self._metric]['distance'](v, e) for e in self.index]
        else:
            # shouldn't get past the constructor!
            raise AssertionError("invalid metric")

        # partition-sort by distance, get `n` closest
        if n < len(dists):
            nearest_indices = numpy.argpartition(dists, n)[:n]
        else:
            # argpartition requires kth < len(dists); when at least as many
            # neighbours are requested as there are points, all of them
            # are candidates.
            nearest_indices = numpy.arange(len(dists))
        indices = [idx for idx in nearest_indices
                   if pd[self._metric]["distance_valid"](dists[idx])]

        def fix(index):
            ep = self.index[index]
            ev = v
            return (index, pd[self._metric]['distance'](ep, ev))
        return map(fix, indices)
@@ -1,17 +0,0 @@
1
- from __future__ import absolute_import
2
- from scipy.spatial import cKDTree
3
- from ann_benchmarks.algorithms.base import BaseANN
4
-
5
-
6
class CKDTree(BaseANN):
    """Nearest-neighbour search backed by ``scipy.spatial.cKDTree``.

    ``metric`` is stored but not consulted by ``fit`` or ``query``.
    """

    def __init__(self, metric, leaf_size=20):
        self._metric = metric
        self._leaf_size = leaf_size
        self.name = 'CKDTree(leaf_size=%d)' % self._leaf_size

    def fit(self, X):
        """Build the k-d tree over the rows of ``X``."""
        self._tree = cKDTree(X, leafsize=self._leaf_size)

    def query(self, v, n):
        """Return the indices of the ``n`` nearest neighbours of ``v``."""
        # Only the index array is needed; distances are discarded.
        _, neighbour_rows = self._tree.query([v], k=n)
        return neighbour_rows[0]
@@ -1,29 +0,0 @@
1
- from __future__ import absolute_import
2
- from datasketch import MinHashLSHForest, MinHash
3
- from ann_benchmarks.algorithms.base import BaseANN
4
-
5
-
6
class DataSketch(BaseANN):
    """Jaccard search backed by datasketch's ``MinHashLSHForest``."""

    def __init__(self, metric, n_perm, n_rep):
        """Store the MinHash parameters.

        Raises:
            NotImplementedError: if ``metric`` is anything but 'jaccard'.
        """
        # Compare for equality: ``metric not in ('jaccard')`` is a
        # substring test against the string 'jaccard', so values such as
        # '' or 'jac' would be accepted.
        if metric != 'jaccard':
            raise NotImplementedError(
                "Datasketch doesn't support metric %s" % metric)
        self._n_perm = n_perm
        self._n_rep = n_rep
        self._metric = metric
        self.name = 'Datasketch(n_perm=%d, n_rep=%d)' % (n_perm, n_rep)

    def _minhash(self, elements):
        """Return a MinHash updated with the UTF-8 string form of each element."""
        m = MinHash(num_perm=self._n_perm)
        for e in elements:
            m.update(str(e).encode('utf8'))
        return m

    def fit(self, X):
        """Insert one MinHash per row of ``X``, keyed by row number, and index."""
        self._index = MinHashLSHForest(num_perm=self._n_perm, l=self._n_rep)
        for i, x in enumerate(X):
            self._index.add(str(i), self._minhash(x))
        self._index.index()

    def query(self, v, n):
        """Return an iterator over the integer keys found for ``v``."""
        # Keys were stored as strings in fit(); convert them back to ints.
        return map(int, self._index.query(self._minhash(v), n))
@@ -1,187 +0,0 @@
1
- from __future__ import absolute_import
2
- from os import sep as pathsep
3
- import collections
4
- import importlib
5
- import os
6
- import sys
7
- import traceback
8
- import yaml
9
- from enum import Enum
10
- from itertools import product
11
-
12
-
13
# One fully expanded algorithm configuration: which constructor to call,
# with which arguments, plus the query-time argument groups to run.
Definition = collections.namedtuple('Definition', (
    'algorithm',
    'run_group',
    'constructor',
    'module',
    'docker_tag',
    'arguments',
    'query_argument_groups',
    'disabled',
))
17
-
18
-
19
def instantiate_algorithm(definition):
    """Import ``definition.module`` and call its constructor.

    The attribute named ``definition.constructor`` is called with
    ``definition.arguments`` unpacked as positional arguments, and the
    result is returned. A progress line is printed first.
    """
    description = (
        definition.module, definition.constructor, definition.arguments)
    print('Trying to instantiate %s.%s(%s)' % description)
    factory = getattr(
        importlib.import_module(definition.module), definition.constructor)
    return factory(*definition.arguments)
25
-
26
-
27
class InstantiationStatus(Enum):
    """Result of checking whether an algorithm's constructor can be found."""

    # The module imported and exposes the constructor.
    AVAILABLE = 0
    # The module imported but has no attribute with the constructor's name.
    NO_CONSTRUCTOR = 1
    # Importing the module raised ImportError.
    NO_MODULE = 2
31
-
32
-
33
def algorithm_status(definition):
    """Report whether ``definition``'s constructor can be located.

    Returns ``InstantiationStatus.NO_MODULE`` when importing
    ``definition.module`` raises ImportError, ``NO_CONSTRUCTOR`` when the
    module lacks the named attribute, and ``AVAILABLE`` otherwise.
    """
    try:
        loaded = importlib.import_module(definition.module)
        found = hasattr(loaded, definition.constructor)
    except ImportError:
        return InstantiationStatus.NO_MODULE
    if found:
        return InstantiationStatus.AVAILABLE
    return InstantiationStatus.NO_CONSTRUCTOR
42
-
43
-
44
- def _generate_combinations(args):
45
- if isinstance(args, list):
46
- args = [el if isinstance(el, list) else [el] for el in args]
47
- return [list(x) for x in product(*args)]
48
- elif isinstance(args, dict):
49
- flat = []
50
- for k, v in args.items():
51
- if isinstance(v, list):
52
- flat.append([(k, el) for el in v])
53
- else:
54
- flat.append([(k, v)])
55
- return [dict(x) for x in product(*flat)]
56
- else:
57
- raise TypeError("No args handling exists for %s" % type(args).__name__)
58
-
59
-
60
- def _substitute_variables(arg, vs):
61
- if isinstance(arg, dict):
62
- return dict([(k, _substitute_variables(v, vs))
63
- for k, v in arg.items()])
64
- elif isinstance(arg, list):
65
- return [_substitute_variables(a, vs) for a in arg]
66
- elif isinstance(arg, str) and arg in vs:
67
- return vs[arg]
68
- else:
69
- return arg
70
-
71
-
72
def _get_definitions(definition_file):
    """Parse the YAML file at ``definition_file`` with the safe loader."""
    with open(definition_file, "r") as stream:
        parsed = yaml.load(stream, Loader=yaml.SafeLoader)
    return parsed
75
-
76
-
77
def list_algorithms(definition_file):
    """Print the algorithms in ``definition_file``, grouped by point type
    and then by distance metric."""
    definitions = _get_definitions(definition_file)

    print('The following algorithms are supported...')
    for point, by_metric in definitions.items():
        print('\t... for the point type "%s"...' % point)
        for metric, algorithms in by_metric.items():
            print('\t\t... and the distance metric "%s":' % metric)
            for algorithm in algorithms:
                print('\t\t\t%s' % algorithm)
87
-
88
-
89
def get_unique_algorithms(definition_file):
    """Return the sorted, de-duplicated algorithm names found under every
    point type and metric in ``definition_file``."""
    definitions = _get_definitions(definition_file)
    names = {
        algorithm
        for by_metric in definitions.values()
        for algorithms in by_metric.values()
        for algorithm in algorithms
    }
    return sorted(names)
97
-
98
-
99
def get_run_groups(definition_file, algo=None):
    """Return the sorted, de-duplicated run-group names in ``definition_file``.

    Args:
        definition_file: path of the YAML definitions file.
        algo: when given, only run groups of the algorithm with this name
            are collected; ``None`` (the default) collects them for every
            algorithm.
    """
    definitions = _get_definitions(definition_file)
    run_groups = set()
    for point in definitions:
        for metric in definitions[point]:
            for algorithm, spec in definitions[point][metric].items():
                # Identity test for the "no filter" sentinel.
                if algo is None or algo == algorithm:
                    run_groups.update(spec['run-groups'])
    return sorted(run_groups)
109
-
110
-
111
def _expand_arg_groups(arg_groups):
    """Return every combination obtained by picking one value per group."""
    groups = []
    for arg_group in arg_groups:
        if isinstance(arg_group, dict):
            # Dictionaries need to be expanded into lists in order
            # for the subsequent call to _generate_combinations to
            # do the right thing
            groups.append(_generate_combinations(arg_group))
        else:
            groups.append(arg_group)
    return _generate_combinations(groups)


def get_definitions(definition_file, dimension, point_type="float",
                    distance_metric="euclidean", count=10, conn_params=None):
    """Expand the YAML definitions into a list of ``Definition`` tuples.

    Algorithms listed under ``point_type``'s "any" metric are combined
    with those under ``distance_metric`` (the latter win on name clashes).
    Each run group of each algorithm yields one ``Definition`` per
    constructor-argument combination.

    Args:
        definition_file: path of the YAML definitions file.
        dimension: value substituted for the "@dimension" placeholder.
        point_type: top-level key selecting the point type.
        distance_metric: metric key; also substituted for "@metric".
        count: value substituted for the "@count" placeholder.
        conn_params: value substituted for the "@connection" placeholder.
            ``None`` (the default) selects a fresh dict with no host,
            port, auth or user, ``cluster`` False and ``shards`` 1.

    Raises:
        Exception: if an algorithm lacks a 'docker-tag', 'module' or
            'constructor' property.
        AssertionError: if a run group has neither "arg-groups" nor "args".
    """
    if conn_params is None:
        # Built per call so callers never share (and mutate) one default.
        conn_params = {'host': None, 'port': None, 'auth': None,
                       'user': None, 'cluster': False, 'shards': 1}

    available = _get_definitions(definition_file)

    algorithm_definitions = {}
    if "any" in available[point_type]:
        algorithm_definitions.update(available[point_type]["any"])
    algorithm_definitions.update(available[point_type][distance_metric])

    # Placeholder values; identical for every definition produced.
    vs = {
        "@count": count,
        "@metric": distance_metric,
        "@dimension": dimension,
        "@connection": conn_params
    }

    definitions = []
    for (name, algo) in algorithm_definitions.items():
        for k in ['docker-tag', 'module', 'constructor']:
            if k not in algo:
                raise Exception(
                    'algorithm %s does not define a "%s" property' % (name, k))

        base_args = algo.get("base-args", [])

        for run_group_name, run_group in algo["run-groups"].items():
            if "arg-groups" in run_group:
                args = _expand_arg_groups(run_group["arg-groups"])
            elif "args" in run_group:
                args = _generate_combinations(run_group["args"])
            else:
                # Raised explicitly (same exception type as the former
                # ``assert``) so that under ``python -O`` a malformed run
                # group cannot fall through with stale or undefined args.
                raise AssertionError("? what? %s" % run_group)

            if "query-arg-groups" in run_group:
                query_args = _expand_arg_groups(run_group["query-arg-groups"])
            elif "query-args" in run_group:
                query_args = _generate_combinations(run_group["query-args"])
            else:
                query_args = []

            for arg_group in args:
                aargs = list(base_args)
                if isinstance(arg_group, list):
                    aargs.extend(arg_group)
                else:
                    aargs.append(arg_group)

                aargs = [_substitute_variables(arg, vs) for arg in aargs]
                definitions.append(Definition(
                    algorithm=name,
                    run_group=run_group_name,
                    docker_tag=algo['docker-tag'],
                    module=algo['module'],
                    constructor=algo['constructor'],
                    arguments=aargs,
                    query_argument_groups=query_args,
                    disabled=algo.get('disabled', False)
                ))

    return definitions