redisbench-admin 0.11.64__py3-none-any.whl → 0.11.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. redisbench_admin/run/ann/pkg/.dockerignore +2 -0
  2. redisbench_admin/run/ann/pkg/.git +1 -0
  3. redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +100 -0
  4. redisbench_admin/run/ann/pkg/.gitignore +21 -0
  5. redisbench_admin/run/ann/pkg/LICENSE +21 -0
  6. redisbench_admin/run/ann/pkg/README.md +157 -0
  7. redisbench_admin/run/ann/pkg/algos.yaml +1294 -0
  8. redisbench_admin/run/ann/pkg/algosP.yaml +67 -0
  9. redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +2 -0
  10. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
  11. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +26 -0
  12. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +22 -0
  13. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +36 -0
  14. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +110 -0
  15. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +17 -0
  16. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +29 -0
  17. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +187 -0
  18. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +190 -0
  19. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +31 -0
  20. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +25 -0
  21. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +107 -0
  22. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +124 -0
  23. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +124 -0
  24. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +61 -0
  25. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +39 -0
  26. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +27 -0
  27. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +36 -0
  28. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +22 -0
  29. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +39 -0
  30. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +25 -0
  31. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +99 -0
  32. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +41 -0
  33. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +28 -0
  34. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +48 -0
  35. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +74 -0
  36. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +100 -0
  37. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +107 -0
  38. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +79 -0
  39. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +39 -0
  40. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +45 -0
  41. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +115 -0
  42. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +102 -0
  43. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +90 -0
  44. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +20 -0
  45. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +34 -0
  46. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +28 -0
  47. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +246 -0
  48. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +149 -0
  49. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +43 -0
  50. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +47 -0
  51. redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +1 -0
  52. redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +48 -0
  53. redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +620 -0
  54. redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +53 -0
  55. redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +325 -0
  56. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +2 -0
  57. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +183 -0
  58. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +17 -0
  59. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +165 -0
  60. redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +71 -0
  61. redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +333 -0
  62. redisbench_admin/run/ann/pkg/create_dataset.py +12 -0
  63. redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +147 -0
  64. redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +117 -0
  65. redisbench_admin/run/ann/pkg/create_website.py +272 -0
  66. redisbench_admin/run/ann/pkg/install/Dockerfile +11 -0
  67. redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +5 -0
  68. redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +4 -0
  69. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +29 -0
  70. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +31 -0
  71. redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +5 -0
  72. redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +45 -0
  73. redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +61 -0
  74. redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +18 -0
  75. redisbench_admin/run/ann/pkg/install/Dockerfile.flann +10 -0
  76. redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +10 -0
  77. redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +6 -0
  78. redisbench_admin/run/ann/pkg/install/Dockerfile.mih +4 -0
  79. redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +27 -0
  80. redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +4 -0
  81. redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +5 -0
  82. redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +5 -0
  83. redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +13 -0
  84. redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +10 -0
  85. redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +43 -0
  86. redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +6 -0
  87. redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +4 -0
  88. redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +18 -0
  89. redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +5 -0
  90. redisbench_admin/run/ann/pkg/install/Dockerfile.scann +5 -0
  91. redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +4 -0
  92. redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +4 -0
  93. redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +30 -0
  94. redisbench_admin/run/ann/pkg/install/Dockerfile.vald +8 -0
  95. redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +17 -0
  96. redisbench_admin/run/ann/pkg/install.py +70 -0
  97. redisbench_admin/run/ann/pkg/logging.conf +34 -0
  98. redisbench_admin/run/ann/pkg/multirun.py +298 -0
  99. redisbench_admin/run/ann/pkg/plot.py +159 -0
  100. redisbench_admin/run/ann/pkg/protocol/bf-runner +10 -0
  101. redisbench_admin/run/ann/pkg/protocol/bf-runner.py +204 -0
  102. redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +51 -0
  103. redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +77 -0
  104. redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +77 -0
  105. redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +47 -0
  106. redisbench_admin/run/ann/pkg/protocol/specification.md +194 -0
  107. redisbench_admin/run/ann/pkg/requirements.txt +14 -0
  108. redisbench_admin/run/ann/pkg/requirements_py38.txt +11 -0
  109. redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
  110. redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
  111. redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
  112. redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
  113. redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
  114. redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
  115. redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
  116. redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
  117. redisbench_admin/run/ann/pkg/run.py +12 -0
  118. redisbench_admin/run/ann/pkg/run_algorithm.py +3 -0
  119. redisbench_admin/run/ann/pkg/templates/chartjs.template +102 -0
  120. redisbench_admin/run/ann/pkg/templates/detail_page.html +23 -0
  121. redisbench_admin/run/ann/pkg/templates/general.html +58 -0
  122. redisbench_admin/run/ann/pkg/templates/latex.template +30 -0
  123. redisbench_admin/run/ann/pkg/templates/summary.html +60 -0
  124. redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
  125. redisbench_admin/run/ann/pkg/test/test-jaccard.py +19 -0
  126. redisbench_admin/run/ann/pkg/test/test-metrics.py +99 -0
  127. redisbench_admin/run/args.py +2 -1
  128. redisbench_admin/run_remote/run_remote.py +1 -1
  129. {redisbench_admin-0.11.64.dist-info → redisbench_admin-0.11.66.dist-info}/METADATA +2 -5
  130. redisbench_admin-0.11.66.dist-info/RECORD +243 -0
  131. {redisbench_admin-0.11.64.dist-info → redisbench_admin-0.11.66.dist-info}/WHEEL +1 -1
  132. redisbench_admin-0.11.64.dist-info/RECORD +0 -117
  133. {redisbench_admin-0.11.64.dist-info/licenses → redisbench_admin-0.11.66.dist-info}/LICENSE +0 -0
  134. {redisbench_admin-0.11.64.dist-info → redisbench_admin-0.11.66.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,39 @@
1
+ from __future__ import absolute_import
2
+ import os
3
+ import faiss
4
+ import numpy as np
5
+ from ann_benchmarks.constants import INDEX_DIR
6
+ from ann_benchmarks.algorithms.base import BaseANN
7
+ from ann_benchmarks.algorithms.faiss import Faiss
8
+
9
+
10
class FaissHNSW(Faiss):
    """HNSW graph index built on ``faiss.IndexHNSWFlat``.

    Only index construction and the HNSW-specific knobs are defined here;
    anything else (e.g. querying) is presumably inherited from ``Faiss``,
    which is not visible in this file.
    """

    def __init__(self, metric, method_param):
        """Remember the metric name and the build parameters.

        ``method_param`` is indexed with the keys ``"M"`` and
        ``"efConstruction"`` when ``fit`` runs.
        """
        self._metric = metric
        self.method_param = method_param

    def fit(self, X):
        """Build the HNSW index over the rows of ``X``."""
        self.index = faiss.IndexHNSWFlat(len(X[0]), self.method_param["M"])
        self.index.hnsw.efConstruction = self.method_param["efConstruction"]
        self.index.verbose = True

        if self._metric == 'angular':
            # Scale every row to unit L2 norm before indexing.
            X = X / np.linalg.norm(X, axis=1)[:, np.newaxis]
        if X.dtype != np.float32:
            X = X.astype(np.float32)

        self.index.add(X)
        # Restrict faiss to a single OpenMP thread once the index is built.
        faiss.omp_set_num_threads(1)

    def set_query_arguments(self, ef):
        """Reset the HNSW statistics counters and set ``efSearch`` to ``ef``."""
        faiss.cvar.hnsw_stats.reset()
        self.index.hnsw.efSearch = ef

    def get_additional(self):
        """Return the distance-computation counter from the HNSW statistics."""
        return {"dist_comps": faiss.cvar.hnsw_stats.ndis}

    def __str__(self):
        return 'faiss (%s, ef: %d)' % (self.method_param, self.index.hnsw.efSearch)

    def freeIndex(self):
        """Drop the reference to the built index.

        The index is stored in ``self.index``; the previous code deleted
        ``self.p``, an attribute this class never sets, and therefore always
        raised ``AttributeError``.
        """
        del self.index
@@ -0,0 +1,27 @@
1
+ from __future__ import absolute_import
2
+ import pyflann
3
+ import numpy
4
+ import sklearn.preprocessing
5
+ from ann_benchmarks.algorithms.base import BaseANN
6
+
7
+
8
class FLANN(BaseANN):
    """Wrapper around an autotuned ``pyflann.FLANN`` index."""

    def __init__(self, metric, target_precision):
        """Store the metric and the precision handed to FLANN's autotuner."""
        self._target_precision = target_precision
        self.name = 'FLANN(target_precision=%f)' % self._target_precision
        self._metric = metric

    def fit(self, X):
        """Create the FLANN object and build its index over ``X``."""
        flann_options = dict(
            target_precision=self._target_precision,
            algorithm='autotuned',
            log_level='info',
        )
        self._flann = pyflann.FLANN(**flann_options)
        if self._metric == 'angular':
            # Angular data is indexed as L2-normalised rows.
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        self._flann.build_index(X)

    def query(self, v, n):
        """Return the first result row of ``nn_index`` for query vector ``v``."""
        if self._metric == 'angular':
            normalised = sklearn.preprocessing.normalize(
                [v], axis=1, norm='l2')
            v = normalised[0]
        if v.dtype != numpy.float32:
            v = v.astype(numpy.float32)
        found = self._flann.nn_index(v, n)
        return found[0][0]
@@ -0,0 +1,36 @@
1
+ from __future__ import absolute_import
2
+ import os
3
+ import hnswlib
4
+ import numpy as np
5
+ from ann_benchmarks.constants import INDEX_DIR
6
+ from ann_benchmarks.algorithms.base import BaseANN
7
+
8
+
9
class HnswLib(BaseANN):
    """Wrapper around an ``hnswlib.Index``."""

    def __init__(self, metric, method_param):
        """Translate the benchmark metric to an hnswlib space name.

        Raises ``KeyError`` for any metric other than ``'angular'`` or
        ``'euclidean'``.
        """
        space_by_metric = {'angular': 'cosine', 'euclidean': 'l2'}
        self.metric = space_by_metric[metric]
        self.method_param = method_param
        self.name = 'hnswlib (%s)' % (self.method_param)

    def fit(self, X):
        """Build the index over ``X``, labelling items ``0 .. len(X) - 1``."""
        build = self.method_param
        self.p = hnswlib.Index(space=self.metric, dim=len(X[0]))
        self.p.init_index(
            max_elements=len(X),
            ef_construction=build["efConstruction"],
            M=build["M"],
        )
        labels = np.arange(len(X))
        self.p.add_items(np.asarray(X), labels)
        # Single-thread setting applied after the items have been added.
        self.p.set_num_threads(1)

    def set_query_arguments(self, ef):
        """Forward ``ef`` to the index via ``set_ef``."""
        self.p.set_ef(ef)

    def query(self, v, n):
        """Run a ``k=n`` query for the single vector ``v``.

        ``v`` is wrapped into a one-row batch; the first row of the first
        array returned by ``knn_query`` is handed back.
        """
        batch = np.expand_dims(v, axis=0)
        result = self.p.knn_query(batch, k=n)
        return result[0][0]

    def freeIndex(self):
        """Drop the reference to the index."""
        del self.p
@@ -0,0 +1,22 @@
1
+ from __future__ import absolute_import
2
+ import sklearn.neighbors
3
+ import sklearn.preprocessing
4
+ from ann_benchmarks.algorithms.base import BaseANN
5
+
6
+
7
class KDTree(BaseANN):
    """Wrapper around ``sklearn.neighbors.KDTree``."""

    def __init__(self, metric, leaf_size=20):
        """Store the metric and the leaf size passed to the tree."""
        self._leaf_size = leaf_size
        self._metric = metric
        self.name = 'KDTree(leaf_size=%d)' % self._leaf_size

    def fit(self, X):
        """Build the tree over ``X`` (L2-normalised first for angular data)."""
        points = X
        if self._metric == 'angular':
            points = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        self._tree = sklearn.neighbors.KDTree(
            points, leaf_size=self._leaf_size)

    def query(self, v, n):
        """Return the index row produced by a ``k=n`` tree query for ``v``."""
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
        _distances, neighbours = self._tree.query([v], k=n)
        return neighbours[0]
@@ -0,0 +1,39 @@
1
+ from __future__ import absolute_import
2
+ import os
3
+ import numpy
4
+ import pykgraph
5
+ from ann_benchmarks.constants import INDEX_DIR
6
+ from ann_benchmarks.algorithms.base import BaseANN
7
+
8
+
9
class KGraph(BaseANN):
    """Wrapper around ``pykgraph.KGraph`` with an on-disk index cache."""

    def __init__(self, metric, index_params, save_index):
        """Store the (stringified) metric and the build parameters.

        ``save_index`` is recorded but not consulted anywhere in this class.
        """
        metric_name = str(metric)
        self.name = 'KGraph(%s)' % (metric_name)
        self._metric = metric_name
        self._index_params = index_params
        self._save_index = save_index

    def fit(self, X):
        """Load a cached index for this metric if present, else build and save one."""
        data = X if X.dtype == numpy.float32 else X.astype(numpy.float32)
        self._kgraph = pykgraph.KGraph(data, self._metric)

        index_path = os.path.join(INDEX_DIR, 'kgraph-index-%s' % self._metric)
        if os.path.exists(index_path):
            self._kgraph.load(index_path)
            return

        # No cached index: build from the configured parameters, then persist.
        self._kgraph.build(**self._index_params)
        if not os.path.exists(INDEX_DIR):
            os.makedirs(INDEX_DIR)
        self._kgraph.save(index_path)

    def set_query_arguments(self, P):
        """Remember the ``P`` value passed to every subsequent search."""
        self._P = P

    def query(self, v, n):
        """Search for ``n`` neighbours of ``v`` and return the first result row."""
        if v.dtype != numpy.float32:
            v = v.astype(numpy.float32)
        queries = numpy.array([v])
        rows = self._kgraph.search(queries, K=n, threads=1, P=self._P)
        return rows[0]
@@ -0,0 +1,25 @@
1
+ from __future__ import absolute_import
2
+ import sklearn.neighbors
3
+ import sklearn.preprocessing
4
+ from ann_benchmarks.algorithms.base import BaseANN
5
+
6
+
7
class LSHF(BaseANN):
    """Wrapper around ``sklearn.neighbors.LSHForest``."""

    def __init__(self, metric, n_estimators=10, n_candidates=50):
        """Store the metric and the two forest parameters."""
        self.name = 'LSHF(n_est=%d, n_cand=%d)' % (n_estimators, n_candidates)
        self._metric = metric
        self._n_estimators = n_estimators
        self._n_candidates = n_candidates

    def fit(self, X):
        """Create the forest and fit it on ``X``."""
        self._lshf = sklearn.neighbors.LSHForest(
            n_estimators=self._n_estimators,
            n_candidates=self._n_candidates,
        )
        if self._metric == 'angular':
            # Angular data is fitted as L2-normalised rows.
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        self._lshf.fit(X)

    def query(self, v, n):
        """Return the neighbour row for ``v`` from ``kneighbors``."""
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
        neighbour_rows = self._lshf.kneighbors(
            [v], return_distance=False, n_neighbors=n)
        return neighbour_rows[0]
@@ -0,0 +1,99 @@
1
+ from __future__ import absolute_import
2
+ from sqlite3 import paramstyle
3
+ from pymilvus import (
4
+ connections,
5
+ utility,
6
+ FieldSchema,
7
+ CollectionSchema,
8
+ DataType,
9
+ IndexType,
10
+ Collection,
11
+ )
12
+ import numpy
13
+ import sklearn.preprocessing
14
+ from ann_benchmarks.algorithms.base import BaseANN
15
+ import sys
16
+
17
+
18
class Milvus(BaseANN):
    """Benchmark adapter that stores and searches vectors in a Milvus collection.

    All operations target a single collection whose name is held in
    ``_COLLECTION_NAME``, with an INT64 primary key ``pk`` and a float
    vector field ``vector``.
    """

    # Single source of truth for the collection name, so that creation,
    # waiting and dropping cannot drift apart.
    _COLLECTION_NAME = 'milvus'

    def __init__(self, metric, dim, conn_params, index_type, method_params):
        """Connect to the server and open (or create) the collection.

        ``conn_params`` must provide ``'host'`` and ``'port'``. ``metric``
        must be ``'angular'`` or ``'euclidean'`` (``KeyError`` otherwise).
        """
        self._host = conn_params['host']
        self._port = conn_params['port']  # 19530
        self._index_type = index_type
        self._method_params = method_params
        self._metric = {'angular': 'IP', 'euclidean': 'L2'}[metric]
        self._query_params = dict()
        connections.connect(host=conn_params['host'], port=conn_params['port'])
        try:
            fields = [
                FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
                FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
            ]
            schema = CollectionSchema(fields)
            if utility.has_collection(self._COLLECTION_NAME):
                self._milvus = Collection(self._COLLECTION_NAME)
            else:
                self._milvus = Collection(self._COLLECTION_NAME, schema)
        except Exception:
            # Best-effort fallback: attach to the collection without a schema.
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit propagate.
            self._milvus = Collection(self._COLLECTION_NAME)
        print('initialization completed!')

    def fit(self, X, offset=0, limit=None):
        """Insert ``X[offset:limit]`` in bulks and create the index if missing.

        Primary keys are the running positions starting at ``offset``.
        """
        limit = limit if limit else len(X)
        X = X[offset:limit]
        if self._metric == 'IP':
            X = sklearn.preprocessing.normalize(X)

        X = X.tolist()
        # approximation for milvus insert limit (1024MB)
        bulk_size = 1000 * 1024 * 1024 // (sys.getsizeof(X[0]))
        for start in range(0, len(X), bulk_size):
            bulk = X[start: start + bulk_size]
            print(f'inserting vectors {offset} to {offset + len(bulk) - 1}')
            self._milvus.insert([list(range(offset, offset + len(bulk))), bulk])
            offset += len(bulk)

        if not self._milvus.has_index():
            print('indexing...', end=' ')
            index_spec = {
                'index_type': self._index_type,
                'metric_type': self._metric,
                'params': self._method_params,
            }
            try:
                self._milvus.create_index('vector', index_spec)
                print('done!')
            except Exception as err:
                # Still best-effort (set_query_arguments raises later if the
                # index is missing), but report why and let Ctrl-C through.
                print(f'failed! ({err})')

    def set_query_arguments(self, param):
        """Wait for the index, load the collection and set the search parameter.

        ``param`` becomes ``nprobe`` for ``IVF_*`` index types (clamped to
        ``nlist``) and ``ef`` for ``HNSW`` index types.

        Raises ``Exception`` when the index is missing or failed to build.
        """
        if not self._milvus.has_index():
            raise Exception('index is missing')

        print('waiting for index... ', end='')
        if not utility.wait_for_index_building_complete(self._COLLECTION_NAME, 'vector'):
            raise Exception('index has error')
        print('done!')
        self._milvus.load()
        print('waiting for data to be loaded... ', end='')
        utility.wait_for_loading_complete(self._COLLECTION_NAME)
        print('done!')

        if 'IVF_' in self._index_type:
            if param > self._method_params['nlist']:
                print('warning! nprobe > nlist')
                param = self._method_params['nlist']
            self._query_params['nprobe'] = param
        if 'HNSW' in self._index_type:
            self._query_params['ef'] = param

    def query(self, v, n):
        """Return the ids of up to ``n`` search hits for vector ``v``."""
        if self._metric == 'IP':
            # Out-of-place division: ``v /= norm`` rewrote the caller's array
            # (e.g. a row of the shared query set) on every call.
            v = v / numpy.linalg.norm(v)
        v = v.tolist()
        search_spec = {'metric_type': self._metric, 'params': self._query_params}
        results = self._milvus.search([v], 'vector', search_spec, limit=n)
        if not results:
            return []  # Seems to happen occasionally, not sure why
        return [hit.id for hit in results[0]]

    def __str__(self):
        return 'Milvus(index_type=%s, method_params=%s, query_params=%s)' % (self._index_type, str(self._method_params), str(self._query_params))

    def freeIndex(self):
        """Drop the benchmark collection.

        The previous code dropped the misspelled name ``"mlivus"``, so the
        real collection was never removed.
        """
        utility.drop_collection(self._COLLECTION_NAME)

    def done(self):
        """Close the default connection."""
        connections.disconnect('default')
@@ -0,0 +1,41 @@
1
+ from __future__ import absolute_import
2
+ import numpy
3
+ import sklearn.preprocessing
4
+ import mrpt
5
+ from ann_benchmarks.algorithms.base import BaseANN
6
+
7
+
8
class MRPT(BaseANN):
    """Wrapper around an autotuned ``mrpt.MRPTIndex``."""

    def __init__(self, metric, count):
        """Store the metric and ``count``, used as ``k`` during autotuning."""
        self._metric = metric
        self._k = count

    def fit(self, X):
        """Build the autotuned index over ``X`` (float32, normalised if angular)."""
        if X.dtype != numpy.float32:
            X = X.astype(numpy.float32)
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        self._index_autotuned = mrpt.MRPTIndex(X)
        autotune_options = dict(target_recall=None, k=self._k, n_test=1000)
        self._index_autotuned.build_autotune_sample(**autotune_options)

    def set_query_arguments(self, target_recall):
        """Select the sub-index for ``target_recall`` and cache its parameters."""
        self._target_recall = target_recall
        self._index = self._index_autotuned.subset(target_recall)
        self._par = self._index.parameters()

    def query(self, v, n):
        """Query the selected sub-index with ``v``.

        ``n`` is not forwarded; the call is ``ann(v)`` only.
        """
        if v.dtype != numpy.float32:
            v = v.astype(numpy.float32)
        if self._metric == 'angular':
            as_row = v.reshape(1, -1)
            v = sklearn.preprocessing.normalize(
                as_row, axis=1, norm='l2').flatten()
        return self._index.ann(v)

    def __str__(self):
        template = ('MRPT(target recall=%.3f, trees=%d, depth=%d, vote '
                    'threshold=%d, estimated recall=%.3f)')
        values = (
            self._target_recall,
            self._par['n_trees'],
            self._par['depth'],
            self._par['votes'],
            self._par['estimated_recall'],
        )
        return template % values
@@ -0,0 +1,28 @@
1
+ from __future__ import absolute_import
2
+ import n2
3
+ from ann_benchmarks.algorithms.base import BaseANN
4
+
5
+
6
class N2(BaseANN):
    """Wrapper around ``n2.HnswIndex``."""

    def __init__(self, metric, method_param):
        """Read ``M`` and ``efConstruction`` from ``method_param``.

        ``max_m0`` is fixed at twice ``M``; the search ``ef`` starts at -1
        until ``set_query_arguments`` is called.
        """
        self._metric = metric
        self._m = method_param['M']
        self._m0 = self._m * 2
        self._ef_construction = method_param['efConstruction']
        self._n_threads = 1
        self._ef_search = -1

    def fit(self, X):
        """Add every row of ``X`` to a new index and build it."""
        self._n2 = n2.HnswIndex(X.shape[1], self._metric)
        for row in X:
            self._n2.add_data(row)
        self._n2.build(
            m=self._m,
            max_m0=self._m0,
            ef_construction=self._ef_construction,
            n_threads=self._n_threads,
            graph_merging='merge_level0',
        )

    def set_query_arguments(self, ef):
        """Remember the ``ef`` value used by subsequent searches."""
        self._ef_search = ef

    def query(self, v, n):
        """Search by vector ``v`` for ``n`` results with the stored ``ef``."""
        return self._n2.search_by_vector(v, n, self._ef_search)

    def __str__(self):
        return "N2 (M%d_efCon%d)" % (self._m, self._ef_construction)
@@ -0,0 +1,48 @@
1
+ from __future__ import absolute_import
2
+ import nearpy
3
+ from nearpy.filters import NearestFilter
4
+ import sklearn.preprocessing
5
+ from ann_benchmarks.algorithms.base import BaseANN
6
+
7
+
8
class NearPy(BaseANN):
    """Wrapper around a ``nearpy.Engine`` using random binary projections."""

    def __init__(self, metric, n_bits, hash_counts):
        """Store the hashing parameters and create the shared result filter.

        The ``NearestFilter`` is created with a size of 10 and resized per
        query by assigning to its ``N`` attribute.
        """
        self._n_bits = n_bits
        self._hash_counts = hash_counts
        self._metric = metric
        self._filter = NearestFilter(10)
        self.name = 'NearPy(n_bits=%d, hash_counts=%d)' % (
            self._n_bits, self._hash_counts)

    def fit(self, X):
        """Create the engine and store every row of ``X`` under its position."""
        hashes = [
            nearpy.hashes.RandomBinaryProjections('rbp_%d' % k, self._n_bits)
            for k in range(self._hash_counts)
        ]

        # The shared filter has to be installed on the engine for BOTH
        # metrics. The euclidean branch used to omit it, so the per-query
        # update of ``self._filter.N`` in ``query`` had no effect there
        # (NearPy presumably fell back to its own default filter, making the
        # result count independent of ``n`` — confirm against NearPy docs).
        if self._metric == 'euclidean':
            dist = nearpy.distances.EuclideanDistance()
            self._nearpy_engine = nearpy.Engine(
                X.shape[1],
                lshashes=hashes,
                distance=dist,
                vector_filters=[self._filter])
        else:  # Default (angular) = Cosine distance
            self._nearpy_engine = nearpy.Engine(
                X.shape[1],
                lshashes=hashes,
                vector_filters=[self._filter])

        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        for i, x in enumerate(X):
            self._nearpy_engine.store_vector(x, i)

    def query(self, v, n):
        """Return the stored data item of each neighbour reported for ``v``."""
        # XXX: This feels like an unpleasant hack, but it's not clear how to do
        # better without making changes to NearPy
        self._filter.N = n
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
        # Each neighbour is a 3-tuple; the middle element is what was passed
        # as the second argument to ``store_vector`` in ``fit``.
        return [y for x, y, z in self._nearpy_engine.neighbours(v)]
@@ -0,0 +1,74 @@
1
+ from __future__ import absolute_import
2
+ import os
3
+ import nmslib
4
+ from ann_benchmarks.constants import INDEX_DIR
5
+ from ann_benchmarks.algorithms.base import BaseANN
6
+
7
+
8
class NmslibReuseIndex(BaseANN):
    """Wrapper around an nmslib index that can be reloaded from ``INDEX_DIR``."""

    @staticmethod
    def encode(d):
        """Turn a mapping into a list of ``"key=value"`` strings."""
        return ["%s=%s" % (a, b) for (a, b) in d.items()]

    def __init__(self, metric, method_name, index_param, query_param):
        """Encode the parameters, derive the index file name and ensure its directory.

        Passing ``query_param=False`` means "no query-time parameters".
        ``metric`` must be ``'angular'`` or ``'euclidean'`` (``KeyError``
        otherwise).
        """
        self._nmslib_metric = {
            'angular': 'cosinesimil', 'euclidean': 'l2'}[metric]
        self._method_name = method_name
        self._save_index = False
        self._index_param = NmslibReuseIndex.encode(index_param)
        if query_param is not False:
            self._query_param = NmslibReuseIndex.encode(query_param)
            self.name = ('Nmslib(method_name={}, index_param={}, '
                         'query_param={})'.format(self._method_name,
                                                  self._index_param,
                                                  self._query_param))
        else:
            self._query_param = None
            self.name = 'Nmslib(method_name=%s, index_param=%s)' % (
                self._method_name, self._index_param)

        self._index_name = os.path.join(INDEX_DIR, "nmslib_%s_%s_%s" % (
            self._method_name, metric, '_'.join(self._index_param)))

        d = os.path.dirname(self._index_name)
        if not os.path.exists(d):
            os.makedirs(d)

    def fit(self, X):
        """Load the index from disk if its file exists, otherwise create it."""
        # Work on a copy: appending to self._index_param made every repeated
        # fit() call add another bucketSize entry to the stored parameters.
        index_param = list(self._index_param)
        if self._method_name == 'vptree':
            # To avoid this issue: terminate called after throwing an instance
            # of 'std::runtime_error'
            #   what():  The data size is too small or the bucket size is too
            #   big. Select the parameters so that <total # of records> is NOT
            #   less than <bucket size> * 1000
            # Aborted (core dumped)
            index_param.append('bucketSize=%d' %
                               min(int(X.shape[0] * 0.0005), 1000))

        self._index = nmslib.init(
            space=self._nmslib_metric, method=self._method_name)
        self._index.addDataPointBatch(X)

        if os.path.exists(self._index_name):
            print('Loading index from file')
            self._index.loadIndex(self._index_name)
        else:
            self._index.createIndex(index_param)
            if self._save_index:
                self._index.saveIndex(self._index_name)
        if self._query_param is not None:
            self._index.setQueryTimeParams(self._query_param)

    def set_query_arguments(self, ef):
        """Set ``efSearch`` for the ``hnsw`` and ``sw-graph`` methods; no-op otherwise."""
        if self._method_name in ('hnsw', 'sw-graph'):
            self._index.setQueryTimeParams(["efSearch=%s" % (ef)])

    def query(self, v, n):
        """Return the ids from a ``knnQuery`` for ``v`` with ``n`` neighbours."""
        ids, distances = self._index.knnQuery(v, n)
        return ids

    def batch_query(self, X, n):
        """Run a batch query and keep the raw result for ``get_batch_results``."""
        self.res = self._index.knnQueryBatch(X, n)

    def get_batch_results(self):
        """Return the first element of every pair stored by ``batch_query``."""
        return [x for x, _ in self.res]
@@ -0,0 +1,100 @@
1
+ from __future__ import absolute_import
2
+ import sys
3
+ import os
4
+ import ngtpy
5
+ import numpy as np
6
+ import subprocess
7
+ import time
8
+ from ann_benchmarks.algorithms.base import BaseANN
9
+ from ann_benchmarks.constants import INDEX_DIR
10
+
11
+
12
class ONNG(BaseANN):
    """Wrapper that builds an ONNG index with the ``ngt`` CLI and searches it via ``ngtpy``."""

    def __init__(self, metric, object_type, epsilon, param):
        """Read the graph parameters from ``param`` and echo the configuration.

        Required keys: ``edge``, ``outdegree``, ``indegree``. Optional keys:
        ``search_edge`` (default 0), ``tree`` and ``refine``.
        """
        metrics = {'euclidean': '2', 'angular': 'C'}
        self._edge_size = int(param['edge'])
        self._outdegree = int(param['outdegree'])
        self._indegree = int(param['indegree'])
        self._metric = metrics[metric]
        self._object_type = object_type
        if 'search_edge' in param:
            self._edge_size_for_search = int(param['search_edge'])
        else:
            self._edge_size_for_search = 0
        if 'tree' in param:
            self._tree_disabled = (param['tree'] == False)
        else:
            self._tree_disabled = False
        if 'refine' in param:
            self._refine_enabled = (param['refine'] == True)
        else:
            self._refine_enabled = False
        self._build_time_limit = 4
        self._epsilon = epsilon
        print('ONNG: edge_size=' + str(self._edge_size))
        print('ONNG: outdegree=' + str(self._outdegree))
        print('ONNG: indegree=' + str(self._indegree))
        print('ONNG: edge_size_for_search=' + str(self._edge_size_for_search))
        print('ONNG: epsilon=' + str(self._epsilon))
        print('ONNG: metric=' + metric)
        print('ONNG: object_type=' + object_type)

    def _build_anng(self, X, dim, anng_path):
        """Create the ANNG at ``anng_path``, insert ``X`` and optionally refine it."""
        print('ONNG: create ANNG')
        started = time.time()
        create_cmd = [
            'ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of',
            '-D' + self._metric,
            '-d' + str(dim),
            '-E' + str(self._edge_size),
            '-S' + str(self._edge_size_for_search),
            '-e' + str(self._epsilon),
            '-P0', '-B30',
            '-T' + str(self._build_time_limit),
            anng_path,
        ]
        subprocess.call(create_cmd)
        idx = ngtpy.Index(path=anng_path)
        idx.batch_insert(X, num_threads=24, debug=False)
        print('ONNG: ANNG construction time(sec)=' + str(time.time() - started))
        started = time.time()
        if self._refine_enabled:
            idx.refine_anng(epsilon=self._epsilon, num_of_edges=self._edge_size,
                            num_of_explored_edges=self._edge_size_for_search)
        print('ONNG: RNNG construction time(sec)=' + str(time.time() - started))
        idx.save()
        idx.close()

    def _adjust_degree(self, anng_path, onng_path):
        """Run ``ngt reconstruct-graph`` to derive the ONNG from the ANNG."""
        print('ONNG: degree adjustment')
        started = time.time()
        reconstruct_cmd = [
            'ngt', 'reconstruct-graph', '-mS',
            '-o ' + str(self._outdegree),
            '-i ' + str(self._indegree),
            anng_path, onng_path,
        ]
        subprocess.call(reconstruct_cmd)
        print('ONNG: degree adjustment time(sec)=' + str(time.time() - started))

    def fit(self, X):
        """Build whichever of the ANNG/ONNG directories is missing, then open the ONNG read-only."""
        print('ONNG: start indexing...')
        dim = len(X[0])
        print('ONNG: # of data=' + str(len(X)))
        print('ONNG: dimensionality=' + str(dim))
        index_dir = 'indexes'
        if not os.path.exists(index_dir):
            os.makedirs(index_dir)
        onng_name = 'ONNG-{}-{}-{}'.format(
            self._edge_size, self._outdegree, self._indegree)
        onng_path = os.path.join(index_dir, onng_name)
        anng_path = os.path.join(index_dir, 'ANNG-' + str(self._edge_size))
        print('ONNG: index=' + onng_path)

        # The ANNG is only built when neither it nor the final ONNG exists.
        if not (os.path.exists(onng_path) or os.path.exists(anng_path)):
            self._build_anng(X, dim, anng_path)
        if not os.path.exists(onng_path):
            self._adjust_degree(anng_path, onng_path)

        if not os.path.exists(onng_path):
            print('ONNG: something wrong.')
        else:
            print('ONNG: index already exists! ' + str(onng_path))
            started = time.time()
            print(self._tree_disabled)
            self.index = ngtpy.Index(
                onng_path, read_only=True, tree_disabled=self._tree_disabled)
            self.indexName = onng_path
            print('ONNG: open time(sec)=' + str(time.time() - started))
        print('ONNG: end of fit')

    def set_query_arguments(self, parameters):
        """Unpack ``(epsilon, edge_size)``, update ``name`` and configure the index.

        The index receives ``epsilon - 1.0``; ``name`` shows the original value.
        """
        epsilon, edge_size = parameters
        print("ONNG: edge_size=" + str(edge_size))
        print("ONNG: epsilon=" + str(epsilon))
        name_fields = (self._edge_size, self._outdegree,
                       self._indegree, edge_size, epsilon)
        self.name = 'ONNG-NGT(%s, %s, %s, %s, %1.3f)' % name_fields
        self.index.set(epsilon=epsilon - 1.0, edge_size=edge_size)

    def query(self, v, n):
        """Search the index for ``n`` results for ``v``, without distances."""
        return self.index.search(v, n, with_distance=False)

    def freeIndex(self):
        """Only logs a message; the index is not released here."""
        print('ONNG: free')