redisbench-admin 0.11.65__py3-none-any.whl → 0.11.67__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- redisbench_admin/run/args.py +2 -0
- redisbench_admin/run/cluster.py +1 -3
- redisbench_admin/run_remote/remote_db.py +3 -1
- redisbench_admin/run_remote/remote_helpers.py +27 -11
- redisbench_admin/run_remote/run_remote.py +7 -6
- redisbench_admin/run_remote/standalone.py +6 -2
- redisbench_admin/utils/benchmark_config.py +6 -2
- redisbench_admin/utils/local.py +4 -2
- redisbench_admin/utils/remote.py +81 -33
- {redisbench_admin-0.11.65.dist-info → redisbench_admin-0.11.67.dist-info}/METADATA +5 -2
- redisbench_admin-0.11.67.dist-info/RECORD +117 -0
- {redisbench_admin-0.11.65.dist-info → redisbench_admin-0.11.67.dist-info}/WHEEL +1 -1
- redisbench_admin/run/ann/pkg/.dockerignore +0 -2
- redisbench_admin/run/ann/pkg/.git +0 -1
- redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +0 -100
- redisbench_admin/run/ann/pkg/.gitignore +0 -21
- redisbench_admin/run/ann/pkg/LICENSE +0 -21
- redisbench_admin/run/ann/pkg/README.md +0 -157
- redisbench_admin/run/ann/pkg/algos.yaml +0 -1294
- redisbench_admin/run/ann/pkg/algosP.yaml +0 -67
- redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +0 -2
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +0 -26
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +0 -22
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +0 -36
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +0 -110
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +0 -17
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +0 -29
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +0 -187
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +0 -190
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +0 -31
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +0 -25
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +0 -107
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +0 -124
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +0 -124
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +0 -61
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +0 -27
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +0 -36
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +0 -22
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +0 -25
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +0 -99
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +0 -41
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +0 -28
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +0 -48
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +0 -74
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +0 -100
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +0 -107
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +0 -79
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +0 -45
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +0 -115
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +0 -102
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +0 -90
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +0 -20
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +0 -34
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +0 -28
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +0 -246
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +0 -149
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +0 -43
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +0 -47
- redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +0 -1
- redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +0 -48
- redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +0 -620
- redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +0 -53
- redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +0 -325
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +0 -2
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +0 -183
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +0 -17
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +0 -165
- redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +0 -71
- redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +0 -333
- redisbench_admin/run/ann/pkg/create_dataset.py +0 -12
- redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +0 -147
- redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +0 -117
- redisbench_admin/run/ann/pkg/create_website.py +0 -272
- redisbench_admin/run/ann/pkg/install/Dockerfile +0 -11
- redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +0 -29
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +0 -31
- redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +0 -45
- redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +0 -61
- redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +0 -18
- redisbench_admin/run/ann/pkg/install/Dockerfile.flann +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +0 -6
- redisbench_admin/run/ann/pkg/install/Dockerfile.mih +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +0 -27
- redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +0 -13
- redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +0 -43
- redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +0 -6
- redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +0 -18
- redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.scann +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +0 -30
- redisbench_admin/run/ann/pkg/install/Dockerfile.vald +0 -8
- redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +0 -17
- redisbench_admin/run/ann/pkg/install.py +0 -70
- redisbench_admin/run/ann/pkg/logging.conf +0 -34
- redisbench_admin/run/ann/pkg/multirun.py +0 -298
- redisbench_admin/run/ann/pkg/plot.py +0 -159
- redisbench_admin/run/ann/pkg/protocol/bf-runner +0 -10
- redisbench_admin/run/ann/pkg/protocol/bf-runner.py +0 -204
- redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +0 -51
- redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +0 -77
- redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +0 -77
- redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +0 -47
- redisbench_admin/run/ann/pkg/protocol/specification.md +0 -194
- redisbench_admin/run/ann/pkg/requirements.txt +0 -14
- redisbench_admin/run/ann/pkg/requirements_py38.txt +0 -11
- redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
- redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/run.py +0 -12
- redisbench_admin/run/ann/pkg/run_algorithm.py +0 -3
- redisbench_admin/run/ann/pkg/templates/chartjs.template +0 -102
- redisbench_admin/run/ann/pkg/templates/detail_page.html +0 -23
- redisbench_admin/run/ann/pkg/templates/general.html +0 -58
- redisbench_admin/run/ann/pkg/templates/latex.template +0 -30
- redisbench_admin/run/ann/pkg/templates/summary.html +0 -60
- redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/test/test-jaccard.py +0 -19
- redisbench_admin/run/ann/pkg/test/test-metrics.py +0 -99
- redisbench_admin-0.11.65.dist-info/RECORD +0 -243
- {redisbench_admin-0.11.65.dist-info → redisbench_admin-0.11.67.dist-info}/entry_points.txt +0 -0
- {redisbench_admin-0.11.65.dist-info → redisbench_admin-0.11.67.dist-info/licenses}/LICENSE +0 -0

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py
@@ -1,39 +0,0 @@
-from __future__ import absolute_import
-import os
-import faiss
-import numpy as np
-from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.base import BaseANN
-from ann_benchmarks.algorithms.faiss import Faiss
-
-
-class FaissHNSW(Faiss):
-    def __init__(self, metric, method_param):
-        self._metric = metric
-        self.method_param = method_param
-
-    def fit(self, X):
-        self.index = faiss.IndexHNSWFlat(len(X[0]), self.method_param["M"])
-        self.index.hnsw.efConstruction = self.method_param["efConstruction"]
-        self.index.verbose = True
-
-        if self._metric == 'angular':
-            X = X / np.linalg.norm(X, axis=1)[:, np.newaxis]
-        if X.dtype != np.float32:
-            X = X.astype(np.float32)
-
-        self.index.add(X)
-        faiss.omp_set_num_threads(1)
-
-    def set_query_arguments(self, ef):
-        faiss.cvar.hnsw_stats.reset()
-        self.index.hnsw.efSearch = ef
-
-    def get_additional(self):
-        return {"dist_comps": faiss.cvar.hnsw_stats.ndis}
-
-    def __str__(self):
-        return 'faiss (%s, ef: %d)' % (self.method_param, self.index.hnsw.efSearch)
-
-    def freeIndex(self):
-        del self.p

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py
@@ -1,27 +0,0 @@
-from __future__ import absolute_import
-import pyflann
-import numpy
-import sklearn.preprocessing
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class FLANN(BaseANN):
-    def __init__(self, metric, target_precision):
-        self._target_precision = target_precision
-        self.name = 'FLANN(target_precision=%f)' % self._target_precision
-        self._metric = metric
-
-    def fit(self, X):
-        self._flann = pyflann.FLANN(
-            target_precision=self._target_precision,
-            algorithm='autotuned', log_level='info')
-        if self._metric == 'angular':
-            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
-        self._flann.build_index(X)
-
-    def query(self, v, n):
-        if self._metric == 'angular':
-            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
-        if v.dtype != numpy.float32:
-            v = v.astype(numpy.float32)
-        return self._flann.nn_index(v, n)[0][0]

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py
@@ -1,36 +0,0 @@
-from __future__ import absolute_import
-import os
-import hnswlib
-import numpy as np
-from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class HnswLib(BaseANN):
-    def __init__(self, metric, method_param):
-        self.metric = {'angular': 'cosine', 'euclidean': 'l2'}[metric]
-        self.method_param = method_param
-        # print(self.method_param,save_index,query_param)
-        # self.ef=query_param['ef']
-        self.name = 'hnswlib (%s)' % (self.method_param)
-
-    def fit(self, X):
-        # Only l2 is supported currently
-        self.p = hnswlib.Index(space=self.metric, dim=len(X[0]))
-        self.p.init_index(max_elements=len(X),
-                          ef_construction=self.method_param["efConstruction"],
-                          M=self.method_param["M"])
-        data_labels = np.arange(len(X))
-        self.p.add_items(np.asarray(X), data_labels)
-        self.p.set_num_threads(1)
-
-    def set_query_arguments(self, ef):
-        self.p.set_ef(ef)
-
-    def query(self, v, n):
-        # print(np.expand_dims(v,axis=0).shape)
-        # print(self.p.knn_query(np.expand_dims(v,axis=0), k = n)[0])
-        return self.p.knn_query(np.expand_dims(v, axis=0), k=n)[0][0]
-
-    def freeIndex(self):
-        del self.p

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py
@@ -1,22 +0,0 @@
-from __future__ import absolute_import
-import sklearn.neighbors
-import sklearn.preprocessing
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class KDTree(BaseANN):
-    def __init__(self, metric, leaf_size=20):
-        self._leaf_size = leaf_size
-        self._metric = metric
-        self.name = 'KDTree(leaf_size=%d)' % self._leaf_size
-
-    def fit(self, X):
-        if self._metric == 'angular':
-            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
-        self._tree = sklearn.neighbors.KDTree(X, leaf_size=self._leaf_size)
-
-    def query(self, v, n):
-        if self._metric == 'angular':
-            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
-        dist, ind = self._tree.query([v], k=n)
-        return ind[0]

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py
@@ -1,39 +0,0 @@
-from __future__ import absolute_import
-import os
-import numpy
-import pykgraph
-from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class KGraph(BaseANN):
-    def __init__(self, metric, index_params, save_index):
-        metric = str(metric)
-        self.name = 'KGraph(%s)' % (metric)
-        self._metric = metric
-        self._index_params = index_params
-        self._save_index = save_index
-
-    def fit(self, X):
-        if X.dtype != numpy.float32:
-            X = X.astype(numpy.float32)
-        self._kgraph = pykgraph.KGraph(X, self._metric)
-        path = os.path.join(INDEX_DIR, 'kgraph-index-%s' % self._metric)
-        if os.path.exists(path):
-            self._kgraph.load(path)
-        else:
-            # iterations=30, L=100, delta=0.002, recall=0.99, K=25)
-            self._kgraph.build(**self._index_params)
-            if not os.path.exists(INDEX_DIR):
-                os.makedirs(INDEX_DIR)
-            self._kgraph.save(path)
-
-    def set_query_arguments(self, P):
-        self._P = P
-
-    def query(self, v, n):
-        if v.dtype != numpy.float32:
-            v = v.astype(numpy.float32)
-        result = self._kgraph.search(
-            numpy.array([v]), K=n, threads=1, P=self._P)
-        return result[0]

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py
@@ -1,25 +0,0 @@
-from __future__ import absolute_import
-import sklearn.neighbors
-import sklearn.preprocessing
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class LSHF(BaseANN):
-    def __init__(self, metric, n_estimators=10, n_candidates=50):
-        self.name = 'LSHF(n_est=%d, n_cand=%d)' % (n_estimators, n_candidates)
-        self._metric = metric
-        self._n_estimators = n_estimators
-        self._n_candidates = n_candidates
-
-    def fit(self, X):
-        self._lshf = sklearn.neighbors.LSHForest(
-            n_estimators=self._n_estimators, n_candidates=self._n_candidates)
-        if self._metric == 'angular':
-            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
-        self._lshf.fit(X)
-
-    def query(self, v, n):
-        if self._metric == 'angular':
-            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
-        return self._lshf.kneighbors([v], return_distance=False,
-                                     n_neighbors=n)[0]

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py
@@ -1,99 +0,0 @@
-from __future__ import absolute_import
-from sqlite3 import paramstyle
-from pymilvus import (
-    connections,
-    utility,
-    FieldSchema,
-    CollectionSchema,
-    DataType,
-    IndexType,
-    Collection,
-)
-import numpy
-import sklearn.preprocessing
-from ann_benchmarks.algorithms.base import BaseANN
-import sys
-
-
-class Milvus(BaseANN):
-    def __init__(self, metric, dim, conn_params, index_type, method_params):
-        self._host = conn_params['host']
-        self._port = conn_params['port'] # 19530
-        self._index_type = index_type
-        self._method_params = method_params
-        self._metric = {'angular': 'IP', 'euclidean': 'L2'}[metric]
-        self._query_params = dict()
-        connections.connect(host=conn_params['host'], port=conn_params['port'])
-        try:
-            fields = [
-                FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
-                FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
-            ]
-            schema = CollectionSchema(fields)
-            if utility.has_collection('milvus'):
-                self._milvus = Collection('milvus')
-            else:
-                self._milvus = Collection('milvus', schema)
-        except:
-            self._milvus = Collection('milvus')
-        print('initialization completed!')
-
-    def fit(self, X, offset=0, limit=None):
-        limit = limit if limit else len(X)
-        X = X[offset:limit]
-        if self._metric == 'IP':
-            X = sklearn.preprocessing.normalize(X)
-
-        X = X.tolist()
-        bulk_size = 1000 * 1024 * 1024 // (sys.getsizeof(X[0])) # approximation for milvus insert limit (1024MB)
-        for bulk in [X[i: i+bulk_size] for i in range(0, len(X), bulk_size)]:
-            print(f'inserting vectors {offset} to {offset + len(bulk) - 1}')
-            self._milvus.insert([list(range(offset, offset + len(bulk))), bulk])
-            offset += len(bulk)
-
-        if not self._milvus.has_index():
-            print('indexing...', end=' ')
-            try:
-                self._milvus.create_index('vector', {'index_type': self._index_type, 'metric_type':self._metric, 'params':self._method_params})
-                print('done!')
-            except:
-                print('failed!')
-
-
-    def set_query_arguments(self, param):
-        if self._milvus.has_index():
-            print('waiting for index... ', end='')
-            if utility.wait_for_index_building_complete('milvus', 'vector'):
-                print('done!')
-                self._milvus.load()
-                print('waiting for data to be loaded... ', end='')
-                utility.wait_for_loading_complete('milvus')
-                print('done!')
-            else: raise Exception('index has error')
-        else: raise Exception('index is missing')
-        if 'IVF_' in self._index_type:
-            if param > self._method_params['nlist']:
-                print('warning! nprobe > nlist')
-                param = self._method_params['nlist']
-            self._query_params['nprobe'] = param
-        if 'HNSW' in self._index_type:
-            self._query_params['ef'] = param
-
-    def query(self, v, n):
-        if self._metric == 'IP':
-            v /= numpy.linalg.norm(v)
-        v = v.tolist()
-        results = self._milvus.search([v], 'vector', {'metric_type':self._metric, 'params':self._query_params}, limit=n)
-        if not results:
-            return [] # Seems to happen occasionally, not sure why
-        result_ids = [result.id for result in results[0]]
-        return result_ids
-
-    def __str__(self):
-        return 'Milvus(index_type=%s, method_params=%s, query_params=%s)' % (self._index_type, str(self._method_params), str(self._query_params))
-
-    def freeIndex(self):
-        utility.drop_collection("mlivus")
-
-    def done(self):
-        connections.disconnect('default')

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py
@@ -1,41 +0,0 @@
-from __future__ import absolute_import
-import numpy
-import sklearn.preprocessing
-import mrpt
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class MRPT(BaseANN):
-    def __init__(self, metric, count):
-        self._metric = metric
-        self._k = count
-
-    def fit(self, X):
-        if X.dtype != numpy.float32:
-            X = X.astype(numpy.float32)
-        if self._metric == 'angular':
-            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
-
-        self._index_autotuned = mrpt.MRPTIndex(X)
-        self._index_autotuned.build_autotune_sample(
-            target_recall=None, k=self._k, n_test=1000)
-
-    def set_query_arguments(self, target_recall):
-        self._target_recall = target_recall
-        self._index = self._index_autotuned.subset(target_recall)
-        self._par = self._index.parameters()
-
-    def query(self, v, n):
-        if v.dtype != numpy.float32:
-            v = v.astype(numpy.float32)
-        if self._metric == 'angular':
-            v = sklearn.preprocessing.normalize(
-                v.reshape(1, -1), axis=1, norm='l2').flatten()
-        return self._index.ann(v)
-
-    def __str__(self):
-        str_template = ('MRPT(target recall=%.3f, trees=%d, depth=%d, vote '
-                        'threshold=%d, estimated recall=%.3f)')
-        return str_template % (self._target_recall, self._par['n_trees'],
-                               self._par['depth'], self._par['votes'],
-                               self._par['estimated_recall'])

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py
@@ -1,28 +0,0 @@
-from __future__ import absolute_import
-import n2
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class N2(BaseANN):
-    def __init__(self, metric, method_param):
-        self._metric = metric
-        self._m = method_param['M']
-        self._m0 = self._m * 2
-        self._ef_construction = method_param['efConstruction']
-        self._n_threads = 1
-        self._ef_search = -1
-
-    def fit(self, X):
-        self._n2 = n2.HnswIndex(X.shape[1], self._metric)
-        for x in X:
-            self._n2.add_data(x)
-        self._n2.build(m=self._m, max_m0=self._m0, ef_construction=self._ef_construction, n_threads=self._n_threads, graph_merging='merge_level0')
-
-    def set_query_arguments(self, ef):
-        self._ef_search = ef
-
-    def query(self, v, n):
-        return self._n2.search_by_vector(v, n, self._ef_search)
-
-    def __str__(self):
-        return "N2 (M%d_efCon%d)" % (self._m, self._ef_construction)

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py
@@ -1,48 +0,0 @@
-from __future__ import absolute_import
-import nearpy
-from nearpy.filters import NearestFilter
-import sklearn.preprocessing
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class NearPy(BaseANN):
-    def __init__(self, metric, n_bits, hash_counts):
-        self._n_bits = n_bits
-        self._hash_counts = hash_counts
-        self._metric = metric
-        self._filter = NearestFilter(10)
-        self.name = 'NearPy(n_bits=%d, hash_counts=%d)' % (
-            self._n_bits, self._hash_counts)
-
-    def fit(self, X):
-        hashes = []
-
-        for k in range(self._hash_counts):
-            nearpy_rbp = nearpy.hashes.RandomBinaryProjections(
-                'rbp_%d' % k, self._n_bits)
-            hashes.append(nearpy_rbp)
-
-        if self._metric == 'euclidean':
-            dist = nearpy.distances.EuclideanDistance()
-            self._nearpy_engine = nearpy.Engine(
-                X.shape[1],
-                lshashes=hashes,
-                distance=dist)
-        else: # Default (angular) = Cosine distance
-            self._nearpy_engine = nearpy.Engine(
-                X.shape[1],
-                lshashes=hashes,
-                vector_filters=[self._filter])
-
-        if self._metric == 'angular':
-            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
-        for i, x in enumerate(X):
-            self._nearpy_engine.store_vector(x, i)
-
-    def query(self, v, n):
-        # XXX: This feels like an unpleasant hack, but it's not clear how to do
-        # better without making changes to NearPy
-        self._filter.N = n
-        if self._metric == 'angular':
-            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
-        return [y for x, y, z in self._nearpy_engine.neighbours(v)]

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py
@@ -1,74 +0,0 @@
-from __future__ import absolute_import
-import os
-import nmslib
-from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.base import BaseANN
-
-
-class NmslibReuseIndex(BaseANN):
-    @staticmethod
-    def encode(d):
-        return ["%s=%s" % (a, b) for (a, b) in d.items()]
-
-    def __init__(self, metric, method_name, index_param, query_param):
-        self._nmslib_metric = {
-            'angular': 'cosinesimil', 'euclidean': 'l2'}[metric]
-        self._method_name = method_name
-        self._save_index = False
-        self._index_param = NmslibReuseIndex.encode(index_param)
-        if query_param is not False:
-            self._query_param = NmslibReuseIndex.encode(query_param)
-            self.name = ('Nmslib(method_name={}, index_param={}, '
-                         'query_param={})'.format(self._method_name,
-                                                  self._index_param,
-                                                  self._query_param))
-        else:
-            self._query_param = None
-            self.name = 'Nmslib(method_name=%s, index_param=%s)' % (
-                self._method_name, self._index_param)
-
-        self._index_name = os.path.join(INDEX_DIR, "nmslib_%s_%s_%s" % (
-            self._method_name, metric, '_'.join(self._index_param)))
-
-        d = os.path.dirname(self._index_name)
-        if not os.path.exists(d):
-            os.makedirs(d)
-
-    def fit(self, X):
-        if self._method_name == 'vptree':
-            # To avoid this issue: terminate called after throwing an instance
-            # of 'std::runtime_error'
-            # what(): The data size is too small or the bucket size is too
-            # big. Select the parameters so that <total # of records> is NOT
-            # less than <bucket size> * 1000
-            # Aborted (core dumped)
-            self._index_param.append('bucketSize=%d' %
-                                     min(int(X.shape[0] * 0.0005), 1000))
-
-        self._index = nmslib.init(
-            space=self._nmslib_metric, method=self._method_name)
-        self._index.addDataPointBatch(X)
-
-        if os.path.exists(self._index_name):
-            print('Loading index from file')
-            self._index.loadIndex(self._index_name)
-        else:
-            self._index.createIndex(self._index_param)
-            if self._save_index:
-                self._index.saveIndex(self._index_name)
-        if self._query_param is not None:
-            self._index.setQueryTimeParams(self._query_param)
-
-    def set_query_arguments(self, ef):
-        if self._method_name == 'hnsw' or self._method_name == 'sw-graph':
-            self._index.setQueryTimeParams(["efSearch=%s" % (ef)])
-
-    def query(self, v, n):
-        ids, distances = self._index.knnQuery(v, n)
-        return ids
-
-    def batch_query(self, X, n):
-        self.res = self._index.knnQueryBatch(X, n)
-
-    def get_batch_results(self):
-        return [x for x, _ in self.res]

redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py
@@ -1,100 +0,0 @@
-from __future__ import absolute_import
-import sys
-import os
-import ngtpy
-import numpy as np
-import subprocess
-import time
-from ann_benchmarks.algorithms.base import BaseANN
-from ann_benchmarks.constants import INDEX_DIR
-
-
-class ONNG(BaseANN):
-    def __init__(self, metric, object_type, epsilon, param):
-        metrics = {'euclidean': '2', 'angular': 'C'}
-        self._edge_size = int(param['edge'])
-        self._outdegree = int(param['outdegree'])
-        self._indegree = int(param['indegree'])
-        self._metric = metrics[metric]
-        self._object_type = object_type
-        self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else 0
-        self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
-        self._refine_enabled = (param['refine'] == True) if 'refine' in param.keys() else False
-        self._build_time_limit = 4
-        self._epsilon = epsilon
-        print('ONNG: edge_size=' + str(self._edge_size))
-        print('ONNG: outdegree=' + str(self._outdegree))
-        print('ONNG: indegree=' + str(self._indegree))
-        print('ONNG: edge_size_for_search=' + str(self._edge_size_for_search))
-        print('ONNG: epsilon=' + str(self._epsilon))
-        print('ONNG: metric=' + metric)
-        print('ONNG: object_type=' + object_type)
-
-    def fit(self, X):
-        print('ONNG: start indexing...')
-        dim = len(X[0])
-        print('ONNG: # of data=' + str(len(X)))
-        print('ONNG: dimensionality=' + str(dim))
-        index_dir = 'indexes'
-        if not os.path.exists(index_dir):
-            os.makedirs(index_dir)
-        index = os.path.join(
-            index_dir,
-            'ONNG-{}-{}-{}'.format(self._edge_size, self._outdegree,
-                                   self._indegree))
-        anngIndex = os.path.join(index_dir, 'ANNG-' + str(self._edge_size))
-        print('ONNG: index=' + index)
-        if (not os.path.exists(index)) and (not os.path.exists(anngIndex)):
-            print('ONNG: create ANNG')
-            t = time.time()
-            args = ['ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of',
-                    '-D' + self._metric, '-d' + str(dim),
-                    '-E' + str(self._edge_size),
-                    '-S' + str(self._edge_size_for_search),
-                    '-e' + str(self._epsilon), '-P0', '-B30',
-                    '-T' + str(self._build_time_limit), anngIndex]
-            subprocess.call(args)
-            idx = ngtpy.Index(path=anngIndex)
-            idx.batch_insert(X, num_threads=24, debug=False)
-            print('ONNG: ANNG construction time(sec)=' + str(time.time() - t))
-            t = time.time()
-            if self._refine_enabled:
-                idx.refine_anng(epsilon=self._epsilon, num_of_edges=self._edge_size,
-                                num_of_explored_edges=self._edge_size_for_search)
-            print('ONNG: RNNG construction time(sec)=' + str(time.time() - t))
-            idx.save()
-            idx.close()
-        if not os.path.exists(index):
-            print('ONNG: degree adjustment')
-            t = time.time()
-            args = ['ngt', 'reconstruct-graph', '-mS',
-                    '-o ' + str(self._outdegree),
-                    '-i ' + str(self._indegree), anngIndex, index]
-            subprocess.call(args)
-            print('ONNG: degree adjustment time(sec)=' + str(time.time() - t))
-        if os.path.exists(index):
-            print('ONNG: index already exists! ' + str(index))
-            t = time.time()
-            print(self._tree_disabled)
-            self.index = ngtpy.Index(index, read_only=True, tree_disabled=self._tree_disabled)
-            self.indexName = index
-            print('ONNG: open time(sec)=' + str(time.time() - t))
-        else:
-            print('ONNG: something wrong.')
-        print('ONNG: end of fit')
-
-    def set_query_arguments(self, parameters):
-        epsilon, edge_size = parameters
-        print("ONNG: edge_size=" + str(edge_size))
-        print("ONNG: epsilon=" + str(epsilon))
-        self.name = 'ONNG-NGT(%s, %s, %s, %s, %1.3f)' % (
-            self._edge_size, self._outdegree,
-            self._indegree, edge_size, epsilon)
-        epsilon = epsilon - 1.0
-        self.index.set(epsilon=epsilon, edge_size=edge_size)
-
-    def query(self, v, n):
-        return self.index.search(v, n, with_distance=False)
-
-    def freeIndex(self):
-        print('ONNG: free')