redisbench-admin 0.11.55__py3-none-any.whl → 0.11.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- redisbench_admin/environments/oss_cluster.py +9 -1
- redisbench_admin/run/asm.py +1 -1
- redisbench_admin/run_remote/remote_helpers.py +41 -11
- redisbench_admin/run_remote/standalone.py +2 -3
- redisbench_admin/utils/remote.py +2 -0
- {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info}/METADATA +7 -4
- redisbench_admin-0.11.57.dist-info/RECORD +117 -0
- {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info}/WHEEL +1 -1
- redisbench_admin/run/ann/pkg/.dockerignore +0 -2
- redisbench_admin/run/ann/pkg/.git +0 -1
- redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +0 -100
- redisbench_admin/run/ann/pkg/.gitignore +0 -21
- redisbench_admin/run/ann/pkg/LICENSE +0 -21
- redisbench_admin/run/ann/pkg/README.md +0 -157
- redisbench_admin/run/ann/pkg/algos.yaml +0 -1294
- redisbench_admin/run/ann/pkg/algosP.yaml +0 -67
- redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +0 -2
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +0 -26
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +0 -22
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +0 -36
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +0 -110
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +0 -17
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +0 -29
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +0 -187
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +0 -190
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +0 -31
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +0 -25
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +0 -107
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +0 -124
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +0 -124
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +0 -61
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +0 -27
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +0 -36
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +0 -22
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +0 -25
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +0 -99
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +0 -41
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +0 -28
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +0 -48
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +0 -74
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +0 -100
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +0 -107
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +0 -79
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +0 -39
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +0 -45
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +0 -115
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +0 -102
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +0 -90
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +0 -20
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +0 -34
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +0 -28
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +0 -246
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +0 -149
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +0 -43
- redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +0 -47
- redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +0 -1
- redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +0 -48
- redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +0 -620
- redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +0 -53
- redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +0 -325
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +0 -2
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +0 -183
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +0 -17
- redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +0 -165
- redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +0 -71
- redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +0 -333
- redisbench_admin/run/ann/pkg/create_dataset.py +0 -12
- redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +0 -147
- redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +0 -117
- redisbench_admin/run/ann/pkg/create_website.py +0 -272
- redisbench_admin/run/ann/pkg/install/Dockerfile +0 -11
- redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +0 -29
- redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +0 -31
- redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +0 -45
- redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +0 -61
- redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +0 -18
- redisbench_admin/run/ann/pkg/install/Dockerfile.flann +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +0 -6
- redisbench_admin/run/ann/pkg/install/Dockerfile.mih +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +0 -27
- redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +0 -13
- redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +0 -10
- redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +0 -43
- redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +0 -6
- redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +0 -18
- redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.scann +0 -5
- redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +0 -4
- redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +0 -30
- redisbench_admin/run/ann/pkg/install/Dockerfile.vald +0 -8
- redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +0 -17
- redisbench_admin/run/ann/pkg/install.py +0 -70
- redisbench_admin/run/ann/pkg/logging.conf +0 -34
- redisbench_admin/run/ann/pkg/multirun.py +0 -298
- redisbench_admin/run/ann/pkg/plot.py +0 -159
- redisbench_admin/run/ann/pkg/protocol/bf-runner +0 -10
- redisbench_admin/run/ann/pkg/protocol/bf-runner.py +0 -204
- redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +0 -51
- redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +0 -77
- redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +0 -77
- redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +0 -47
- redisbench_admin/run/ann/pkg/protocol/specification.md +0 -194
- redisbench_admin/run/ann/pkg/requirements.txt +0 -14
- redisbench_admin/run/ann/pkg/requirements_py38.txt +0 -11
- redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
- redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
- redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
- redisbench_admin/run/ann/pkg/run.py +0 -12
- redisbench_admin/run/ann/pkg/run_algorithm.py +0 -3
- redisbench_admin/run/ann/pkg/templates/chartjs.template +0 -102
- redisbench_admin/run/ann/pkg/templates/detail_page.html +0 -23
- redisbench_admin/run/ann/pkg/templates/general.html +0 -58
- redisbench_admin/run/ann/pkg/templates/latex.template +0 -30
- redisbench_admin/run/ann/pkg/templates/summary.html +0 -60
- redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
- redisbench_admin/run/ann/pkg/test/test-jaccard.py +0 -19
- redisbench_admin/run/ann/pkg/test/test-metrics.py +0 -99
- redisbench_admin-0.11.55.dist-info/RECORD +0 -243
- {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info}/entry_points.txt +0 -0
- {redisbench_admin-0.11.55.dist-info → redisbench_admin-0.11.57.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from time import sleep
|
|
3
|
-
from urllib.error import URLError
|
|
4
|
-
from urllib.request import Request, urlopen
|
|
5
|
-
|
|
6
|
-
from elasticsearch import Elasticsearch
|
|
7
|
-
from elasticsearch.helpers import bulk
|
|
8
|
-
|
|
9
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
10
|
-
|
|
11
|
-
from .elasticsearch import es_wait
|
|
12
|
-
|
|
13
|
-
from tqdm import tqdm
|
|
14
|
-
|
|
15
|
-
# Configure the logger.
|
|
16
|
-
logging.getLogger("elasticsearch").setLevel(logging.WARN)
|
|
17
|
-
|
|
18
|
-
class OpenSearchKNN(BaseANN):
    """Benchmark adapter that indexes and queries vectors via OpenSearch k-NN.

    Every request goes to ``http://localhost:9200``. Vectors are stored in a
    ``knn_vector`` field called ``vec`` using the ``hnsw`` method on the
    ``nmslib`` engine; the document position is stored 1-based in ``id``.
    """

    def __init__(self, metric, dimension, method_param):
        """Store the index configuration and create the client.

        Args:
            metric: ``"angular"`` or ``"euclidean"``; anything else raises
                ``KeyError``.
            dimension: Dimension declared for the ``vec`` field.
            method_param: Mapping of build parameters. ``fit`` reads its
                ``"efConstruction"`` and ``"M"`` entries; all entries are
                folded into the index name.
        """
        self.metric = {"angular": "cosinesimil", "euclidean": "l2"}[metric]
        self.dimension = dimension
        self.method_param = method_param
        self.param_string = "-".join(k+"-"+str(v) for k,v in self.method_param.items()).lower()
        self.name = f"os-{self.param_string}"
        self.es = Elasticsearch(["http://localhost:9200"])
        # NOTE(review): es_wait comes from the sibling elasticsearch module;
        # presumably it blocks until the server answers -- confirm there.
        es_wait()

    def fit(self, X):
        """Create the index, bulk-upload ``X``, then merge, refresh and warm up.

        Args:
            X: Iterable of vectors; each element must provide ``tolist()``.

        Raises:
            AssertionError: If the bulk helper reports any item errors.
        """
        body = {
            "settings": {
                "index": {"knn": True},
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "refresh_interval": -1
            }
        }

        mapping = {
            "properties": {
                "id": {"type": "keyword", "store": True},
                "vec": {
                    "type": "knn_vector",
                    "dimension": self.dimension,
                    "method": {
                        "name": "hnsw",
                        "space_type": self.metric,
                        "engine": "nmslib",
                        "parameters": {
                            "ef_construction": self.method_param["efConstruction"],
                            "m": self.method_param["M"]
                        }
                    }
                }
            }
        }

        self.es.indices.create(self.name, body=body)
        self.es.indices.put_mapping(mapping, self.name)

        print("Uploading data to the Index:", self.name)

        def gen():
            # Ids are stored 1-based; query() subtracts 1 again.
            for i, vec in enumerate(tqdm(X)):
                yield { "_op_type": "index", "_index": self.name, "vec": vec.tolist(), 'id': str(i + 1) }

        (_, errors) = bulk(self.es, gen(), chunk_size=500, max_retries=2, request_timeout=10)
        assert len(errors) == 0, errors

        print("Force Merge...")
        self.es.indices.forcemerge(self.name, max_num_segments=1, request_timeout=1000)

        print("Refreshing the Index...")
        self.es.indices.refresh(self.name, request_timeout=1000)

        print("Running Warmup API...")
        warmup_url = "http://localhost:9200/_plugins/_knn/warmup/"+self.name+"?pretty"
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(Request(warmup_url)) as res:
            print(res.read().decode("utf-8"))

    def set_query_arguments(self, ef):
        """Push ``ef`` as the ``knn.algo_param.ef_search`` index setting."""
        body = {
            "settings": {
                "index": {"knn.algo_param.ef_search": ef}
            }
        }
        # NOTE(review): no index name is passed here, so the request is not
        # scoped to self.name -- confirm that this is intended.
        self.es.indices.put_settings(body=body)

    def query(self, q, n):
        """Return the 0-based positions of up to ``n`` neighbours of ``q``.

        Args:
            q: Query vector providing ``tolist()``.
            n: Number of neighbours requested.
        """
        body = {
            "query": {
                "knn": {
                    "vec": {"vector": q.tolist(), "k": n}
                }
            }
        }

        res = self.es.search(index=self.name, body=body, size=n, _source=False, docvalue_fields=['id'],
                             stored_fields="_none_", filter_path=["hits.hits.fields.id"], request_timeout=10)

        # Undo the +1 applied to the ids when the documents were uploaded.
        return [int(h['fields']['id'][0]) - 1 for h in res['hits']['hits']]

    def batch_query(self, X, n):
        """Run ``query`` for every vector in ``X`` and keep the results."""
        self.batch_res = [self.query(q, n) for q in X]

    def get_batch_results(self):
        """Return the results stored by the last ``batch_query`` call."""
        return self.batch_res

    def freeIndex(self):
        """Delete the index created by ``fit``."""
        self.es.indices.delete(index=self.name)
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import sys
|
|
3
|
-
import os
|
|
4
|
-
import ngtpy
|
|
5
|
-
import numpy as np
|
|
6
|
-
import subprocess
|
|
7
|
-
import time
|
|
8
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
9
|
-
from ann_benchmarks.constants import INDEX_DIR
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class PANNG(BaseANN):
    """Benchmark adapter for NGT's PANNG index, driven through ``ngtpy``."""

    def __init__(self, metric, object_type, param):
        """Read the build/search parameters and echo them to stdout.

        Args:
            metric: ``'euclidean'`` or ``'angular'``.
            object_type: Forwarded to ``ngtpy.create`` as ``object_type``.
            param: Mapping with ``'edge'``, ``'pathadj'`` and ``'searchedge'``.
        """
        distance_names = {'euclidean': 'L2', 'angular': 'Cosine'}
        self._edge_size = int(param['edge'])
        self._pathadj_size = int(param['pathadj'])
        self._edge_size_for_search = int(param['searchedge'])
        self._metric = distance_names[metric]
        self._object_type = object_type
        print('PANNG: edge_size=' + str(self._edge_size))
        print('PANNG: pathadj_size=' + str(self._pathadj_size))
        print('PANNG: edge_size_for_search=' + str(self._edge_size_for_search))
        print('PANNG: metric=' + metric)
        print('PANNG: object_type=' + object_type)

    def _create_index(self, index, X, dim):
        """Build the on-disk index at ``index`` and report the elapsed time."""
        started = time.time()
        ngtpy.create(
            path=index,
            dimension=dim,
            edge_size_for_creation=self._edge_size,
            distance_type=self._metric,
            object_type=self._object_type,
        )
        writer = ngtpy.Index(path=index)
        writer.batch_insert(X, num_threads=24, debug=False)
        writer.save()
        writer.close()
        if self._pathadj_size > 0:
            print('PANNG: path adjustment')
            prune_cmd = ['ngt', 'prune', '-s ' + str(self._pathadj_size), index]
            subprocess.call(prune_cmd)
        elapsed = time.time() - started
        print('PANNG: indexing, adjustment and saving time(sec)={}'
              .format(elapsed))

    def fit(self, X):
        """Create the index under ``indexes/`` if missing, then open it read-only."""
        print('PANNG: start indexing...')
        dim = len(X[0])
        print('PANNG: # of data=' + str(len(X)))
        print('PANNG: Dimensionality=' + str(dim))
        index_dir = 'indexes'
        if not os.path.exists(index_dir):
            os.makedirs(index_dir)
        index_name = 'PANNG-' + str(self._edge_size) + '-' + str(self._pathadj_size)
        index = os.path.join(index_dir, index_name)
        print(index)
        if not os.path.exists(index):
            self._create_index(index, X, dim)
        else:
            print('PANNG: index already exists! ' + str(index))
        open_started = time.time()
        self.index = ngtpy.Index(path=index, read_only=True)
        open_elapsed = time.time() - open_started
        print('PANNG: open time(sec)=' + str(open_elapsed))

    def set_query_arguments(self, epsilon):
        """Store ``epsilon - 1.0`` for searching and rebuild the display name."""
        print("PANNG: epsilon=" + str(epsilon))
        self._epsilon = epsilon - 1.0
        name_fields = (
            self._edge_size,
            self._pathadj_size,
            self._edge_size_for_search,
            self._epsilon + 1.0,
        )
        self.name = 'PANNG-NGT(%d, %d, %d, %1.3f)' % name_fields

    def query(self, v, n):
        """Search the opened index for ``n`` neighbours of ``v`` (no distances)."""
        return self.index.search(
            v, n, self._epsilon, self._edge_size_for_search,
            with_distance=False)

    def freeIndex(self):
        """Only logs; nothing is released here."""
        print('PANNG: free')
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
from sqlite3 import paramstyle
|
|
3
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
-
import sys
|
|
5
|
-
import pinecone
|
|
6
|
-
|
|
7
|
-
class Pinecone(BaseANN):
    """Benchmark adapter that stores and queries vectors in a Pinecone index."""

    def __init__(self, metric, dim, conn_params, type):
        """Initialise the client and make sure the ``ann-benchmark`` index exists.

        Args:
            metric: ``'angular'`` or ``'euclidean'``.
            dim: Vector dimension used when the index has to be created.
            conn_params: Mapping providing ``'auth'`` (API key) and ``'shards'``.
            type: Forwarded to ``pinecone.create_index`` as ``index_type``.
        """
        pinecone.init(api_key=conn_params['auth'])
        m = {'angular': 'cosine', 'euclidean': 'euclidean'}[metric]
        self.name = 'ann-benchmark'
        if self.name not in pinecone.list_indexes():
            pinecone.create_index(self.name, dimension=dim, metric=m,
                                  index_type=type, shards=int(conn_params["shards"]), )
        self.index = pinecone.Index(self.name)

    def fit(self, X, offset=0, limit=None):
        """Upsert ``X[offset:limit]`` in batches, keyed by the row position.

        Args:
            X: Indexable collection of vectors providing ``tolist()``.
            offset: First row to upload.
            limit: One past the last row to upload; a falsy value means
                ``len(X)``.
        """
        limit = limit if limit else len(X)

        bulk = [(str(i), X[i].tolist()) for i in range(offset, limit)]
        if not bulk:
            # Nothing to upload; bulk[-1] below would raise IndexError.
            return

        # approximation for pinecone insert limit (2MB or 1000 vectors)
        # bulk[-1] should be the largest (longest name). sys.getsizeof is
        # shallow, so this only measures the tuple object itself.
        batch_size = min(1000, 2 * 1024 * 1024 // (sys.getsizeof(bulk[-1])))

        for start in range(0, len(bulk), batch_size):
            self.index.upsert(bulk[start: start + batch_size])

    def query(self, v, n):
        """Return the integer ids of the ``n`` best matches for ``v``."""
        res = self.index.query(v.tolist(), top_k=n)
        return [int(e['id']) for e in res['matches']]

    def freeIndex(self):
        """Delete the Pinecone index."""
        pinecone.delete_index(self.name)

    def __str__(self):
        return f'Pinecone({pinecone.describe_index(self.name)})'
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import puffinn
|
|
3
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
-
import numpy
|
|
5
|
-
|
|
6
|
-
class Puffinn(BaseANN):
    """Benchmark adapter around ``puffinn.Index`` (jaccard or angular)."""

    def __init__(self, metric, space=10**6, hash_function="fht_crosspolytope", hash_source='pool', hash_args=None):
        """Validate the metric and remember the index settings."""
        if metric not in ['jaccard', 'angular']:
            raise NotImplementedError(
                "Puffinn doesn't support metric %s" % metric)
        self.metric = metric
        self.space = space
        self.hash_function = hash_function
        self.hash_source = hash_source
        self.hash_args = hash_args

    def fit(self, X):
        """Create the index, insert every row of ``X`` and rebuild it."""
        if self.metric == 'angular':
            dimensions = len(X[0])
        else:
            # Largest element seen plus one, never below zero.
            dimensions = max([0, *(max(row) + 1 for row in X)])

        index_kwargs = {
            "hash_function": self.hash_function,
            "hash_source": self.hash_source,
        }
        # hash_args is only forwarded when it is truthy.
        if self.hash_args:
            index_kwargs["hash_args"] = self.hash_args
        self.index = puffinn.Index(
            self.metric, dimensions, self.space, **index_kwargs)

        for row in X:
            self.index.insert(row.tolist())
        self.index.rebuild()

    def set_query_arguments(self, recall):
        """Remember the recall value passed to every search."""
        self.recall = recall

    def query(self, v, n):
        """Search for ``n`` neighbours of ``v`` with the stored recall."""
        return self.index.search(v.tolist(), n, self.recall)

    def __str__(self):
        fields = (self.space, self.recall, self.hash_function, self.hash_source)
        return 'PUFFINN(space=%d, recall=%f, hf=%s, hashsource=%s)' % fields
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import pynndescent
|
|
3
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
-
import numpy as np
|
|
5
|
-
import scipy.sparse
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class PyNNDescent(BaseANN):
    """Benchmark adapter around ``pynndescent.NNDescent``.

    For the ``jaccard`` metric the inputs are lists of column indices and are
    converted to sparse 0/1 matrices before being handed to the index.
    """

    def __init__(self, metric, index_param_dict, n_search_trees=1):
        """Read build parameters, falling back to defaults for missing keys.

        Args:
            metric: One of ``"angular"``, ``"euclidean"``, ``"hamming"``,
                ``"jaccard"``; anything else raises ``KeyError``.
            index_param_dict: Optional ``n_neighbors`` (30),
                ``pruning_degree_multiplier`` (1.5), ``diversify_prob`` (1.0)
                and ``leaf_size`` (32) entries.
            n_search_trees: Forwarded to ``NNDescent``.
        """
        self._n_neighbors = int(index_param_dict.get("n_neighbors", 30))
        self._pruning_degree_multiplier = float(
            index_param_dict.get("pruning_degree_multiplier", 1.5)
        )
        self._diversify_prob = float(index_param_dict.get("diversify_prob", 1.0))
        # The default used to be bound to a throwaway local, leaving
        # self._leaf_size undefined for fit() and __str__().
        self._leaf_size = int(index_param_dict.get("leaf_size", 32))

        self._n_search_trees = int(n_search_trees)

        self._pynnd_metric = {
            "angular": "dot",
            # 'angular': 'cosine',
            "euclidean": "euclidean",
            "hamming": "hamming",
            "jaccard": "jaccard",
        }[metric]

    def _sparse_convert_for_fit(self, X):
        """Turn a sequence of index lists into a CSR 0/1 matrix.

        Also records ``self._n_rows`` and ``self._n_cols`` (largest index + 1,
        at least 1) for later query conversion.
        """
        lil_data = []
        self._n_cols = 1
        self._n_rows = len(X)
        for i in range(self._n_rows):
            lil_data.append([1] * len(X[i]))
            if max(X[i]) + 1 > self._n_cols:
                self._n_cols = max(X[i]) + 1

        # Builtin int replaces the np.int alias, which NumPy has removed.
        result = scipy.sparse.lil_matrix(
            (self._n_rows, self._n_cols), dtype=int
        )
        result.rows[:] = list(X)
        result.data[:] = lil_data
        return result.tocsr()

    def _sparse_convert_for_query(self, v):
        """Turn one index list into a single-row CSR 0/1 matrix."""
        result = scipy.sparse.csr_matrix((1, self._n_cols), dtype=int)
        result.indptr = np.array([0, len(v)])
        result.indices = np.array(v).astype(np.int32)
        result.data = np.ones(len(v), dtype=int)
        return result

    def fit(self, X):
        """Build the NNDescent index over ``X`` and prepare it for searching."""
        if self._pynnd_metric == "jaccard":
            # Convert to sparse matrix format
            X = self._sparse_convert_for_fit(X)

        self._index = pynndescent.NNDescent(
            X,
            n_neighbors=self._n_neighbors,
            metric=self._pynnd_metric,
            low_memory=True,
            leaf_size=self._leaf_size,
            pruning_degree_multiplier=self._pruning_degree_multiplier,
            diversify_prob=self._diversify_prob,
            n_search_trees=self._n_search_trees,
            compressed=True,
            verbose=True,
        )
        # Use prepare() when the index offers it; otherwise fall back to the
        # private initialisers, each guarded by hasattr where the original was.
        if hasattr(self._index, "prepare"):
            self._index.prepare()
        else:
            self._index._init_search_graph()
            if self._index._is_sparse:
                if hasattr(self._index, "_init_sparse_search_function"):
                    self._index._init_sparse_search_function()
            else:
                if hasattr(self._index, "_init_search_function"):
                    self._index._init_search_function()

    def set_query_arguments(self, epsilon=0.1):
        """Remember the ``epsilon`` passed to every index query."""
        self._epsilon = float(epsilon)

    def query(self, v, n):
        """Return the indices of the ``n`` neighbours found for ``v``."""
        if self._pynnd_metric == "jaccard":
            # convert index array to sparse matrix format and query
            v = self._sparse_convert_for_query(v)
            ind, dist = self._index.query(v, k=n, epsilon=self._epsilon)
        else:
            ind, dist = self._index.query(
                v.reshape(1, -1).astype("float32"), k=n, epsilon=self._epsilon
            )
        return ind[0]

    def __str__(self):
        str_template = "PyNNDescent(n_neighbors=%d, pruning_mult=%.2f, diversify_prob=%.3f, epsilon=%.3f, leaf_size=%02d)"
        return str_template % (
            self._n_neighbors,
            self._pruning_degree_multiplier,
            self._diversify_prob,
            self._epsilon,
            self._leaf_size,
        )
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import sys
|
|
3
|
-
import os
|
|
4
|
-
import ngtpy
|
|
5
|
-
import numpy as np
|
|
6
|
-
import subprocess
|
|
7
|
-
import time
|
|
8
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
9
|
-
from ann_benchmarks.constants import INDEX_DIR
|
|
10
|
-
|
|
11
|
-
class QG(BaseANN):
    """Benchmark adapter for NGT's quantized graph index (``ngtpy.QuantizedIndex``).

    The index is built in stages by shelling out to the ``ngt`` and ``ngtqg``
    command-line tools; each stage is skipped when its output already exists
    under ``indexes/``.
    """

    def __init__(self, metric, object_type, epsilon, param):
        """Read the build parameters and echo them to stdout.

        Args:
            metric: ``'euclidean'`` or ``'angular'``; mapped to the value
                passed to ``ngt create`` via ``-D``.
            object_type: Stored and printed; must be a ``str`` because it is
                concatenated into the log line.
            epsilon: Passed to ``ngt create`` via ``-e``.
            param: Mapping with required ``'edge'``, ``'outdegree'`` and
                ``'indegree'``, and optional ``'max_edge'`` (default 128),
                ``'search_edge'`` (default -2) and ``'tree'``.
        """
        metrics = {'euclidean': '2', 'angular': 'E'}
        self._edge_size = int(param['edge'])
        self._outdegree = int(param['outdegree'])
        self._indegree = int(param['indegree'])
        self._max_edge_size = int(param['max_edge']) if 'max_edge' in param.keys() else 128
        self._metric = metrics[metric]
        self._object_type = object_type
        # Only printed below; not read again anywhere in this class.
        self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2
        # Stored but not read anywhere in this class.
        self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
        # Passed to `ngt create` through the -T option in fit().
        self._build_time_limit = 4
        self._epsilon = epsilon
        print('QG: edge_size=' + str(self._edge_size))
        print('QG: outdegree=' + str(self._outdegree))
        print('QG: indegree=' + str(self._indegree))
        print('QG: edge_size_for_search=' + str(self._edge_size_for_search))
        print('QG: epsilon=' + str(self._epsilon))
        print('QG: metric=' + metric)
        print('QG: object_type=' + object_type)

    def fit(self, X):
        """Build any missing index stage on disk, then open the quantized index.

        Stages, in order: create and fill the ANNG index, reconstruct the
        graph into the ONNG index, quantize it, and finally open it. The
        exit codes of the subprocess calls are not checked; a missing index
        directory at the end is reported with a log line only.
        """
        print('QG: start indexing...')
        dim = len(X[0])
        print('QG: # of data=' + str(len(X)))
        print('QG: dimensionality=' + str(dim))
        index_dir = 'indexes'
        if not os.path.exists(index_dir):
            os.makedirs(index_dir)
        index = os.path.join(
            index_dir,
            'ONNG-{}-{}-{}'.format(self._edge_size, self._outdegree,
                                   self._indegree))
        anngIndex = os.path.join(index_dir, 'ANNG-' + str(self._edge_size))
        print('QG: index=' + index)
        # Stage 1: only when neither the final nor the intermediate index exists.
        if (not os.path.exists(index)) and (not os.path.exists(anngIndex)):
            print('QG: create ANNG')
            t = time.time()
            args = ['ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of',
                    '-D' + self._metric, '-d' + str(dim),
                    '-E' + str(self._edge_size), '-S40',
                    '-e' + str(self._epsilon), '-P0', '-B30',
                    '-T' + str(self._build_time_limit), anngIndex]
            subprocess.call(args)
            idx = ngtpy.Index(path=anngIndex)
            idx.batch_insert(X, num_threads=24, debug=False)
            idx.save()
            idx.close()
            print('QG: ANNG construction time(sec)=' + str(time.time() - t))
        # Stage 2: derive the ONNG index from the ANNG index.
        if not os.path.exists(index):
            print('QG: degree adjustment')
            t = time.time()
            # NOTE(review): the outdegree is passed for both -E and -o.
            args = ['ngt', 'reconstruct-graph', '-mS',
                    '-E ' + str(self._outdegree),
                    '-o ' + str(self._outdegree),
                    '-i ' + str(self._indegree), anngIndex, index]
            subprocess.call(args)
            print('QG: degree adjustment time(sec)=' + str(time.time() - t))
        # Stage 3: quantize unless the 'qg' entry is already present.
        if not os.path.exists(index + '/qg'):
            print('QG: quantization')
            t = time.time()
            args = ['ngtqg', 'quantize', index]
            subprocess.call(args)
            print('QG: quantization time(sec)=' + str(time.time() - t))
        # Stage 4: open the index. The "already exists" line is also printed
        # when the index was created by the stages above.
        if os.path.exists(index):
            print('QG: index already exists! ' + str(index))
            t = time.time()
            self.index = ngtpy.QuantizedIndex(index, self._max_edge_size)
            self.index.set_with_distance(False)
            self.indexName = index
            print('QG: open time(sec)=' + str(time.time() - t))
        else:
            print('QG: something wrong.')
        print('QG: end of fit')

    def set_query_arguments(self, parameters):
        """Apply search parameters to the opened index and rebuild ``self.name``.

        Args:
            parameters: Two-item sequence ``(result_expansion, epsilon)``.
        """
        result_expansion, epsilon = parameters
        print("QG: result_expansion=" + str(result_expansion))
        print("QG: epsilon=" + str(epsilon))
        # The name uses the caller's epsilon, before the shift below.
        self.name = 'QG-NGT(%s, %s, %s, %s, %s, %1.3f)' % (
            self._edge_size, self._outdegree,
            self._indegree, self._max_edge_size,
            epsilon,
            result_expansion)
        # The index receives epsilon reduced by 1.0.
        epsilon = epsilon - 1.0
        self.index.set(epsilon=epsilon, result_expansion=result_expansion)

    def query(self, v, n):
        """Return the index's search result for ``n`` neighbours of ``v``."""
        return self.index.search(v, n)

    def freeIndex(self):
        """Only logs; nothing is released here."""
        print('QG: free')
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
from optparse import Values
|
|
3
|
-
from redis import Redis
|
|
4
|
-
from redis.cluster import RedisCluster
|
|
5
|
-
from ann_benchmarks.constants import INDEX_DIR
|
|
6
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
7
|
-
import math
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class RediSearch(BaseANN):
|
|
11
|
-
def __init__(self, algo, metric, conn_params, method_param):
|
|
12
|
-
self.metric = {'angular': 'cosine', 'euclidean': 'l2'}[metric]
|
|
13
|
-
self.method_param = method_param
|
|
14
|
-
self.algo = algo
|
|
15
|
-
self.name = 'redisearch-%s (%s)' % (self.algo, self.method_param)
|
|
16
|
-
self.index_name = "ann_benchmark"
|
|
17
|
-
self.text = None
|
|
18
|
-
|
|
19
|
-
redis = RedisCluster if conn_params['cluster'] else Redis
|
|
20
|
-
host = conn_params["host"] if conn_params["host"] else 'localhost'
|
|
21
|
-
port = conn_params["port"] if conn_params["port"] else 6379
|
|
22
|
-
self.redis = redis(host=host, port=port, decode_responses=False,
|
|
23
|
-
password=conn_params["auth"], username=conn_params["user"])
|
|
24
|
-
self.shards = int(conn_params["shards"])
|
|
25
|
-
if conn_params['cluster']:
|
|
26
|
-
self.shards = len(self.redis.get_primaries())
|
|
27
|
-
|
|
28
|
-
def fit(self, X, offset=0, limit=None, hybrid_buckets = None):
|
|
29
|
-
limit = limit if limit else len(X)
|
|
30
|
-
try:
|
|
31
|
-
args = [self.index_name, 'SCHEMA']
|
|
32
|
-
if hybrid_buckets:
|
|
33
|
-
args.extend(['n', 'NUMERIC', 't', 'TEXT'])
|
|
34
|
-
# https://oss.redis.com/redisearch/master/Commands/#ftcreate
|
|
35
|
-
if self.algo == "HNSW":
|
|
36
|
-
args.extend(['vector', 'VECTOR', self.algo, '10', 'TYPE', 'FLOAT32', 'DIM', len(X[0]), 'DISTANCE_METRIC', self.metric, 'M', self.method_param['M'], 'EF_CONSTRUCTION', self.method_param["efConstruction"]])
|
|
37
|
-
elif self.algo == "FLAT":
|
|
38
|
-
args.extend(['vector', 'VECTOR', self.algo, '6', 'TYPE', 'FLOAT32', 'DIM', len(X[0]), 'DISTANCE_METRIC', self.metric])
|
|
39
|
-
print("Calling FT.CREATE", *args)
|
|
40
|
-
self.redis.execute_command('FT.CREATE', *args, target_nodes='random')
|
|
41
|
-
except Exception as e:
|
|
42
|
-
if 'Index already exists' not in str(e):
|
|
43
|
-
raise
|
|
44
|
-
p = self.redis.pipeline(transaction=False)
|
|
45
|
-
count = 0
|
|
46
|
-
if hybrid_buckets:
|
|
47
|
-
print('running hybrid')
|
|
48
|
-
for bucket in hybrid_buckets.values():
|
|
49
|
-
ids = bucket['ids']
|
|
50
|
-
text = bucket['text'].decode()
|
|
51
|
-
number = bucket['number']
|
|
52
|
-
print('calling HSET', f'<id>', 'vector', '<vector blob>', 't', text, 'n', number)
|
|
53
|
-
for id in ids:
|
|
54
|
-
if id >= offset and id < limit:
|
|
55
|
-
p.execute_command('HSET', int(id), 'vector', X[id].tobytes(), 't', text, 'n', int(number))
|
|
56
|
-
count+=1
|
|
57
|
-
if count % 1000 == 0:
|
|
58
|
-
p.execute()
|
|
59
|
-
p.reset()
|
|
60
|
-
p.execute()
|
|
61
|
-
else:
|
|
62
|
-
for i in range(offset, limit):
|
|
63
|
-
p.execute_command('HSET', i, 'vector', X[i].tobytes())
|
|
64
|
-
count+=1
|
|
65
|
-
if count % 1000 == 0:
|
|
66
|
-
p.execute()
|
|
67
|
-
p.reset()
|
|
68
|
-
p.execute()
|
|
69
|
-
|
|
70
|
-
def set_query_arguments(self, ef):
    """Store ``ef`` on the instance for later use.

    ``query`` interpolates it into the ``EF_RUNTIME`` clause when the
    algorithm is ``'HNSW'``, and ``__str__`` includes it in the label.
    """
    self.ef = ef
|
|
72
|
-
|
|
73
|
-
def set_hybrid_query(self, text):
    """Store the text filter that ``query`` applies to the ``t`` field.

    A falsy value makes ``query`` search without a text filter (``*``).
    """
    self.text = text
|
|
75
|
-
|
|
76
|
-
def query(self, v, k):
    """Run a KNN ``FT.SEARCH`` for ``v`` and return the matching ids as ints.

    Args:
        v: query vector; its ``tobytes()`` output is sent as the ``BLOB``
            parameter.
        k: number of neighbours requested (used for both ``KNN`` and
            ``LIMIT``).

    Returns:
        ``int(...)`` of every reply element after the first one.
    """
    # https://oss.redis.com/redisearch/master/Commands/#ftsearch
    runtime = ''
    if self.algo == 'HNSW':
        runtime = f' EF_RUNTIME {self.ef}'

    # Restrict candidates to the text filter when one is set, otherwise
    # search across everything.
    prefilter = f'(@t:{self.text})' if self.text else '*'
    knn_clause = f'{prefilter}=>[KNN {k} @vector $BLOB {runtime}]'

    command = [
        'FT.SEARCH', self.index_name, knn_clause,
        'NOCONTENT',
        'SORTBY', '__vector_score',
        'LIMIT', '0', str(k),
        'PARAMS', '2', 'BLOB', v.tobytes(),
        'DIALECT', '2',
    ]
    reply = self.redis.execute_command(*command, target_nodes='random')
    # The first reply element is skipped (presumably the hit count - confirm).
    return [int(doc) for doc in reply[1:]]
|
|
85
|
-
|
|
86
|
-
def freeIndex(self):
    """Send ``FLUSHALL`` through the Redis client held in ``self.redis``."""
    client = self.redis
    client.execute_command("FLUSHALL")
|
|
88
|
-
|
|
89
|
-
def __str__(self):
    """Return ``self.name`` followed by the current ``efRuntime`` value."""
    suffix = f", efRuntime: {self.ef}"
    return self.name + suffix
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import rpforest
|
|
3
|
-
import numpy
|
|
4
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class RPForest(BaseANN):
    """Benchmark adapter around ``rpforest.RPForest``.

    Inputs to ``fit`` and ``query`` are converted to ``numpy.double``
    before being handed to the underlying model.
    """

    def __init__(self, leaf_size, n_trees):
        """Build the underlying forest and a descriptive ``name``.

        Args:
            leaf_size: forwarded as ``leaf_size`` to ``rpforest.RPForest``.
            n_trees: forwarded as ``no_trees`` to ``rpforest.RPForest``.
        """
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        self._model = rpforest.RPForest(leaf_size=leaf_size, no_trees=n_trees)

    @staticmethod
    def _as_double(data):
        """Return ``data`` as a float64 ndarray, copying only when required.

        ``numpy.asarray`` hands back an existing float64 ndarray unchanged,
        converts other dtypes in a single pass, and also accepts plain
        sequences that have no ``dtype`` attribute.
        """
        return numpy.asarray(data, dtype=numpy.double)

    def fit(self, X):
        """Fit the forest on ``X`` (any array-like of vectors)."""
        self._model.fit(self._as_double(X))

    def query(self, v, n):
        """Return the model's answer for the ``n`` neighbours of ``v``."""
        return self._model.query(self._as_double(v), n)
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import os
|
|
3
|
-
import numpy as np
|
|
4
|
-
import scann
|
|
5
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
6
|
-
|
|
7
|
-
class Scann(BaseANN):
    """Benchmark adapter that builds and queries a ``scann`` searcher."""

    def __init__(self, n_leaves, avq_threshold, dims_per_block, dist):
        """Record the build parameters and derive a descriptive ``name``."""
        label = "scann n_leaves={leaves} avq_threshold={avq:.02f} dims_per_block={dims}"
        self.name = label.format(leaves=n_leaves, avq=avq_threshold, dims=dims_per_block)
        self.n_leaves = n_leaves
        self.avq_threshold = avq_threshold
        self.dims_per_block = dims_per_block
        self.dist = dist

    def fit(self, X):
        """Build ``self.searcher`` from ``X``.

        For ``dist == "dot_product"`` the rows of ``X`` are normalised
        IN PLACE first: all-zero rows are overwritten with
        ``1 / sqrt(dim)`` and every row is then divided by its norm.
        """
        spherical = self.dist == "dot_product"
        if spherical:
            zero_rows = np.linalg.norm(X, axis=1) == 0
            X[zero_rows] = 1.0 / np.sqrt(X.shape[1])
            # Norms are recomputed because the zero rows were just replaced.
            row_norms = np.linalg.norm(X, axis=1)
            X /= row_norms[:, np.newaxis]

        pipeline = scann.scann_ops_pybind.builder(X, 10, self.dist)
        pipeline = pipeline.tree(
            self.n_leaves,
            1,
            training_sample_size=len(X),
            spherical=spherical,
            quantize_centroids=True,
        )
        pipeline = pipeline.score_ah(
            self.dims_per_block,
            anisotropic_quantization_threshold=self.avq_threshold,
        )
        pipeline = pipeline.reorder(1)
        self.searcher = pipeline.build()

    def set_query_arguments(self, leaves_reorder):
        """Unpack a ``(leaves_to_search, reorder)`` pair onto the instance."""
        leaves, reorder = leaves_reorder
        self.leaves_to_search = leaves
        self.reorder = reorder

    def query(self, v, n):
        """Search for ``n`` neighbours of ``v``; return the first result element."""
        found = self.searcher.search(v, n, self.reorder, self.leaves_to_search)
        return found[0]
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
from __future__ import absolute_import
|
|
2
|
-
import SPTAG
|
|
3
|
-
from ann_benchmarks.algorithms.base import BaseANN
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class Sptag(BaseANN):
    """Benchmark adapter around ``SPTAG.AnnIndex``."""

    def __init__(self, metric, algo):
        """Record the algorithm name and translate the metric name.

        Args:
            metric: ``'angular'`` or ``'euclidean'``.
            algo: index algorithm; stored as ``str(algo)``.

        Raises:
            KeyError: if ``metric`` is not one of the two supported names.
        """
        self._algo = str(algo)
        self._metric = {
            'angular': 'Cosine', 'euclidean': 'L2'}[metric]
        # Defined up front so __str__ works before set_query_arguments().
        self._maxCheck = None

    def fit(self, X):
        """Create the index for ``X`` and build it.

        ``X.shape[1]`` is passed as the dimension and ``X.shape[0]`` as the
        number of vectors.
        """
        self._sptag = SPTAG.AnnIndex(self._algo, 'Float', X.shape[1])
        self._sptag.SetBuildParam("NumberOfThreads", '32')
        self._sptag.SetBuildParam("DistCalcMethod", self._metric)
        self._sptag.Build(X, X.shape[0])

    def set_query_arguments(self, MaxCheck):
        """Store ``MaxCheck`` and push it to the index as a search parameter.

        Requires ``fit`` to have created ``self._sptag`` already.
        """
        self._maxCheck = MaxCheck
        self._sptag.SetSearchParam("MaxCheck", str(self._maxCheck))

    def query(self, v, k):
        """Search for ``k`` neighbours of ``v``; return the first result element."""
        return self._sptag.Search(v, k)[0]

    def __str__(self):
        """Describe the instance; ``check`` reads ``unset`` until it is configured."""
        if self._maxCheck is None:
            check = 'unset'
        else:
            check = '%d' % self._maxCheck
        return 'Sptag(metric=%s, algo=%s, check=%s)' % (self._metric,
                                                        self._algo, check)
|
|
28
|
-
|