redisbench-admin 0.11.63__py3-none-any.whl → 0.11.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. redisbench_admin/run/ann/pkg/.dockerignore +2 -0
  2. redisbench_admin/run/ann/pkg/.git +1 -0
  3. redisbench_admin/run/ann/pkg/.github/workflows/benchmarks.yml +100 -0
  4. redisbench_admin/run/ann/pkg/.gitignore +21 -0
  5. redisbench_admin/run/ann/pkg/LICENSE +21 -0
  6. redisbench_admin/run/ann/pkg/README.md +157 -0
  7. redisbench_admin/run/ann/pkg/algos.yaml +1294 -0
  8. redisbench_admin/run/ann/pkg/algosP.yaml +67 -0
  9. redisbench_admin/run/ann/pkg/ann_benchmarks/__init__.py +2 -0
  10. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/__init__.py +0 -0
  11. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/annoy.py +26 -0
  12. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/balltree.py +22 -0
  13. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/base.py +36 -0
  14. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/bruteforce.py +110 -0
  15. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/ckdtree.py +17 -0
  16. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/datasketch.py +29 -0
  17. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/definitions.py +187 -0
  18. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/diskann.py +190 -0
  19. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dolphinnpy.py +31 -0
  20. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/dummy_algo.py +25 -0
  21. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elasticsearch.py +107 -0
  22. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/elastiknn.py +124 -0
  23. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss.py +124 -0
  24. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_gpu.py +61 -0
  25. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/faiss_hnsw.py +39 -0
  26. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/flann.py +27 -0
  27. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/hnswlib.py +36 -0
  28. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kdtree.py +22 -0
  29. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/kgraph.py +39 -0
  30. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/lshf.py +25 -0
  31. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/milvus.py +99 -0
  32. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/mrpt.py +41 -0
  33. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/n2.py +28 -0
  34. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nearpy.py +48 -0
  35. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/nmslib.py +74 -0
  36. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/onng_ngt.py +100 -0
  37. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/opensearchknn.py +107 -0
  38. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/panng_ngt.py +79 -0
  39. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pinecone.py +39 -0
  40. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/puffinn.py +45 -0
  41. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/pynndescent.py +115 -0
  42. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/qg_ngt.py +102 -0
  43. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/redisearch.py +90 -0
  44. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/rpforest.py +20 -0
  45. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/scann.py +34 -0
  46. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/sptag.py +28 -0
  47. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/subprocess.py +246 -0
  48. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vald.py +149 -0
  49. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vecsim-hnsw.py +43 -0
  50. redisbench_admin/run/ann/pkg/ann_benchmarks/algorithms/vespa.py +47 -0
  51. redisbench_admin/run/ann/pkg/ann_benchmarks/constants.py +1 -0
  52. redisbench_admin/run/ann/pkg/ann_benchmarks/data.py +48 -0
  53. redisbench_admin/run/ann/pkg/ann_benchmarks/datasets.py +620 -0
  54. redisbench_admin/run/ann/pkg/ann_benchmarks/distance.py +53 -0
  55. redisbench_admin/run/ann/pkg/ann_benchmarks/main.py +325 -0
  56. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/__init__.py +2 -0
  57. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/metrics.py +183 -0
  58. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/plot_variants.py +17 -0
  59. redisbench_admin/run/ann/pkg/ann_benchmarks/plotting/utils.py +165 -0
  60. redisbench_admin/run/ann/pkg/ann_benchmarks/results.py +71 -0
  61. redisbench_admin/run/ann/pkg/ann_benchmarks/runner.py +333 -0
  62. redisbench_admin/run/ann/pkg/create_dataset.py +12 -0
  63. redisbench_admin/run/ann/pkg/create_hybrid_dataset.py +147 -0
  64. redisbench_admin/run/ann/pkg/create_text_to_image_ds.py +117 -0
  65. redisbench_admin/run/ann/pkg/create_website.py +272 -0
  66. redisbench_admin/run/ann/pkg/install/Dockerfile +11 -0
  67. redisbench_admin/run/ann/pkg/install/Dockerfile.annoy +5 -0
  68. redisbench_admin/run/ann/pkg/install/Dockerfile.datasketch +4 -0
  69. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann +29 -0
  70. redisbench_admin/run/ann/pkg/install/Dockerfile.diskann_pq +31 -0
  71. redisbench_admin/run/ann/pkg/install/Dockerfile.dolphinn +5 -0
  72. redisbench_admin/run/ann/pkg/install/Dockerfile.elasticsearch +45 -0
  73. redisbench_admin/run/ann/pkg/install/Dockerfile.elastiknn +61 -0
  74. redisbench_admin/run/ann/pkg/install/Dockerfile.faiss +18 -0
  75. redisbench_admin/run/ann/pkg/install/Dockerfile.flann +10 -0
  76. redisbench_admin/run/ann/pkg/install/Dockerfile.hnswlib +10 -0
  77. redisbench_admin/run/ann/pkg/install/Dockerfile.kgraph +6 -0
  78. redisbench_admin/run/ann/pkg/install/Dockerfile.mih +4 -0
  79. redisbench_admin/run/ann/pkg/install/Dockerfile.milvus +27 -0
  80. redisbench_admin/run/ann/pkg/install/Dockerfile.mrpt +4 -0
  81. redisbench_admin/run/ann/pkg/install/Dockerfile.n2 +5 -0
  82. redisbench_admin/run/ann/pkg/install/Dockerfile.nearpy +5 -0
  83. redisbench_admin/run/ann/pkg/install/Dockerfile.ngt +13 -0
  84. redisbench_admin/run/ann/pkg/install/Dockerfile.nmslib +10 -0
  85. redisbench_admin/run/ann/pkg/install/Dockerfile.opensearchknn +43 -0
  86. redisbench_admin/run/ann/pkg/install/Dockerfile.puffinn +6 -0
  87. redisbench_admin/run/ann/pkg/install/Dockerfile.pynndescent +4 -0
  88. redisbench_admin/run/ann/pkg/install/Dockerfile.redisearch +18 -0
  89. redisbench_admin/run/ann/pkg/install/Dockerfile.rpforest +5 -0
  90. redisbench_admin/run/ann/pkg/install/Dockerfile.scann +5 -0
  91. redisbench_admin/run/ann/pkg/install/Dockerfile.scipy +4 -0
  92. redisbench_admin/run/ann/pkg/install/Dockerfile.sklearn +4 -0
  93. redisbench_admin/run/ann/pkg/install/Dockerfile.sptag +30 -0
  94. redisbench_admin/run/ann/pkg/install/Dockerfile.vald +8 -0
  95. redisbench_admin/run/ann/pkg/install/Dockerfile.vespa +17 -0
  96. redisbench_admin/run/ann/pkg/install.py +70 -0
  97. redisbench_admin/run/ann/pkg/logging.conf +34 -0
  98. redisbench_admin/run/ann/pkg/multirun.py +298 -0
  99. redisbench_admin/run/ann/pkg/plot.py +159 -0
  100. redisbench_admin/run/ann/pkg/protocol/bf-runner +10 -0
  101. redisbench_admin/run/ann/pkg/protocol/bf-runner.py +204 -0
  102. redisbench_admin/run/ann/pkg/protocol/ext-add-query-metric.md +51 -0
  103. redisbench_admin/run/ann/pkg/protocol/ext-batch-queries.md +77 -0
  104. redisbench_admin/run/ann/pkg/protocol/ext-prepared-queries.md +77 -0
  105. redisbench_admin/run/ann/pkg/protocol/ext-query-parameters.md +47 -0
  106. redisbench_admin/run/ann/pkg/protocol/specification.md +194 -0
  107. redisbench_admin/run/ann/pkg/requirements.txt +14 -0
  108. redisbench_admin/run/ann/pkg/requirements_py38.txt +11 -0
  109. redisbench_admin/run/ann/pkg/results/fashion-mnist-784-euclidean.png +0 -0
  110. redisbench_admin/run/ann/pkg/results/gist-960-euclidean.png +0 -0
  111. redisbench_admin/run/ann/pkg/results/glove-100-angular.png +0 -0
  112. redisbench_admin/run/ann/pkg/results/glove-25-angular.png +0 -0
  113. redisbench_admin/run/ann/pkg/results/lastfm-64-dot.png +0 -0
  114. redisbench_admin/run/ann/pkg/results/mnist-784-euclidean.png +0 -0
  115. redisbench_admin/run/ann/pkg/results/nytimes-256-angular.png +0 -0
  116. redisbench_admin/run/ann/pkg/results/sift-128-euclidean.png +0 -0
  117. redisbench_admin/run/ann/pkg/run.py +12 -0
  118. redisbench_admin/run/ann/pkg/run_algorithm.py +3 -0
  119. redisbench_admin/run/ann/pkg/templates/chartjs.template +102 -0
  120. redisbench_admin/run/ann/pkg/templates/detail_page.html +23 -0
  121. redisbench_admin/run/ann/pkg/templates/general.html +58 -0
  122. redisbench_admin/run/ann/pkg/templates/latex.template +30 -0
  123. redisbench_admin/run/ann/pkg/templates/summary.html +60 -0
  124. redisbench_admin/run/ann/pkg/test/__init__.py +0 -0
  125. redisbench_admin/run/ann/pkg/test/test-jaccard.py +19 -0
  126. redisbench_admin/run/ann/pkg/test/test-metrics.py +99 -0
  127. redisbench_admin/run_async/run_async.py +2 -2
  128. redisbench_admin/run_local/run_local.py +2 -2
  129. redisbench_admin/run_remote/run_remote.py +9 -5
  130. {redisbench_admin-0.11.63.dist-info → redisbench_admin-0.11.65.dist-info}/METADATA +2 -5
  131. redisbench_admin-0.11.65.dist-info/RECORD +243 -0
  132. {redisbench_admin-0.11.63.dist-info → redisbench_admin-0.11.65.dist-info}/WHEEL +1 -1
  133. redisbench_admin-0.11.63.dist-info/RECORD +0 -117
  134. {redisbench_admin-0.11.63.dist-info/licenses → redisbench_admin-0.11.65.dist-info}/LICENSE +0 -0
  135. {redisbench_admin-0.11.63.dist-info → redisbench_admin-0.11.65.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,107 @@
1
+ import logging
2
+ from time import sleep
3
+ from urllib.error import URLError
4
+ from urllib.request import Request, urlopen
5
+
6
+ from elasticsearch import Elasticsearch
7
+ from elasticsearch.helpers import bulk
8
+
9
+ from ann_benchmarks.algorithms.base import BaseANN
10
+
11
+ from .elasticsearch import es_wait
12
+
13
+ from tqdm import tqdm
14
+
15
# Raise the "elasticsearch" logger threshold so only WARNING and above are emitted.
logging.getLogger("elasticsearch").setLevel(logging.WARNING)
17
+
18
class OpenSearchKNN(BaseANN):
    """ANN-benchmarks adapter for an OpenSearch k-NN (HNSW / nmslib) index.

    All requests go to http://localhost:9200, mostly through the
    Elasticsearch client and once (warmup) through urllib.
    """

    def __init__(self, metric, dimension, method_param):
        """Record the index parameters and create the client.

        Args:
            metric: "angular" or "euclidean"; any other value raises KeyError.
            dimension: dimensionality of the vectors to index.
            method_param: mapping whose "efConstruction" and "M" entries are
                read by fit(); every item is folded into the index name.
        """
        self.metric = {"angular": "cosinesimil", "euclidean": "l2"}[metric]
        self.dimension = dimension
        self.method_param = method_param
        self.param_string = "-".join(
            k + "-" + str(v) for k, v in self.method_param.items()
        ).lower()
        self.name = f"os-{self.param_string}"
        self.es = Elasticsearch(["http://localhost:9200"])
        # es_wait is imported from the sibling elasticsearch module;
        # presumably it blocks until the node responds -- confirm there.
        es_wait()

    def fit(self, X):
        """Create the index, bulk-upload X, then force-merge, refresh and warm up.

        Documents get ids "1".."len(X)" (row number + 1, as strings).
        """
        body = {
            "settings": {
                "index": {"knn": True},
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "refresh_interval": -1
            }
        }

        mapping = {
            "properties": {
                "id": {"type": "keyword", "store": True},
                "vec": {
                    "type": "knn_vector",
                    "dimension": self.dimension,
                    "method": {
                        "name": "hnsw",
                        "space_type": self.metric,
                        "engine": "nmslib",
                        "parameters": {
                            "ef_construction": self.method_param["efConstruction"],
                            "m": self.method_param["M"]
                        }
                    }
                }
            }
        }

        self.es.indices.create(self.name, body=body)
        self.es.indices.put_mapping(mapping, self.name)

        print("Uploading data to the Index:", self.name)

        def gen():
            # One "index" action per row; ids are 1-based strings.
            for i, vec in enumerate(tqdm(X)):
                yield {"_op_type": "index", "_index": self.name, "vec": vec.tolist(), 'id': str(i + 1)}

        (_, errors) = bulk(self.es, gen(), chunk_size=500, max_retries=2, request_timeout=10)
        assert len(errors) == 0, errors

        print("Force Merge...")
        self.es.indices.forcemerge(self.name, max_num_segments=1, request_timeout=1000)

        print("Refreshing the Index...")
        self.es.indices.refresh(self.name, request_timeout=1000)

        print("Running Warmup API...")
        warmup_url = "http://localhost:9200/_plugins/_knn/warmup/" + self.name + "?pretty"
        # Use the response as a context manager so the HTTP connection is
        # closed even if reading or decoding the body fails.
        with urlopen(Request(warmup_url)) as res:
            print(res.read().decode("utf-8"))

    def set_query_arguments(self, ef):
        """Set the k-NN ``ef_search`` index setting to ``ef``."""
        body = {
            "settings": {
                "index": {"knn.algo_param.ef_search": ef}
            }
        }
        # NOTE(review): no index is named here, so the setting is sent for
        # every index on the node, not just self.name -- confirm intended.
        self.es.indices.put_settings(body=body)

    def query(self, q, n):
        """Return the 0-based row numbers of up to ``n`` neighbours of ``q``."""
        body = {
            "query": {
                "knn": {
                    "vec": {"vector": q.tolist(), "k": n}
                }
            }
        }

        res = self.es.search(index=self.name, body=body, size=n, _source=False, docvalue_fields=['id'],
                             stored_fields="_none_", filter_path=["hits.hits.fields.id"], request_timeout=10)

        # filter_path drops empty keys, so a search with no hits can come
        # back without "hits" at all; treat that as an empty result.
        hits = res.get('hits', {}).get('hits', [])
        # Stored ids are 1-based strings (see fit); convert back to row numbers.
        return [int(h['fields']['id'][0]) - 1 for h in hits]

    def batch_query(self, X, n):
        """Run query() for every row of X and keep the results on the instance."""
        self.batch_res = [self.query(q, n) for q in X]

    def get_batch_results(self):
        """Return the results stored by the last batch_query() call."""
        return self.batch_res

    def freeIndex(self):
        """Delete the index created by fit()."""
        self.es.indices.delete(index=self.name)
@@ -0,0 +1,79 @@
1
+ from __future__ import absolute_import
2
+ import sys
3
+ import os
4
+ import ngtpy
5
+ import numpy as np
6
+ import subprocess
7
+ import time
8
+ from ann_benchmarks.algorithms.base import BaseANN
9
+ from ann_benchmarks.constants import INDEX_DIR
10
+
11
+
12
class PANNG(BaseANN):
    """Benchmark wrapper that builds and searches an NGT index through ngtpy."""

    def __init__(self, metric, object_type, param):
        """Read the build/search settings and echo them to stdout.

        ``param`` must provide 'edge', 'pathadj' and 'searchedge';
        ``metric`` must be 'euclidean' or 'angular'.
        """
        distance_names = {'euclidean': 'L2', 'angular': 'Cosine'}
        self._edge_size = int(param['edge'])
        self._pathadj_size = int(param['pathadj'])
        self._edge_size_for_search = int(param['searchedge'])
        self._metric = distance_names[metric]
        self._object_type = object_type
        for label, value in (
                ('PANNG: edge_size=', self._edge_size),
                ('PANNG: pathadj_size=', self._pathadj_size),
                ('PANNG: edge_size_for_search=', self._edge_size_for_search)):
            print(label + str(value))
        print('PANNG: metric=' + metric)
        print('PANNG: object_type=' + object_type)

    def _create_index(self, index, X, dim):
        """Create the index at ``index``, insert X, optionally prune, and report the time taken."""
        started = time.time()
        ngtpy.create(path=index, dimension=dim,
                     edge_size_for_creation=self._edge_size,
                     distance_type=self._metric,
                     object_type=self._object_type)
        writer = ngtpy.Index(path=index)
        writer.batch_insert(X, num_threads=24, debug=False)
        writer.save()
        writer.close()
        if self._pathadj_size > 0:
            print('PANNG: path adjustment')
            subprocess.call(
                ['ngt', 'prune', '-s ' + str(self._pathadj_size), index])
        elapsed = time.time() - started
        print('PANNG: indexing, adjustment and saving time(sec)={}'
              .format(elapsed))

    def fit(self, X):
        """Build the on-disk index for X unless it is already there, then open it read-only."""
        print('PANNG: start indexing...')
        dim = len(X[0])
        print('PANNG: # of data=' + str(len(X)))
        print('PANNG: Dimensionality=' + str(dim))
        index_dir = 'indexes'
        if not os.path.exists(index_dir):
            os.makedirs(index_dir)
        # The path depends only on the edge and path-adjustment sizes.
        index_name = 'PANNG-' + str(self._edge_size) + '-' + str(self._pathadj_size)
        index = os.path.join(index_dir, index_name)
        print(index)
        if not os.path.exists(index):
            self._create_index(index, X, dim)
        else:
            print('PANNG: index already exists! ' + str(index))
        opened_at = time.time()
        self.index = ngtpy.Index(path=index, read_only=True)
        print('PANNG: open time(sec)=' + str(time.time() - opened_at))

    def set_query_arguments(self, epsilon):
        """Store ``epsilon - 1.0`` for searching and rebuild the display name."""
        print("PANNG: epsilon=" + str(epsilon))
        self._epsilon = epsilon - 1.0
        name_fields = (
            self._edge_size,
            self._pathadj_size,
            self._edge_size_for_search,
            self._epsilon + 1.0,
        )
        self.name = 'PANNG-NGT(%d, %d, %d, %1.3f)' % name_fields

    def query(self, v, n):
        """Search the opened index for ``n`` results near ``v``, without distances."""
        return self.index.search(
            v, n, self._epsilon, self._edge_size_for_search,
            with_distance=False)

    def freeIndex(self):
        """Only prints a message; nothing is released here."""
        print('PANNG: free')
@@ -0,0 +1,39 @@
1
+ from __future__ import absolute_import
2
+ from sqlite3 import paramstyle
3
+ from ann_benchmarks.algorithms.base import BaseANN
4
+ import sys
5
+ import pinecone
6
+
7
class Pinecone(BaseANN):
    """ANN-benchmarks adapter that stores and queries vectors in a Pinecone index."""

    def __init__(self, metric, dim, conn_params, type):
        """Initialise the Pinecone client and make sure the index exists.

        Args:
            metric: "angular" or "euclidean"; any other value raises KeyError.
            dim: vector dimensionality used when the index has to be created.
            conn_params: mapping providing "auth" (API key) and "shards".
            type: forwarded as ``index_type`` when the index is created.
        """
        pinecone.init(api_key=conn_params['auth'])
        m = {'angular': 'cosine', 'euclidean': 'euclidean'}[metric]
        self.name = 'ann-benchmark'
        if self.name not in pinecone.list_indexes():
            pinecone.create_index(self.name, dimension=dim, metric=m,
                                  index_type=type, shards=int(conn_params["shards"]), )
        self.index = pinecone.Index(self.name)

    def fit(self, X, offset=0, limit=None):
        """Upsert rows ``offset`` .. ``limit - 1`` of X, keyed by their row number.

        A falsy ``limit`` (None or 0) means "up to len(X)".
        """
        limit = limit if limit else len(X)

        bulk = [(str(i), X[i].tolist()) for i in range(offset, limit)]
        if not bulk:
            # Nothing to insert; bulk[-1] below would raise IndexError.
            return

        # Approximation for the pinecone insert limit (2MB or 1000 vectors).
        # sys.getsizeof is shallow, so measuring the (id, vector) tuple alone
        # yields a small constant and the 2MB cap would never take effect.
        # Add up the id, the list and every component instead. bulk[-1] is
        # used as the representative item because it has the longest id.
        last_id, last_vec = bulk[-1]
        item_bytes = (sys.getsizeof(last_id) + sys.getsizeof(last_vec)
                      + sum(sys.getsizeof(c) for c in last_vec))
        # NOTE(review): this is the in-memory size, used as an upper-bound
        # proxy for the request payload -- confirm against the client's
        # actual wire format.
        batch_size = max(1, min(1000, 2 * 1024 * 1024 // item_bytes))

        for start in range(0, len(bulk), batch_size):
            self.index.upsert(bulk[start: start + batch_size])

    def query(self, v, n):
        """Return the integer ids of the top ``n`` matches for ``v``."""
        res = self.index.query(v.tolist(), top_k=n)
        return [int(e['id']) for e in res['matches']]

    def freeIndex(self):
        """Delete the Pinecone index."""
        pinecone.delete_index(self.name)

    def __str__(self):
        return f'Pinecone({pinecone.describe_index(self.name)})'
@@ -0,0 +1,45 @@
1
+ from __future__ import absolute_import
2
+ import puffinn
3
+ from ann_benchmarks.algorithms.base import BaseANN
4
+ import numpy
5
+
6
class Puffinn(BaseANN):
    """Benchmark wrapper around a puffinn.Index."""

    def __init__(self, metric, space=10**6, hash_function="fht_crosspolytope", hash_source='pool', hash_args=None):
        """Keep the index settings; only 'jaccard' and 'angular' are accepted.

        Raises:
            NotImplementedError: for any other metric.
        """
        supported = ['jaccard', 'angular']
        if metric not in supported:
            raise NotImplementedError(
                "Puffinn doesn't support metric %s" % metric)
        self.metric = metric
        self.space = space
        self.hash_function = hash_function
        self.hash_source = hash_source
        self.hash_args = hash_args

    def fit(self, X):
        """Create the index, insert every row of X and rebuild it."""
        if self.metric == 'angular':
            dimensions = len(X[0])
        else:
            # For the other metric the dimension is the largest value seen
            # in any row, plus one.
            dimensions = 0
            for row in X:
                candidate = max(row) + 1
                if candidate > dimensions:
                    dimensions = candidate

        # hash_args is only forwarded when it is truthy.
        options = {
            'hash_function': self.hash_function,
            'hash_source': self.hash_source,
        }
        if self.hash_args:
            options['hash_args'] = self.hash_args
        self.index = puffinn.Index(self.metric, dimensions, self.space, **options)

        for row in X:
            self.index.insert(row.tolist())
        self.index.rebuild()

    def set_query_arguments(self, recall):
        """Remember the recall value passed to every search."""
        self.recall = recall

    def query(self, v, n):
        """Search for ``n`` neighbours of ``v`` using the stored recall value."""
        as_list = v.tolist()
        return self.index.search(as_list, n, self.recall)

    def __str__(self):
        fields = (self.space, self.recall, self.hash_function, self.hash_source)
        return 'PUFFINN(space=%d, recall=%f, hf=%s, hashsource=%s)' % fields
@@ -0,0 +1,115 @@
1
+ from __future__ import absolute_import
2
+ import pynndescent
3
+ from ann_benchmarks.algorithms.base import BaseANN
4
+ import numpy as np
5
+ import scipy.sparse
6
+
7
+
8
class PyNNDescent(BaseANN):
    """ANN-benchmarks adapter for pynndescent.NNDescent."""

    def __init__(self, metric, index_param_dict, n_search_trees=1):
        """Read the index parameters, falling back to defaults for missing keys.

        Args:
            metric: one of "angular", "euclidean", "hamming", "jaccard";
                any other value raises KeyError.
            index_param_dict: optional keys "n_neighbors" (default 30),
                "pruning_degree_multiplier" (1.5), "diversify_prob" (1.0)
                and "leaf_size" (32).
            n_search_trees: converted to int and passed to NNDescent.
        """
        self._n_neighbors = int(index_param_dict.get("n_neighbors", 30))
        self._pruning_degree_multiplier = float(
            index_param_dict.get("pruning_degree_multiplier", 1.5)
        )
        self._diversify_prob = float(index_param_dict.get("diversify_prob", 1.0))
        # The default must live on the instance: fit() and __str__ read
        # self._leaf_size, so binding it to a local raised AttributeError
        # whenever "leaf_size" was not supplied.
        self._leaf_size = int(index_param_dict.get("leaf_size", 32))

        self._n_search_trees = int(n_search_trees)

        self._pynnd_metric = {
            "angular": "dot",
            # 'angular': 'cosine',
            "euclidean": "euclidean",
            "hamming": "hamming",
            "jaccard": "jaccard",
        }[metric]

    def _sparse_convert_for_fit(self, X):
        """Turn a sequence of index lists into a 0/1 CSR matrix.

        Row i gets a 1 in every column listed in X[i]. The row and column
        counts are remembered on the instance for later query conversion.
        """
        lil_data = []
        self._n_cols = 1
        self._n_rows = len(X)
        for row in X:
            lil_data.append([1] * len(row))
            # max() raises on an empty sequence; an empty row cannot widen
            # the matrix anyway.
            if len(row) > 0:
                needed_cols = max(row) + 1
                if needed_cols > self._n_cols:
                    self._n_cols = needed_cols

        # np.int was only an alias of the builtin int and was removed in
        # NumPy 1.24; the builtin selects the same dtype.
        result = scipy.sparse.lil_matrix(
            (self._n_rows, self._n_cols), dtype=int
        )
        result.rows[:] = list(X)
        result.data[:] = lil_data
        return result.tocsr()

    def _sparse_convert_for_query(self, v):
        """Turn one index list into a single-row 0/1 CSR matrix."""
        result = scipy.sparse.csr_matrix((1, self._n_cols), dtype=int)
        result.indptr = np.array([0, len(v)])
        result.indices = np.array(v).astype(np.int32)
        result.data = np.ones(len(v), dtype=int)
        return result

    def fit(self, X):
        """Build the NNDescent index over X and prepare it for querying."""
        if self._pynnd_metric == "jaccard":
            # Convert to sparse matrix format
            X = self._sparse_convert_for_fit(X)

        self._index = pynndescent.NNDescent(
            X,
            n_neighbors=self._n_neighbors,
            metric=self._pynnd_metric,
            low_memory=True,
            leaf_size=self._leaf_size,
            pruning_degree_multiplier=self._pruning_degree_multiplier,
            diversify_prob=self._diversify_prob,
            n_search_trees=self._n_search_trees,
            compressed=True,
            verbose=True,
        )
        # Use prepare() when the index object has it; otherwise fall back
        # to whichever private initialisers are present.
        if hasattr(self._index, "prepare"):
            self._index.prepare()
        else:
            self._index._init_search_graph()
            if self._index._is_sparse:
                if hasattr(self._index, "_init_sparse_search_function"):
                    self._index._init_sparse_search_function()
            else:
                if hasattr(self._index, "_init_search_function"):
                    self._index._init_search_function()

    def set_query_arguments(self, epsilon=0.1):
        """Store the epsilon passed to every index query."""
        self._epsilon = float(epsilon)

    def query(self, v, n):
        """Return the indices of the ``n`` nearest neighbours of ``v``."""
        if self._pynnd_metric == "jaccard":
            # convert index array to sparse matrix format and query
            v = self._sparse_convert_for_query(v)
            ind, dist = self._index.query(v, k=n, epsilon=self._epsilon)
        else:
            ind, dist = self._index.query(
                v.reshape(1, -1).astype("float32"), k=n, epsilon=self._epsilon
            )
        # One query row was submitted, so take the first result row.
        return ind[0]

    def __str__(self):
        str_template = "PyNNDescent(n_neighbors=%d, pruning_mult=%.2f, diversify_prob=%.3f, epsilon=%.3f, leaf_size=%02d)"
        return str_template % (
            self._n_neighbors,
            self._pruning_degree_multiplier,
            self._diversify_prob,
            self._epsilon,
            self._leaf_size,
        )
@@ -0,0 +1,102 @@
1
+ from __future__ import absolute_import
2
+ import sys
3
+ import os
4
+ import ngtpy
5
+ import numpy as np
6
+ import subprocess
7
+ import time
8
+ from ann_benchmarks.algorithms.base import BaseANN
9
+ from ann_benchmarks.constants import INDEX_DIR
10
+
11
class QG(BaseANN):
    """Benchmark wrapper around an ngtpy.QuantizedIndex built with the ngt / ngtqg tools."""

    def __init__(self, metric, object_type, epsilon, param):
        """Read the build settings and echo the main ones to stdout.

        ``param`` must provide 'edge', 'outdegree' and 'indegree'; 'max_edge'
        (default 128), 'search_edge' (default -2) and 'tree' are optional.
        ``metric`` must be 'euclidean' or 'angular'.
        """
        distance_flags = {'euclidean': '2', 'angular': 'E'}
        self._edge_size = int(param['edge'])
        self._outdegree = int(param['outdegree'])
        self._indegree = int(param['indegree'])
        self._max_edge_size = int(param.get('max_edge', 128))
        self._metric = distance_flags[metric]
        self._object_type = object_type
        self._edge_size_for_search = int(param.get('search_edge', -2))
        # True only when 'tree' is present and compares equal to False.
        self._tree_disabled = (param.get('tree', True) == False)
        self._build_time_limit = 4
        self._epsilon = epsilon
        for label, value in (
                ('QG: edge_size=', self._edge_size),
                ('QG: outdegree=', self._outdegree),
                ('QG: indegree=', self._indegree),
                ('QG: edge_size_for_search=', self._edge_size_for_search),
                ('QG: epsilon=', self._epsilon)):
            print(label + str(value))
        print('QG: metric=' + metric)
        print('QG: object_type=' + object_type)

    def fit(self, X):
        """Run whichever build stages are still missing on disk, then open the quantized index."""
        print('QG: start indexing...')
        dim = len(X[0])
        print('QG: # of data=' + str(len(X)))
        print('QG: dimensionality=' + str(dim))
        index_dir = 'indexes'
        if not os.path.exists(index_dir):
            os.makedirs(index_dir)
        index = os.path.join(
            index_dir,
            'ONNG-{}-{}-{}'.format(self._edge_size, self._outdegree,
                                   self._indegree))
        anngIndex = os.path.join(index_dir, 'ANNG-' + str(self._edge_size))
        print('QG: index=' + index)

        # Stage 1: create and fill the ANNG when neither index exists yet.
        if not (os.path.exists(index) or os.path.exists(anngIndex)):
            print('QG: create ANNG')
            started = time.time()
            create_cmd = ['ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of',
                          '-D' + self._metric, '-d' + str(dim),
                          '-E' + str(self._edge_size), '-S40',
                          '-e' + str(self._epsilon), '-P0', '-B30',
                          '-T' + str(self._build_time_limit), anngIndex]
            subprocess.call(create_cmd)
            writer = ngtpy.Index(path=anngIndex)
            writer.batch_insert(X, num_threads=24, debug=False)
            writer.save()
            writer.close()
            print('QG: ANNG construction time(sec)=' + str(time.time() - started))

        # Stage 2: reconstruct the graph from the ANNG into the final index.
        if not os.path.exists(index):
            print('QG: degree adjustment')
            started = time.time()
            adjust_cmd = ['ngt', 'reconstruct-graph', '-mS',
                          '-E ' + str(self._outdegree),
                          '-o ' + str(self._outdegree),
                          '-i ' + str(self._indegree), anngIndex, index]
            subprocess.call(adjust_cmd)
            print('QG: degree adjustment time(sec)=' + str(time.time() - started))

        # Stage 3: quantize when the 'qg' entry is missing.
        if not os.path.exists(index + '/qg'):
            print('QG: quantization')
            started = time.time()
            subprocess.call(['ngtqg', 'quantize', index])
            print('QG: quantization time(sec)=' + str(time.time() - started))

        if not os.path.exists(index):
            print('QG: something wrong.')
        else:
            print('QG: index already exists! ' + str(index))
            started = time.time()
            self.index = ngtpy.QuantizedIndex(index, self._max_edge_size)
            self.index.set_with_distance(False)
            self.indexName = index
            print('QG: open time(sec)=' + str(time.time() - started))
        print('QG: end of fit')

    def set_query_arguments(self, parameters):
        """Unpack ``(result_expansion, epsilon)``, rebuild the name and configure the index."""
        result_expansion, epsilon = parameters
        print("QG: result_expansion=" + str(result_expansion))
        print("QG: epsilon=" + str(epsilon))
        name_fields = (
            self._edge_size, self._outdegree,
            self._indegree, self._max_edge_size,
            epsilon,
            result_expansion,
        )
        self.name = 'QG-NGT(%s, %s, %s, %s, %s, %1.3f)' % name_fields
        # The index is given epsilon - 1.0; the name keeps the caller's value.
        self.index.set(epsilon=epsilon - 1.0, result_expansion=result_expansion)

    def query(self, v, n):
        """Search the quantized index for ``n`` results near ``v``."""
        return self.index.search(v, n)

    def freeIndex(self):
        """Only prints a message; nothing is released here."""
        print('QG: free')
@@ -0,0 +1,90 @@
1
+ from __future__ import absolute_import
2
+ from optparse import Values
3
+ from redis import Redis
4
+ from redis.cluster import RedisCluster
5
+ from ann_benchmarks.constants import INDEX_DIR
6
+ from ann_benchmarks.algorithms.base import BaseANN
7
+ import math
8
+
9
+
10
class RediSearch(BaseANN):
    """Benchmark wrapper that indexes and searches vectors through RediSearch commands."""

    def __init__(self, algo, metric, conn_params, method_param):
        """Store the settings and open the Redis (or RedisCluster) connection.

        ``conn_params`` is read for 'cluster', 'host', 'port', 'auth', 'user'
        and 'shards'; a falsy host/port falls back to 'localhost' / 6379.
        """
        self.metric = {'angular': 'cosine', 'euclidean': 'l2'}[metric]
        self.method_param = method_param
        self.algo = algo
        self.name = 'redisearch-%s (%s)' % (self.algo, self.method_param)
        self.index_name = "ann_benchmark"
        self.text = None

        if conn_params['cluster']:
            client_cls = RedisCluster
        else:
            client_cls = Redis
        host = conn_params["host"] or 'localhost'
        port = conn_params["port"] or 6379
        self.redis = client_cls(host=host, port=port, decode_responses=False,
                                password=conn_params["auth"], username=conn_params["user"])
        self.shards = int(conn_params["shards"])
        if conn_params['cluster']:
            # In cluster mode the shard count is taken from the cluster itself.
            self.shards = len(self.redis.get_primaries())

    def fit(self, X, offset=0, limit=None, hybrid_buckets = None):
        """Create the index if needed and HSET rows ``offset`` .. ``limit - 1`` of X.

        With ``hybrid_buckets`` each stored hash also carries the bucket's
        text ('t') and number ('n'). Commands are pipelined and flushed
        every 1000 queued commands.
        """
        if not limit:
            limit = len(X)
        try:
            schema = [self.index_name, 'SCHEMA']
            if hybrid_buckets:
                schema += ['n', 'NUMERIC', 't', 'TEXT']
            # https://oss.redis.com/redisearch/master/Commands/#ftcreate
            # '10' / '6' is the number of attribute tokens that follow it.
            if self.algo == "HNSW":
                schema += ['vector', 'VECTOR', self.algo, '10', 'TYPE', 'FLOAT32', 'DIM', len(X[0]), 'DISTANCE_METRIC', self.metric, 'M', self.method_param['M'], 'EF_CONSTRUCTION', self.method_param["efConstruction"]]
            elif self.algo == "FLAT":
                schema += ['vector', 'VECTOR', self.algo, '6', 'TYPE', 'FLOAT32', 'DIM', len(X[0]), 'DISTANCE_METRIC', self.metric]
            print("Calling FT.CREATE", *schema)
            self.redis.execute_command('FT.CREATE', *schema, target_nodes='random')
        except Exception as err:
            # An already-existing index is fine; everything else propagates.
            if 'Index already exists' not in str(err):
                raise

        pipe = self.redis.pipeline(transaction=False)
        queued = 0
        if hybrid_buckets:
            print('running hybrid')
            for bucket in hybrid_buckets.values():
                ids = bucket['ids']
                text = bucket['text'].decode()
                number = bucket['number']
                print('calling HSET', '<id>', 'vector', '<vector blob>', 't', text, 'n', number)
                for doc_id in ids:
                    if not (doc_id >= offset and doc_id < limit):
                        continue
                    pipe.execute_command('HSET', int(doc_id), 'vector', X[doc_id].tobytes(), 't', text, 'n', int(number))
                    queued += 1
                    if queued % 1000 == 0:
                        pipe.execute()
                        pipe.reset()
        else:
            for row in range(offset, limit):
                pipe.execute_command('HSET', row, 'vector', X[row].tobytes())
                queued += 1
                if queued % 1000 == 0:
                    pipe.execute()
                    pipe.reset()
        # Flush whatever is still queued.
        pipe.execute()

    def set_query_arguments(self, ef):
        """Remember the EF_RUNTIME value used for HNSW searches."""
        self.ef = ef

    def set_hybrid_query(self, text):
        """Remember the text filter applied to subsequent searches."""
        self.text = text

    def query(self, v, k):
        """Run a KNN FT.SEARCH for ``v`` and return the document ids as ints."""
        # https://oss.redis.com/redisearch/master/Commands/#ftsearch
        qparams = f' EF_RUNTIME {self.ef}' if self.algo == 'HNSW' else ''
        vq = (f'(@t:{self.text})=>[KNN {k} @vector $BLOB {qparams}]'
              if self.text
              else f'*=>[KNN {k} @vector $BLOB {qparams}]')
        command = ['FT.SEARCH', self.index_name, vq, 'NOCONTENT', 'SORTBY', '__vector_score', 'LIMIT', '0', str(k), 'PARAMS', '2', 'BLOB', v.tobytes(), 'DIALECT', '2']
        reply = self.redis.execute_command(*command, target_nodes='random')
        # The first reply element is skipped; the rest are document ids.
        return [int(doc) for doc in reply[1:]]

    def freeIndex(self):
        """Issue FLUSHALL on the connection."""
        self.redis.execute_command("FLUSHALL")

    def __str__(self):
        return self.name + f", efRuntime: {self.ef}"
@@ -0,0 +1,20 @@
1
+ from __future__ import absolute_import
2
+ import rpforest
3
+ import numpy
4
+ from ann_benchmarks.algorithms.base import BaseANN
5
+
6
+
7
class RPForest(BaseANN):
    """Benchmark wrapper around rpforest.RPForest."""

    def __init__(self, leaf_size, n_trees):
        """Create the underlying model and a display name from the two settings."""
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        self._model = rpforest.RPForest(leaf_size=leaf_size, no_trees=n_trees)

    @staticmethod
    def _as_double(values):
        """Return ``values`` unchanged if already double, else a double copy."""
        if values.dtype != numpy.double:
            return numpy.array(values).astype(numpy.double)
        return values

    def fit(self, X):
        """Fit the model on X, converted to double precision when necessary."""
        self._model.fit(self._as_double(X))

    def query(self, v, n):
        """Query the model for ``n`` results near ``v`` (converted to double if needed)."""
        return self._model.query(self._as_double(v), n)
@@ -0,0 +1,34 @@
1
+ from __future__ import absolute_import
2
+ import os
3
+ import numpy as np
4
+ import scann
5
+ from ann_benchmarks.algorithms.base import BaseANN
6
+
7
class Scann(BaseANN):
    """Benchmark wrapper around a ScaNN searcher (tree + asymmetric hashing + reorder)."""

    def __init__(self, n_leaves, avq_threshold, dims_per_block, dist):
        """Keep the builder settings and derive the display name from them."""
        self.name = "scann n_leaves={} avq_threshold={:.02f} dims_per_block={}".format(
            n_leaves, avq_threshold, dims_per_block)
        self.n_leaves = n_leaves
        self.avq_threshold = avq_threshold
        self.dims_per_block = dims_per_block
        self.dist = dist

    def fit(self, X):
        """Build the searcher over X.

        For "dot_product" the rows of X are normalised IN PLACE first
        (all-zero rows are replaced by a constant vector before dividing).
        """
        spherical = self.dist == "dot_product"
        if spherical:
            zero_rows = np.linalg.norm(X, axis=1) == 0
            X[zero_rows] = 1.0 / np.sqrt(X.shape[1])
            X /= np.linalg.norm(X, axis=1)[:, np.newaxis]

        builder = scann.scann_ops_pybind.builder(X, 10, self.dist)
        builder = builder.tree(
            self.n_leaves, 1, training_sample_size=len(X),
            spherical=spherical, quantize_centroids=True)
        builder = builder.score_ah(
            self.dims_per_block,
            anisotropic_quantization_threshold=self.avq_threshold)
        builder = builder.reorder(1)
        self.searcher = builder.build()

    def set_query_arguments(self, leaves_reorder):
        """Unpack ``(leaves_to_search, reorder)`` for later searches."""
        self.leaves_to_search, self.reorder = leaves_reorder

    def query(self, v, n):
        """Search for ``n`` neighbours of ``v`` and return the first element of the result."""
        found = self.searcher.search(v, n, self.reorder, self.leaves_to_search)
        return found[0]
@@ -0,0 +1,28 @@
1
+ from __future__ import absolute_import
2
+ import SPTAG
3
+ from ann_benchmarks.algorithms.base import BaseANN
4
+
5
+
6
class Sptag(BaseANN):
    """Benchmark wrapper around SPTAG.AnnIndex."""

    def __init__(self, metric, algo):
        """Keep the algorithm name and map the metric to SPTAG's name for it."""
        self._algo = str(algo)
        metric_names = {'angular': 'Cosine', 'euclidean': 'L2'}
        self._metric = metric_names[metric]

    def fit(self, X):
        """Create a 'Float' index sized to X's second dimension and build it from X."""
        index = SPTAG.AnnIndex(self._algo, 'Float', X.shape[1])
        self._sptag = index
        index.SetBuildParam("NumberOfThreads", '32')
        index.SetBuildParam("DistCalcMethod", self._metric)
        index.Build(X, X.shape[0])

    def set_query_arguments(self, MaxCheck):
        """Store MaxCheck and pass it to the index as a string search parameter."""
        self._maxCheck = MaxCheck
        self._sptag.SetSearchParam("MaxCheck", str(self._maxCheck))

    def query(self, v, k):
        """Search for ``k`` results near ``v`` and return the first element of the result."""
        found = self._sptag.Search(v, k)
        return found[0]

    def __str__(self):
        fields = (self._metric, self._algo, self._maxCheck)
        return 'Sptag(metric=%s, algo=%s, check=%d)' % fields
28
+