rara-tools 0.0.10__tar.gz → 0.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.0.10/rara_tools.egg-info → rara_tools-0.0.12}/PKG-INFO +1 -1
- rara_tools-0.0.12/VERSION +1 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/digar_schema_converter.py +11 -4
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/elastic.py +183 -1
- {rara_tools-0.0.10 → rara_tools-0.0.12/rara_tools.egg-info}/PKG-INFO +1 -1
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools.egg-info/SOURCES.txt +1 -0
- rara_tools-0.0.12/tests/test_elastic_vector_and_search_operations.py +167 -0
- rara_tools-0.0.10/VERSION +0 -1
- {rara_tools-0.0.10 → rara_tools-0.0.12}/LICENSE.md +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/README.md +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/pyproject.toml +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/converters.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/decorators.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/s3.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools/utils.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools.egg-info/requires.txt +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/requirements.txt +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/setup.cfg +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/tests/test_converters.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/tests/test_elastic.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.0.10 → rara_tools-0.0.12}/tests/test_task_reporter.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.0.12
|
|
@@ -3,7 +3,7 @@ from typing import List, NoReturn
|
|
|
3
3
|
|
|
4
4
|
from rara_tools.utils import lang_to_iso639_2, ratio_to_percentage
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
UNDEFINED_LANGUAGE_VALUE = "unk"
|
|
8
8
|
QUALITY_RATIO_TYPE = "Float"
|
|
9
9
|
|
|
@@ -104,13 +104,15 @@ class DocSchemas:
|
|
|
104
104
|
generated_id: str = "",
|
|
105
105
|
permalink: str = "",
|
|
106
106
|
min_language_ratio: float = 0.2,
|
|
107
|
-
convert_ratio: bool = True
|
|
107
|
+
convert_ratio: bool = True,
|
|
108
|
+
generated_id_type: str = "CustomID"
|
|
108
109
|
) -> NoReturn:
|
|
109
110
|
self.__convert_ratio = convert_ratio
|
|
110
111
|
self.__min_language_ratio = min_language_ratio
|
|
111
112
|
self.__sierra_id = sierra_id
|
|
112
113
|
self.__generated_id = generated_id
|
|
113
114
|
self.__permalink = permalink
|
|
115
|
+
self.__generated_id_type = generated_id_type
|
|
114
116
|
self.__doc_meta = doc_meta
|
|
115
117
|
self.__ocr_accuracy_schema: dict = {}
|
|
116
118
|
self.__text_quality_schema: dict = {}
|
|
@@ -209,7 +211,7 @@ class DocSchemas:
|
|
|
209
211
|
identifiers.append(
|
|
210
212
|
{
|
|
211
213
|
"@type": "Identifier",
|
|
212
|
-
"qualifier":
|
|
214
|
+
"qualifier": self.__generated_id_type,
|
|
213
215
|
"value": self.__generated_id
|
|
214
216
|
}
|
|
215
217
|
)
|
|
@@ -235,6 +237,7 @@ class DIGARSchemaConverter:
|
|
|
235
237
|
generated_id: str,
|
|
236
238
|
sierra_id: str = "",
|
|
237
239
|
permalink: str = "",
|
|
240
|
+
generated_id_type: str = "CustomID",
|
|
238
241
|
min_language_ratio: float = 0.2,
|
|
239
242
|
convert_ratio: bool = False
|
|
240
243
|
) -> NoReturn:
|
|
@@ -250,6 +253,8 @@ class DIGARSchemaConverter:
|
|
|
250
253
|
Document's corresponding Sierra ID.
|
|
251
254
|
permalink: str
|
|
252
255
|
Permanent link, where the document can be accessed.
|
|
256
|
+
generated_id_type: str
|
|
257
|
+
Method / type of generated ID (e.g. 'UUID')
|
|
253
258
|
min_language_ratio: float
|
|
254
259
|
Cutoff ratio for languages. If ratio for some language
|
|
255
260
|
does not exceed the set threshold, the language will not
|
|
@@ -264,6 +269,7 @@ class DIGARSchemaConverter:
|
|
|
264
269
|
self.__sierra_id: str = sierra_id
|
|
265
270
|
self.__generated_id: str = generated_id
|
|
266
271
|
self.__permalink: str = permalink.removesuffix("/")
|
|
272
|
+
self.__generated_id_type: str = generated_id_type
|
|
267
273
|
self.__texts: List[dict] = []
|
|
268
274
|
self.__images: List[dict] = []
|
|
269
275
|
self.__doc_meta: dict = {}
|
|
@@ -281,7 +287,8 @@ class DIGARSchemaConverter:
|
|
|
281
287
|
generated_id=self.__generated_id,
|
|
282
288
|
permalink=self.__permalink,
|
|
283
289
|
min_language_ratio=self.__min_language_ratio,
|
|
284
|
-
convert_ratio=self.__convert_ratio
|
|
290
|
+
convert_ratio=self.__convert_ratio,
|
|
291
|
+
generated_id_type=self.__generated_id_type
|
|
285
292
|
)
|
|
286
293
|
self.__digar_schema: dict = {}
|
|
287
294
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
from typing import Any, Dict, Iterator, Optional
|
|
1
|
+
from typing import Any, Dict, Iterator, Optional, List
|
|
2
2
|
|
|
3
3
|
import elasticsearch_dsl
|
|
4
4
|
from elastic_transport import ObjectApiResponse
|
|
5
5
|
from elasticsearch import Elasticsearch
|
|
6
6
|
from elasticsearch.helpers import bulk
|
|
7
7
|
from elasticsearch_dsl import Index
|
|
8
|
+
from elasticsearch_dsl.response import Response
|
|
8
9
|
|
|
9
10
|
from .decorators import _elastic_connection
|
|
10
11
|
|
|
@@ -82,6 +83,63 @@ class KataElastic:
|
|
|
82
83
|
def add_mapping(self, index_name: str, schema: dict):
|
|
83
84
|
index = Index(name=index_name)
|
|
84
85
|
return index.put_mapping(body=schema, using=self.elasticsearch)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@_elastic_connection
def add_vector_mapping(
        self,
        index_name: str,
        field: str,
        schema: Optional[dict] = None,
        dims: int = 1024
) -> dict:
    """Put a ``dense_vector`` mapping for ``field`` onto an index.

    :param: index_name str: Index whose mapping is updated.
    :param: field str: Name of the field that will hold the vectors.
    :param: schema Optional[dict]: Complete mapping body to send instead of
        the generated one. Ignored when it is None or empty.
    :param: dims int: Number of dimensions of the generated vector field.
    :return: Result of ``Index.put_mapping``.
    """
    if schema:
        # A caller-supplied mapping replaces the generated one entirely.
        body = schema
    else:
        body = {
            "properties": {
                field: {"type": "dense_vector", "dims": dims}
            }
        }
    target_index = Index(name=index_name)
    return target_index.put_mapping(body=body, using=self.elasticsearch)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@_elastic_connection
def add_ann_vector_mapping(
        self,
        index_name: str,
        field: str,
        schema: Optional[dict] = None,
        dims: int = 1024
) -> dict:
    """Put an indexed, cosine-similarity ``dense_vector`` mapping onto an index.

    :param: index_name str: Index whose mapping is updated.
    :param: field str: Name of the field that will hold the vectors.
    :param: schema Optional[dict]: Complete mapping body to send instead of
        the generated one. Ignored when it is None or empty.
    :param: dims int: Number of dimensions of the generated vector field.
    :return: Result of ``Index.put_mapping``.
    """
    if schema:
        # A caller-supplied mapping replaces the generated one entirely.
        body = schema
    else:
        field_definition = {
            "type": "dense_vector",
            "dims": dims,
            "similarity": "cosine",
            "index": True
        }
        body = {"properties": {field: field_definition}}
    target_index = Index(name=index_name)
    return target_index.put_mapping(body=body, using=self.elasticsearch)
|
|
130
|
+
|
|
131
|
+
@_elastic_connection
def add_vector(
        self,
        index_name: str,
        document_id: str,
        vector: List[float],
        field: str
) -> dict:
    """Store ``vector`` under ``field`` of an existing document.

    The vector is sent as a partial-document update, so only ``field`` is
    part of the request body.

    :param: index_name str: Index containing the document.
    :param: document_id str: ID of the document to update.
    :param: vector List[float]: Vector to store.
    :param: field str: Name of the field that receives the vector.
    :return: Result of the Elasticsearch ``update`` call.
    """
    partial_document = {field: vector}
    return self.elasticsearch.update(
        index=index_name,
        id=document_id,
        body={"doc": partial_document},
        refresh="wait_for"
    )
|
|
85
143
|
|
|
86
144
|
@_elastic_connection
|
|
87
145
|
def create_index(
|
|
@@ -170,6 +228,130 @@ class KataElastic:
|
|
|
170
228
|
s.scan(), key=lambda doc: [getattr(doc, field) for field in sort_fields]
|
|
171
229
|
)
|
|
172
230
|
return documents
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@_elastic_connection
def execute_fuzzy_search(
        self,
        index: str,
        field: str,
        entity: str,
        fuzziness: int = 2,
        prefix_length: int = 1,
        max_expansions: int = 50
) -> Response:
    """Run a fuzzy query for ``entity`` against ``<field>.keyword``.

    :param: index str: Index to search from.
    :param: field str: Field to match against; ``.keyword`` is appended to
        it before querying.
    :param: entity str: Entity to search matches for.
    :param: fuzziness int: Maximum edit distance for a match.
    :param: prefix_length int: Number of characters in the prefix that
        should overlap with the original entity's prefix.
    :param: max_expansions int: Maximum number of terms the fuzzy query
        will match before halting the search.
    :return: Response of the executed search.
    """
    keyword_field = f"{field}.keyword"
    fuzzy_options = {
        "value": entity,
        "fuzziness": fuzziness,
        "max_expansions": max_expansions,
        "prefix_length": prefix_length
    }
    search = elasticsearch_dsl.Search(using=self.elasticsearch, index=index)
    search = search.query("fuzzy", **{keyword_field: fuzzy_options})
    return search.execute()
|
|
265
|
+
|
|
266
|
+
def execute_ann_vector_search(
        self,
        index: str,
        field: str,
        query_vector: List[float],
        k: int = 10,
        num_candidates: int = 100,
        n_docs: int = 10,
        elastic_ids: Optional[List[str]] = None
) -> Response:
    """ Execute an approximate nearest neighbour (ANN) vector search.
    NB! Works only with ANN mapping!

    :param: index str: Index to search from.
    :param: field str: Field containing vectorized data.
    :param: query_vector List[float]: Vector to search matches for.
    :param: k int: Number of nearest neighbors to return.
    :param: num_candidates int: Number of candidates considered before selecting k results.
    :param: n_docs int: Number of documents to return.
    :param: elastic_ids Optional[List[str]]: Elastic ID-s for restricting the
        search. None or an empty list means no restriction.
    :return: Response of the executed search.
    """
    # NOTE(review): unlike the other KataElastic methods visible in this
    # module, this one is not wrapped in @_elastic_connection; confirm
    # whether that is intentional.
    knn_clause = {
        "field": field,
        "query_vector": query_vector,
        "k": k,
        "num_candidates": num_candidates
    }

    if elastic_ids:
        # The restriction has to live inside the kNN clause. A top-level
        # query next to `knn` is combined with it as a union of hits, so it
        # would add documents instead of narrowing the neighbour search.
        knn_clause["filter"] = {"terms": {"_id": list(elastic_ids)}}

    s = elasticsearch_dsl.Search(using=self.elasticsearch, index=index)
    # `size` caps the number of returned hits independently of `k`.
    s = s.extra(knn=knn_clause, size=n_docs)
    return s.execute()
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def execute_script_score_vector_search(
        self,
        index: str,
        field: str,
        query_vector: List[float],
        n_docs: int = 10,
        elastic_ids: Optional[List[str]] = None
) -> Response:
    """ Execute a vector search scored by a cosine-similarity script.
    NB! Requires different mapping than ANN!

    :param: index str: Index to search from.
    :param: field str: Field containing vectorized data.
    :param: query_vector List[float]: Vector to search matches for.
    :param: n_docs int: Number of documents to return.
    :param: elastic_ids Optional[List[str]]: Elastic ID-s for restricting the
        search. None or an empty list means every document is scored.
    :return: Response of the executed search.
    """
    # NOTE(review): unlike the other KataElastic methods visible in this
    # module, this one is not wrapped in @_elastic_connection; confirm
    # whether that is intentional.
    if elastic_ids:
        base_query = elasticsearch_dsl.Q("terms", _id=elastic_ids)
    else:
        base_query = elasticsearch_dsl.Q("match_all")

    # `field` is interpolated into the script source, so it must be a
    # trusted field name. The query vector is passed as a script parameter.
    script = {
        "source": f"1.0 + cosineSimilarity(params.query_vector, '{field}')",
        "params": {
            "query_vector": query_vector
        }
    }

    s = elasticsearch_dsl.Search(using=self.elasticsearch, index=index)
    s = s.query("script_score", query=base_query, script=script)
    # Limit the number of returned documents (no min_score is applied).
    s = s.extra(size=n_docs)
    return s.execute()
|
|
354
|
+
|
|
173
355
|
|
|
174
356
|
def __str__(self) -> str:
|
|
175
357
|
return self.elasticsearch_url
|
|
@@ -22,6 +22,7 @@ rara_tools/constants/general.py
|
|
|
22
22
|
tests/test_converters.py
|
|
23
23
|
tests/test_digar_schema_converter.py
|
|
24
24
|
tests/test_elastic.py
|
|
25
|
+
tests/test_elastic_vector_and_search_operations.py
|
|
25
26
|
tests/test_s3_exceptions.py
|
|
26
27
|
tests/test_s3_file_operations.py
|
|
27
28
|
tests/test_task_reporter.py
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from time import sleep
|
|
6
|
+
from rara_tools.elastic import KataElastic
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_json(file_path: str):
    """Read the JSON file at ``file_path`` and return the parsed content.

    The file is decoded as UTF-8 explicitly, so non-ASCII text in the
    fixtures loads the same way regardless of the platform's default
    encoding.

    :param: file_path str: Path of the JSON file to read.
    :return: The deserialized JSON value.
    """
    with open(file_path, "r", encoding="utf-8") as fh:
        return json.load(fh)
|
|
13
|
+
|
|
14
|
+
# Resolve fixtures relative to this file so the tests do not depend on the
# directory pytest is launched from.
TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_data")

TEST_DOCUMENTS = load_json(os.path.join(TEST_DATA_DIR, "elastic_vectorized_docs.json"))
TEST_VECTOR_DATA = load_json(os.path.join(TEST_DATA_DIR, "test_vector_data.json"))
TEST_VECTOR = TEST_VECTOR_DATA.get("vector")

# The Elasticsearch endpoint can be overridden with ELASTIC_TEST_URL.
es_url = os.getenv("ELASTIC_TEST_URL", "http://rara-elastic.texta.ee:9200")
ELASTIC = KataElastic(es_url)

TEST_KNN_INDEX_NAME = "tools_knn_testing_index"
TEST_ANN_INDEX_NAME = "tools_ann_testing_index"

TEST_VECTOR_FIELD = "vector"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.mark.order(1)
def test_index_creation_with_knn_vector_mapping():
    """ Tests creating the kNN test index and adding the plain vector mapping.
    """
    creation_response = ELASTIC.create_index(TEST_KNN_INDEX_NAME)
    assert creation_response["acknowledged"] is True

    mapping_response = ELASTIC.add_vector_mapping(
        field=TEST_VECTOR_FIELD,
        index_name=TEST_KNN_INDEX_NAME
    )
    assert mapping_response["acknowledged"] is True
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.order(2)
def test_index_creation_with_ann_vector_mapping():
    """ Tests creating the ANN test index and adding the ANN vector mapping.
    """
    creation_response = ELASTIC.create_index(TEST_ANN_INDEX_NAME)
    assert creation_response["acknowledged"] is True

    mapping_response = ELASTIC.add_ann_vector_mapping(
        field=TEST_VECTOR_FIELD,
        index_name=TEST_ANN_INDEX_NAME
    )
    assert mapping_response["acknowledged"] is True
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pytest.mark.order(3)
def test_vectorized_document_addition_knn_index():
    """ Tests indexing the vectorized test documents into the kNN index.
    """
    for doc in TEST_DOCUMENTS:
        outcome = ELASTIC.index_document(TEST_KNN_INDEX_NAME, doc)
        assert outcome["result"] == "created"
    # Pause so the new documents are indexed before the search tests run.
    # NOTE(review): the pause is assumed to follow the loop, not run per
    # document; confirm against the original file.
    sleep(1)
|
|
66
|
+
|
|
67
|
+
@pytest.mark.order(4)
def test_vectorized_document_addition_ann_index():
    """ Tests indexing the vectorized test documents into the ANN index.
    """
    for doc in TEST_DOCUMENTS:
        outcome = ELASTIC.index_document(TEST_ANN_INDEX_NAME, doc)
        assert outcome["result"] == "created"
    # Pause so the new documents are indexed before the search tests run.
    # NOTE(review): the pause is assumed to follow the loop, not run per
    # document; confirm against the original file.
    sleep(1)
|
|
77
|
+
|
|
78
|
+
@pytest.mark.order(5)
def test_fuzzy_search():
    """ Tests fuzzy search with an exact match and with edit distance 2.
    """
    # (fuzziness, expected number of hits), checked in this order.
    expectations = ((0, 2), (2, 3))
    for fuzziness, expected_hits in expectations:
        response = ELASTIC.execute_fuzzy_search(
            index=TEST_ANN_INDEX_NAME,
            field="variations",
            entity="Paul Keres",
            fuzziness=fuzziness
        )
        assert response.hits.total.value == expected_hits
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@pytest.mark.order(6)
def test_ann_vector_search():
    """ Tests ANN vector search restricted to fuzzy-search hits.
    """
    # Execute fuzzy search to get ID restrictions
    fuzzy_response = ELASTIC.execute_fuzzy_search(
        index=TEST_ANN_INDEX_NAME,
        field="variations",
        entity="Paul Keres",
        fuzziness=2
    )
    assert fuzzy_response.hits.total.value == 3
    candidate_ids = [hit.meta.id for hit in fuzzy_response]

    vector_response = ELASTIC.execute_ann_vector_search(
        index=TEST_ANN_INDEX_NAME,
        field="vector",
        query_vector=TEST_VECTOR,
        k=1,
        n_docs=1,
        num_candidates=10,
        elastic_ids=candidate_ids
    )
    found_descriptions = [hit.description for hit in vector_response]
    assert len(found_descriptions) == 1
    assert found_descriptions[0] == "Eesti maletaja ja maleteoreetik"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@pytest.mark.order(7)
def test_script_score_vector_search():
    """ Tests script score vector search restricted to fuzzy-search hits.
    """
    # Execute fuzzy search to get ID restrictions
    fuzzy_response = ELASTIC.execute_fuzzy_search(
        index=TEST_KNN_INDEX_NAME,
        field="variations",
        entity="Paul Keres",
        fuzziness=2
    )
    assert fuzzy_response.hits.total.value == 3
    candidate_ids = [hit.meta.id for hit in fuzzy_response]

    vector_response = ELASTIC.execute_script_score_vector_search(
        index=TEST_KNN_INDEX_NAME,
        field="vector",
        query_vector=TEST_VECTOR,
        n_docs=1,
        elastic_ids=candidate_ids
    )
    found_descriptions = [hit.description for hit in vector_response]
    assert len(found_descriptions) == 1
    assert found_descriptions[0] == "Eesti maletaja ja maleteoreetik"
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@pytest.mark.order(8)
def test_index_deleting():
    """
    Tests deleting index. Both vector test indices are removed here.
    """
    # NOTE(review): the pause and the assertion are assumed to run once per
    # index; confirm against the original file.
    for index_name in (TEST_KNN_INDEX_NAME, TEST_ANN_INDEX_NAME):
        deletion_response = ELASTIC.delete_index(index_name)
        sleep(1)
        assert deletion_response["acknowledged"] is True
|
|
167
|
+
|
rara_tools-0.0.10/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.0.10
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|