rara-tools 0.0.11__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

Files changed (31) hide show
  1. {rara_tools-0.0.11/rara_tools.egg-info → rara_tools-0.0.12}/PKG-INFO +1 -1
  2. rara_tools-0.0.12/VERSION +1 -0
  3. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/elastic.py +44 -1
  4. {rara_tools-0.0.11 → rara_tools-0.0.12/rara_tools.egg-info}/PKG-INFO +1 -1
  5. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_elastic_vector_and_search_operations.py +32 -5
  6. rara_tools-0.0.11/VERSION +0 -1
  7. {rara_tools-0.0.11 → rara_tools-0.0.12}/LICENSE.md +0 -0
  8. {rara_tools-0.0.11 → rara_tools-0.0.12}/README.md +0 -0
  9. {rara_tools-0.0.11 → rara_tools-0.0.12}/pyproject.toml +0 -0
  10. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/constants/__init__.py +0 -0
  11. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/constants/digitizer.py +0 -0
  12. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/constants/general.py +0 -0
  13. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/converters.py +0 -0
  14. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/decorators.py +0 -0
  15. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/digar_schema_converter.py +0 -0
  16. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/exceptions.py +0 -0
  17. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/s3.py +0 -0
  18. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/task_reporter.py +0 -0
  19. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools/utils.py +0 -0
  20. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools.egg-info/SOURCES.txt +0 -0
  21. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools.egg-info/dependency_links.txt +0 -0
  22. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools.egg-info/requires.txt +0 -0
  23. {rara_tools-0.0.11 → rara_tools-0.0.12}/rara_tools.egg-info/top_level.txt +0 -0
  24. {rara_tools-0.0.11 → rara_tools-0.0.12}/requirements.txt +0 -0
  25. {rara_tools-0.0.11 → rara_tools-0.0.12}/setup.cfg +0 -0
  26. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_converters.py +0 -0
  27. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_digar_schema_converter.py +0 -0
  28. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_elastic.py +0 -0
  29. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_s3_exceptions.py +0 -0
  30. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_s3_file_operations.py +0 -0
  31. {rara_tools-0.0.11 → rara_tools-0.0.12}/tests/test_task_reporter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rara-tools
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.0.12
@@ -263,7 +263,7 @@ class KataElastic:
263
263
  response = s.execute()
264
264
  return response
265
265
 
266
- def execute_vector_search(
266
+ def execute_ann_vector_search(
267
267
  self,
268
268
  index: str,
269
269
  field: str,
@@ -281,6 +281,7 @@ class KataElastic:
281
281
  :param: query vector List[float]: Vector to search matches for.
282
282
  :param: k int: Number of nearest neighbors to return.
283
283
  :param: num_candidates int: Number of candidates considered before selecting k results.
284
+ :param: n_docs: int: Number of documents to return.
284
285
  :param: elastic_ids: List[str]: Elastic ID-s for restricting the search.
285
286
  """
286
287
 
@@ -308,6 +309,48 @@ class KataElastic:
308
309
  # Execute the search
309
310
  response = s.execute()
310
311
  return response
312
+
313
+
314
+ def execute_script_score_vector_search(
315
+ self,
316
+ index: str,
317
+ field: str,
318
+ query_vector: List[float],
319
+ n_docs: int = 10,
320
+ elastic_ids: List[str] = []
321
+ ) -> Response:
322
+ """ Execute a script_score (cosine similarity) vector search.
323
+ NB! Requires different mapping than ANN!
324
+
325
+ :param: index str: Index to search from.
326
+ :param: field str: Field containing vectorized data.
327
+ :param: query vector List[float]: Vector to search matches for.
328
+ :param: n_docs: int: Number of documents to return.
329
+ :param: elastic_ids: List[str]: Elastic ID-s for restricting the search.
330
+ """
331
+ s = elasticsearch_dsl.Search(using=self.elasticsearch, index=index)
332
+
333
+ if elastic_ids:
334
+ query = elasticsearch_dsl.Q("terms", _id=elastic_ids)
335
+ else:
336
+ query = elasticsearch_dsl.Q("match_all")
337
+ # Apply script_score query
338
+ s = s.query(
339
+ "script_score",
340
+ query=query,
341
+ script={
342
+ "source": f"1.0 + cosineSimilarity(params.query_vector, '{field}')",
343
+ "params": {
344
+ "query_vector": query_vector
345
+ }
346
+ }
347
+ )
348
+ # Limit the number of documents returned
349
+ s = s.extra(size=n_docs)
350
+
351
+ # Execute search
352
+ response = s.execute()
353
+ return response
311
354
 
312
355
 
313
356
  def __str__(self) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rara-tools
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -15,7 +15,7 @@ TEST_DOCUMENTS = load_json("./tests/test_data/elastic_vectorized_docs.json")
15
15
  TEST_VECTOR_DATA = load_json("./tests/test_data/test_vector_data.json")
16
16
  TEST_VECTOR = TEST_VECTOR_DATA.get("vector")
17
17
 
18
- es_url = os.getenv("ELASTIC_TEST_URL", "http://localhost:9200")
18
+ es_url = os.getenv("ELASTIC_TEST_URL", "http://rara-elastic.texta.ee:9200")#http://localhost:9200")
19
19
  ELASTIC = KataElastic(es_url)
20
20
 
21
21
  TEST_KNN_INDEX_NAME = "tools_knn_testing_index"
@@ -99,8 +99,8 @@ def test_fuzzy_search():
99
99
 
100
100
 
101
101
  @pytest.mark.order(6)
102
- def test_vector_search():
103
- """ Tests vector search.
102
+ def test_ann_vector_search():
103
+ """ Tests ANN vector search.
104
104
  """
105
105
  # Execute fuzzy search to get ID restrictions
106
106
  response = ELASTIC.execute_fuzzy_search(
@@ -113,7 +113,7 @@ def test_vector_search():
113
113
  assert total_hits == 3
114
114
  elastic_ids = [hit.meta.id for hit in response]
115
115
 
116
- response = ELASTIC.execute_vector_search(
116
+ response = ELASTIC.execute_ann_vector_search(
117
117
  index=TEST_ANN_INDEX_NAME,
118
118
  field="vector",
119
119
  query_vector=TEST_VECTOR,
@@ -126,8 +126,35 @@ def test_vector_search():
126
126
  assert len(descriptions) == 1
127
127
  assert descriptions[0] == "Eesti maletaja ja maleteoreetik"
128
128
 
129
-
129
+
130
130
  @pytest.mark.order(7)
131
+ def test_script_score_vector_search():
132
+ """ Tests script score vector search.
133
+ """
134
+ # Execute fuzzy search to get ID restrictions
135
+ response = ELASTIC.execute_fuzzy_search(
136
+ index=TEST_KNN_INDEX_NAME,
137
+ field="variations",
138
+ entity="Paul Keres",
139
+ fuzziness=2
140
+ )
141
+ total_hits = response.hits.total.value
142
+ assert total_hits == 3
143
+ elastic_ids = [hit.meta.id for hit in response]
144
+
145
+ response = ELASTIC.execute_script_score_vector_search(
146
+ index=TEST_KNN_INDEX_NAME,
147
+ field="vector",
148
+ query_vector=TEST_VECTOR,
149
+ n_docs=1,
150
+ elastic_ids=elastic_ids
151
+ )
152
+ descriptions = [hit.description for hit in response]
153
+ assert len(descriptions) == 1
154
+ assert descriptions[0] == "Eesti maletaja ja maleteoreetik"
155
+
156
+
157
+ @pytest.mark.order(8)
131
158
  def test_index_deleting():
132
159
  """
133
160
  Tests deleting index. We delete the test index now.
rara_tools-0.0.11/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.11
File without changes
File without changes
File without changes
File without changes