rara-tools 0.6.15__py3-none-any.whl → 0.6.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

rara_tools/elastic.py CHANGED
@@ -4,7 +4,7 @@ import elasticsearch_dsl
4
4
  from elastic_transport import ObjectApiResponse
5
5
  from elasticsearch import Elasticsearch
6
6
  from elasticsearch.helpers import bulk
7
- from elasticsearch_dsl import Index
7
+ from elasticsearch_dsl import Index, Search, Q
8
8
  from elasticsearch_dsl.response import Response
9
9
 
10
10
  from .decorators import _elastic_connection
@@ -84,20 +84,30 @@ class KataElastic:
84
84
  def add_mapping(self, index_name: str, schema: dict):
85
85
  index = Index(name=index_name)
86
86
  return index.put_mapping(body=schema, using=self.elasticsearch)
87
-
88
-
87
+
88
+ @_elastic_connection
89
+ def delete_by_query(self, index: str, query_kwargs: dict, query_type: str = "term", wait_for_completion=True):
90
+ query = Q(query_type, **query_kwargs)
91
+ s = Search(using=self.elasticsearch, index=index).query(query)
92
+ response = self.elasticsearch.delete_by_query(
93
+ index=index,
94
+ body={"query": s.to_dict()["query"]},
95
+ wait_for_completion=True
96
+ )
97
+ return response
98
+
89
99
  @_elastic_connection
90
100
  def add_vector_mapping(
91
- self,
92
- index_name: str,
93
- field: str,
94
- schema: Optional[dict] = None,
101
+ self,
102
+ index_name: str,
103
+ field: str,
104
+ schema: Optional[dict] = None,
95
105
  dims: int = 1024
96
106
  ) -> dict:
97
107
  vector_mapping = {
98
108
  "properties": {
99
109
  field: {
100
- "type": "dense_vector",
110
+ "type": "dense_vector",
101
111
  "dims": dims
102
112
  }
103
113
  }
@@ -105,22 +115,21 @@ class KataElastic:
105
115
  mapping = schema or vector_mapping
106
116
  index = Index(name=index_name)
107
117
  return index.put_mapping(body=mapping, using=self.elasticsearch)
108
-
109
-
118
+
110
119
  @_elastic_connection
111
120
  def add_ann_vector_mapping(
112
- self,
121
+ self,
113
122
  index_name: str,
114
123
  field: str,
115
- schema: Optional[dict] = None,
124
+ schema: Optional[dict] = None,
116
125
  dims: int = 1024
117
126
  ) -> dict:
118
127
  vector_mapping = {
119
128
  "properties": {
120
129
  field: {
121
- "type": "dense_vector",
122
- "dims": dims,
123
- "similarity": "cosine",
130
+ "type": "dense_vector",
131
+ "dims": dims,
132
+ "similarity": "cosine",
124
133
  "index": True
125
134
  }
126
135
  }
@@ -131,15 +140,19 @@ class KataElastic:
131
140
 
132
141
  @_elastic_connection
133
142
  def add_vector(
134
- self,
143
+ self,
135
144
  index_name: str,
136
- document_id: str,
137
- vector: List[float],
138
- field: str
145
+ document_id: str,
146
+ vector: List[float],
147
+ field: str,
148
+ refresh: str = "wait_for"
139
149
  ) -> dict:
140
150
  schema = {"doc": {field: vector}}
141
151
  return self.elasticsearch.update(
142
- index=index_name, id=document_id, body=schema, refresh="wait_for"
152
+ index=index_name,
153
+ id=document_id,
154
+ body=schema,
155
+ refresh=refresh
143
156
  )
144
157
 
145
158
  @_elastic_connection
@@ -204,7 +217,7 @@ class KataElastic:
204
217
  actions = [{"_index": last_index_name, "_source": document} for document in documents]
205
218
  successful_count, error_count = bulk(actions=actions, client=self.elasticsearch, max_retries=3, refresh=refresh)
206
219
  return successful_count, error_count
207
-
220
+
208
221
  @_elastic_connection
209
222
  def bulk_index_without_rollver(
210
223
  self,
@@ -240,16 +253,15 @@ class KataElastic:
240
253
  s.scan(), key=lambda doc: [getattr(doc, field) for field in sort_fields]
241
254
  )
242
255
  return documents
243
-
244
-
256
+
245
257
  @_elastic_connection
246
258
  def execute_fuzzy_search(
247
- self,
259
+ self,
248
260
  index: str,
249
261
  field: str,
250
- entity: str,
251
- fuzziness: int = 2,
252
- prefix_length: int = 1,
262
+ entity: str,
263
+ fuzziness: int = 2,
264
+ prefix_length: int = 1,
253
265
  max_expansions: int = 50
254
266
  ) -> Response:
255
267
  """Executes a fuzzy search.
@@ -261,7 +273,7 @@ class KataElastic:
261
273
  :param: max_expansion int: maximum number of terms the fuzzy query
262
274
  will match before halting the search
263
275
  :return: Dict on search results.
264
- """
276
+ """
265
277
  query_params = {
266
278
  f"{field}.keyword": {
267
279
  "value": entity,
@@ -274,7 +286,7 @@ class KataElastic:
274
286
  s = s.query("fuzzy", **query_params)
275
287
  response = s.execute()
276
288
  return response
277
-
289
+
278
290
  def execute_ann_vector_search(
279
291
  self,
280
292
  index: str,
@@ -303,8 +315,8 @@ class KataElastic:
303
315
  s = s.extra(
304
316
  knn={
305
317
  "field": field,
306
- "query_vector": query_vector,
307
- "k": k,
318
+ "query_vector": query_vector,
319
+ "k": k,
308
320
  "num_candidates": num_candidates
309
321
  }
310
322
  )
@@ -314,15 +326,14 @@ class KataElastic:
314
326
  s = s.query(
315
327
  elasticsearch_dsl.Q("terms", _id=elastic_ids)
316
328
  )
317
-
329
+
318
330
  # Sort by score and return `n_docs` best-matching documents
319
331
  s = s.extra(size=n_docs)
320
332
 
321
333
  # Execute the search
322
334
  response = s.execute()
323
335
  return response
324
-
325
-
336
+
326
337
  def execute_script_score_vector_search(
327
338
  self,
328
339
  index: str,
@@ -341,7 +352,7 @@ class KataElastic:
341
352
  :param: elastic_ids: List[str]: Elastic ID-s for restricting the search.
342
353
  """
343
354
  s = elasticsearch_dsl.Search(using=self.elasticsearch, index=index)
344
-
355
+
345
356
  if elastic_ids:
346
357
  query = elasticsearch_dsl.Q("terms", _id=elastic_ids)
347
358
  else:
@@ -364,6 +375,5 @@ class KataElastic:
364
375
  response = s.execute()
365
376
  return response
366
377
 
367
-
368
378
  def __str__(self) -> str:
369
379
  return self.elasticsearch_url
@@ -672,6 +672,7 @@ class VIAFClient:
672
672
  f"Allowed VIAF sources are: {self.allowed_viaf_sources}."
673
673
  )
674
674
  viaf_record = None
675
+ verified_record = None
675
676
  viaf_ids = self.get_viaf_ids_by_search_terms(
676
677
  search_term=search_term,
677
678
  field=field,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.6.15
3
+ Version: 0.6.17
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -1,7 +1,7 @@
1
1
  rara_tools/converters.py,sha256=a1dEMa0TwcO9UmjuSBkiuc7LGmH0d_dB6wwoTLpdZhI,4040
2
2
  rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
3
3
  rara_tools/digar_schema_converter.py,sha256=k95U2iRlEA3sh772-v6snhHW6fju6qSTMnvWJ6DpzZk,14254
4
- rara_tools/elastic.py,sha256=7HvDmFKpQbGnnzYyiCKOg0uvubnv2TpCASRrnPP8DcQ,13540
4
+ rara_tools/elastic.py,sha256=4D9yoyMy6AJIKwhSi2H1usffDHAh2A_IZfv5BtYnBKg,13992
5
5
  rara_tools/exceptions.py,sha256=YQyaueUbXeTkJYFDEuN6iWTXMI3eCv5l7PxGp87vg5I,550
6
6
  rara_tools/formatters.py,sha256=LTliadjIPZTO4s-44NsumaUdlQlEvqetvWz4bEvwf90,3418
7
7
  rara_tools/s3.py,sha256=9ziDXsLjBtFAvsjTPxFddhfvkpA8773rzPJqO7y1N5Q,6415
@@ -20,7 +20,7 @@ rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtO
20
20
  rara_tools/normalizers/authorities.py,sha256=IDtcm0yNZNhv1f-WcdqWFSRzZk_CoKuBFsk6hEPddWM,4513
21
21
  rara_tools/normalizers/base.py,sha256=6tLfNdF6FZo8M6j_Q61lXoaF1HdIB1c0SKMatTc-Z64,12014
22
22
  rara_tools/normalizers/bibs.py,sha256=4DTS6k37z8qR5B3n7aiCXsT5Z49rLTvQ60lKKr5dyLs,2352
23
- rara_tools/normalizers/viaf.py,sha256=IA_dIJBA47IBnt0O1E2vKD6UbqhJdrSyg080CXEikKA,24187
23
+ rara_tools/normalizers/viaf.py,sha256=LIeqbJoKtVt_0H1o7XMmhSE0BjF4l-jdAJgX_8Gg9Z4,24218
24
24
  rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
25
25
  rara_tools/parsers/marc_parsers/ems_parser.py,sha256=LFuhZcVwmHMcJknX9p4ZkO8RdjPdQZ4APGbw8KV6BIs,2024
26
26
  rara_tools/parsers/marc_parsers/location_parser.py,sha256=dSU9dQoGV5z0ajhLI1bn3AAghkOr79qKIrX7sO0_4lA,1873
@@ -35,8 +35,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
35
35
  rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
36
36
  rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
37
37
  rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
38
- rara_tools-0.6.15.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
39
- rara_tools-0.6.15.dist-info/METADATA,sha256=WOK0svJOvm14qHwAjnGHi9daTaaDOIZi2__URFTw44c,4080
40
- rara_tools-0.6.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
- rara_tools-0.6.15.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
42
- rara_tools-0.6.15.dist-info/RECORD,,
38
+ rara_tools-0.6.17.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
39
+ rara_tools-0.6.17.dist-info/METADATA,sha256=_E9Ml7OSHn29YTsh5V4DytlzwRTSf-xeeQRZJNDzbos,4080
40
+ rara_tools-0.6.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
+ rara_tools-0.6.17.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
42
+ rara_tools-0.6.17.dist-info/RECORD,,