rara-tools 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

rara_tools/converters.py CHANGED
@@ -2,40 +2,80 @@ from .exceptions import SierraResponseConverterException
2
2
 
3
3
 
4
4
  class SierraResponseConverter:
5
- """ Takes a JSON response from the Sierra API (https://tester.ester.ee/iii/sierra-api/swagger/index.html)
6
- and converts it to MARC-in-JSON format.
7
-
8
- """
5
+ """Converts a JSON response from the Sierra API to MARC-in-JSON format."""
9
6
 
10
7
  def __init__(self, response: dict):
11
8
  if not isinstance(response, dict):
12
9
  raise SierraResponseConverterException("Please provide a valid JSON response.")
13
10
  self.response = response
11
+
12
+ def _map_control_fields(self, field: dict) -> dict:
13
+ # for tags < 010, no subfields, instead one str value in "value"
14
+ return {field["tag"]: field["value"]}
15
+
16
+ def _map_data_fields(self, field: dict) -> dict:
17
+ """ Maps marc fields > 010.
18
+
19
+ Args:
20
+ field (dict): Contains the marc tag and list with indicators and subfields.
21
+
22
+ Returns:
23
+ dict: standardised marc-in-json format.
24
+ """
25
+
26
+ data = field["data"]
27
+
28
+ # Order matters ind1, in2, subfields
29
+ field_data = {
30
+ "ind1": data.get("ind1", " "),
31
+ "ind2": data.get("ind2", " "),
32
+ "subfields": data.get("subfields", [])
33
+ }
34
+
35
+ return {field["tag"]: field_data}
36
+
37
+ def _is_marc21structured(self, field: dict) -> bool:
38
+ """Checks if the field is already structured according to MARC21 in JSON"""
39
+ return any(key.isdigit() for key in field.keys())
40
+
41
+
42
+ def _handle_field_type(self, field: dict) -> dict:
43
+
44
+ if self._is_marc21structured(field):
45
+ return field
46
+
47
+ if field.get("data"):
48
+ return self._map_data_fields(field)
14
49
 
15
- def _map_field_data(self, field):
16
50
  tag = field.get("tag")
51
+
17
52
  if not tag:
18
- raise SierraResponseConverterException("Field is missing a valid 'tag'.")
19
- data = field.get("data", {})
20
- return {tag: data}
53
+ raise SierraResponseConverterException("Field is missing MARC21 tag.")
21
54
 
22
- def _convert_response(self):
23
- response = self.response
55
+ if tag < "010":
56
+ return self._map_control_fields(field)
57
+ else:
58
+ return self._map_data_fields(field)
24
59
 
25
- entries = response.get("entries")
60
+ def _convert_response(self) -> list:
61
+ entries = self.response.get("entries")
26
62
  if not entries:
27
63
  raise SierraResponseConverterException("No entries found in the response.")
28
64
 
29
65
  try:
30
- fields = [self._map_field_data(f) for e in entries for f in e["marc"]["fields"]]
66
+ return {"fields": [
67
+ {e["id"]: [
68
+ self._handle_field_type(f) for f in e["marc"]["fields"]
69
+ ]}
70
+ for e in entries
71
+ ]}
72
+
31
73
  except KeyError as e:
32
- raise SierraResponseConverterException(f"Missing expected MARC fields in the response: {e}")
74
+ raise SierraResponseConverterException(f"Malformed response: missing key {e}")
33
75
 
34
- return {"fields": fields}
35
-
36
- def convert(self):
37
- """Runner method, converts the response to MARC-in-JSON format with error handling."""
76
+
77
+ def convert(self) -> list:
38
78
  try:
39
79
  return self._convert_response()
40
80
  except Exception as e:
41
- raise SierraResponseConverterException(f"An unexpected error occurred during conversion: {e}")
81
+ raise SierraResponseConverterException(f"An unexpected error occurred: {e}")
rara_tools/elastic.py CHANGED
@@ -263,7 +263,7 @@ class KataElastic:
263
263
  response = s.execute()
264
264
  return response
265
265
 
266
- def execute_vector_search(
266
+ def execute_ann_vector_search(
267
267
  self,
268
268
  index: str,
269
269
  field: str,
@@ -281,6 +281,7 @@ class KataElastic:
281
281
  :param: query vector List[float]: Vector to search matches for.
282
282
  :param: k int: Number of nearest neighbors to return.
283
283
  :param: num_candidates int: Number of candidates considered before selecting k results.
284
+ :param: n_docs: int: Number of documents to return.
284
285
  :param: elastic_ids: List[str]: Elastic ID-s for restricting the search.
285
286
  """
286
287
 
@@ -308,6 +309,48 @@ class KataElastic:
308
309
  # Execute the search
309
310
  response = s.execute()
310
311
  return response
312
+
313
+
314
+ def execute_script_score_vector_search(
315
+ self,
316
+ index: str,
317
+ field: str,
318
+ query_vector: List[float],
319
+ n_docs: int = 10,
320
+ elastic_ids: List[str] = []
321
+ ) -> Response:
322
+ """ Execute a vector search.
323
+ NB! Requires different mapping than ANN!
324
+
325
+ :param: index str: Index to search from.
326
+ :param: field str: Field containing vectorized data.
327
+ :param: query vector List[float]: Vector to search matches for.
328
+ :param: n_docs: int: Number of documents to return.
329
+ :param: elastic_ids: List[str]: Elastic ID-s for restricting the search.
330
+ """
331
+ s = elasticsearch_dsl.Search(using=self.elasticsearch, index=index)
332
+
333
+ if elastic_ids:
334
+ query = elasticsearch_dsl.Q("terms", _id=elastic_ids)
335
+ else:
336
+ query = elasticsearch_dsl.Q("match_all")
337
+ # Apply script_score query
338
+ s = s.query(
339
+ "script_score",
340
+ query=query,
341
+ script={
342
+ "source": f"1.0 + cosineSimilarity(params.query_vector, '{field}')",
343
+ "params": {
344
+ "query_vector": query_vector
345
+ }
346
+ }
347
+ )
348
+ # Set min_score and limit number of documents
349
+ s = s.extra(size=n_docs)
350
+
351
+ # Execute search
352
+ response = s.execute()
353
+ return response
311
354
 
312
355
 
313
356
  def __str__(self) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rara-tools
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -1,7 +1,7 @@
1
- rara_tools/converters.py,sha256=JcS74VzV6jm12l3C6aqMJBY9nuVW_aevQeCe32KmfrE,1576
1
+ rara_tools/converters.py,sha256=O769zNjde1VCfEUF2VU_49IAbm8NT-cG-VR0uPxixtE,2687
2
2
  rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
3
3
  rara_tools/digar_schema_converter.py,sha256=k95U2iRlEA3sh772-v6snhHW6fju6qSTMnvWJ6DpzZk,14254
4
- rara_tools/elastic.py,sha256=LZfHZqeTDjCEb5YX4CLPJEFffRSZAcRq6AtyP49Fo0E,11575
4
+ rara_tools/elastic.py,sha256=MgPHxZ3UbSTIL8_sT9gU5V4PLKJjo3aQ8CGyhXjRz6M,13065
5
5
  rara_tools/exceptions.py,sha256=BwNh4qWxau_ylr9RqZoYwd1KnExI6oWWWDno3jkh8q4,474
6
6
  rara_tools/s3.py,sha256=uNDu2HzMYHAWh33RcHeyPFK7gdQfQPxsdfohyIKezEY,4467
7
7
  rara_tools/task_reporter.py,sha256=WCcZts9dAUokPc4vbrG3-lNAFLnWaMgE3b3iaUB7mr8,3256
@@ -9,8 +9,8 @@ rara_tools/utils.py,sha256=9vSbmuWYU5ydr4lXBKlUKa0xzDccFsaJv4T-XwgUfuY,2578
9
9
  rara_tools/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  rara_tools/constants/digitizer.py,sha256=gJ3jOMwuZfKcLqgOAxTyB266VYsskLabJiMUiSz3xX4,297
11
11
  rara_tools/constants/general.py,sha256=E9Jaw-YxocS_tOZw9QBoxO3e9KK5EMbLoM0R7D4Iflw,171
12
- rara_tools-0.0.11.dist-info/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
13
- rara_tools-0.0.11.dist-info/METADATA,sha256=pDcladCQ1A9O9Wh4UDSh0eHwNyqcGY1BOwPxSJKpLFk,3895
14
- rara_tools-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
15
- rara_tools-0.0.11.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
16
- rara_tools-0.0.11.dist-info/RECORD,,
12
+ rara_tools-0.0.13.dist-info/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
13
+ rara_tools-0.0.13.dist-info/METADATA,sha256=0Aipkuodi_CzCTUMkVqKOI__n5mN2r8hEGJ49-MjpMo,3895
14
+ rara_tools-0.0.13.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
15
+ rara_tools-0.0.13.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
16
+ rara_tools-0.0.13.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5