rara-tools 0.7.15__tar.gz → 0.7.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic.

Files changed (67)
  1. {rara_tools-0.7.15/rara_tools.egg-info → rara_tools-0.7.16}/PKG-INFO +1 -1
  2. rara_tools-0.7.16/VERSION +1 -0
  3. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/base.py +36 -28
  4. {rara_tools-0.7.15 → rara_tools-0.7.16/rara_tools.egg-info}/PKG-INFO +1 -1
  5. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_normalization.py +56 -0
  6. rara_tools-0.7.15/VERSION +0 -1
  7. {rara_tools-0.7.15 → rara_tools-0.7.16}/LICENSE.md +0 -0
  8. {rara_tools-0.7.15 → rara_tools-0.7.16}/README.md +0 -0
  9. {rara_tools-0.7.15 → rara_tools-0.7.16}/pyproject.toml +0 -0
  10. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/__init__.py +0 -0
  11. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/digitizer.py +0 -0
  12. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/general.py +0 -0
  13. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/language_evaluator.py +0 -0
  14. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/linker.py +0 -0
  15. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/meta_extractor.py +0 -0
  16. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/normalizers.py +0 -0
  17. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/parsers.py +0 -0
  18. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/subject_indexer.py +0 -0
  19. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/converters.py +0 -0
  20. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/core_formatter.py +0 -0
  21. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/formatted_keyword.py +0 -0
  22. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/formatted_meta.py +0 -0
  23. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/formatted_object.py +0 -0
  24. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/decorators.py +0 -0
  25. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/digar_schema_converter.py +0 -0
  26. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/elastic.py +0 -0
  27. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/exceptions.py +0 -0
  28. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/__init__.py +0 -0
  29. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/authorities.py +0 -0
  30. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/bibs.py +0 -0
  31. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/reader.py +0 -0
  32. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/viaf.py +0 -0
  33. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
  34. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
  35. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
  36. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
  37. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
  38. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
  39. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/base_record.py +0 -0
  40. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/ems_record.py +0 -0
  41. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/organization_record.py +0 -0
  42. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/person_record.py +0 -0
  43. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/title_record.py +0 -0
  44. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
  45. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/tools/marc_converter.py +0 -0
  46. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
  47. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/s3.py +0 -0
  48. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/task_reporter.py +0 -0
  49. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/utils.py +0 -0
  50. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/SOURCES.txt +0 -0
  51. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/dependency_links.txt +0 -0
  52. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/requires.txt +0 -0
  53. {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/top_level.txt +0 -0
  54. {rara_tools-0.7.15 → rara_tools-0.7.16}/requirements.txt +0 -0
  55. {rara_tools-0.7.15 → rara_tools-0.7.16}/setup.cfg +0 -0
  56. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_digar_schema_converter.py +0 -0
  57. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_elastic.py +0 -0
  58. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_elastic_vector_and_search_operations.py +0 -0
  59. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_entity_normalizers.py +0 -0
  60. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_formatters.py +0 -0
  61. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_marc_parsers.py +0 -0
  62. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_s3_exceptions.py +0 -0
  63. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_s3_file_operations.py +0 -0
  64. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_sierra_converters.py +0 -0
  65. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_task_reporter.py +0 -0
  66. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_utils.py +0 -0
  67. {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.15
3
+ Version: 0.7.16
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.7.16
@@ -51,10 +51,23 @@ class RecordNormalizer:
51
51
  If one linked entity found, we create an updated record from the linked entity data.
52
52
  """
53
53
  linked_records = []
54
+
55
+ def handle_create_new_record(entity, idx):
56
+ logger.info(f"No linked entities found for {entity}, Creating new record.")
57
+ linked_records.append({
58
+ "leader": self.DEFAULT_LEADER,
59
+ "fields": []
60
+ })
61
+ self.records_extra_data.append({
62
+ "entity": entity,
63
+ "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
64
+ "edited": False,
65
+ })
54
66
 
55
67
  for idx, linked in enumerate(linking_results or []):
56
68
 
57
69
  if not isinstance(linked, dict):
70
+ logger.debug(f"Skipping invalid linked result: {linked}")
58
71
  continue
59
72
 
60
73
  entity = linked.get("original_entity")
@@ -62,46 +75,32 @@ class RecordNormalizer:
62
75
 
63
76
  if not isinstance(linked_info, list) or not linked_info:
64
77
  # No linked entities found, create new record
65
- logger.info(
66
- f"No linked entities found for {entity}, Creating new record.")
67
- linked_records.append({
68
- "leader": self.DEFAULT_LEADER,
69
- "fields": []
70
- })
71
- self.records_extra_data.append({
72
- "entity": entity,
73
- "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
74
- "edited": False
75
- })
78
+ handle_create_new_record(entity, idx)
76
79
  continue
77
80
 
78
81
  elif len(linked_info) > 1:
79
82
  # Multiple linked entities found, create new record
80
- logger.info(
81
- f"Multiple linked entities found for {entity}. Creating new record.")
82
- linked_records.append({
83
- "leader": self.DEFAULT_LEADER,
84
- "fields": []
85
- })
86
- self.records_extra_data.append({
87
- "entity": entity,
88
- "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
89
- "edited": False
90
- })
83
+ handle_create_new_record(entity, idx)
91
84
  continue
92
85
 
93
86
  elif len(linked_info) == 1:
87
+ # one record match found, we update existing record
88
+
94
89
  linked_item = linked_info[0]
95
90
  if not isinstance(linked_item, dict):
96
91
  continue
97
92
 
93
+ # handle case where we have linked an entity without a record
94
+ if not linked_item.get("json", None):
95
+ handle_create_new_record(entity, idx)
96
+ continue
97
+
98
98
  linked_records.append(linked_item.get("json", {}))
99
99
 
100
100
  self.records_extra_data.append({
101
101
  "entity": entity,
102
102
  "viaf": linked_item.get("viaf", {}),
103
103
  "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
104
- "type": "linked",
105
104
  "edited": True
106
105
  })
107
106
  continue
@@ -109,7 +108,6 @@ class RecordNormalizer:
109
108
  self.records_extra_data.extend(
110
109
  {
111
110
  "sierraID": obj.get("sierraID"),
112
- "type": "sierra",
113
111
  "edited": True
114
112
  }
115
113
  for obj in (sierra_data or [])
@@ -465,6 +463,8 @@ class RecordNormalizer:
465
463
  verify=verify,
466
464
  threshold=threshold
467
465
  )
466
+ if viaf_record:
467
+ logger.debug(f"VIAF {search_term}, linked to ID: {viaf_record.viaf_id}")
468
468
 
469
469
  except Exception as e:
470
470
  logger.error(
@@ -473,7 +473,8 @@ class RecordNormalizer:
473
473
  return viaf_record
474
474
 
475
475
  def _normalize_record(self, record: Record, sierraID: str,
476
- viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
476
+ viaf_record: VIAFRecord, is_editing_existing_record: bool,
477
+ original_entity: str) -> Record:
477
478
  return record
478
479
 
479
480
  def get_record(self, index: int) -> Record:
@@ -500,19 +501,26 @@ class RecordNormalizer:
500
501
  return next(iter(self))
501
502
 
502
503
  def __iter__(self) -> Iterator:
503
- viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
504
+ # viaf_id_path = "viaf.original.queryResult.viafID"
505
+ viaf_id_path = "viaf.parsed.viaf_id"
506
+
504
507
  sierra_id_path = "sierraID"
505
-
508
+
506
509
  for record, extra_data in zip(self.records, self.records_extra_data):
507
510
 
508
511
  sierra_id = glom(extra_data, sierra_id_path, default="")
509
512
  viaf_id = glom(extra_data, viaf_id_path, default=None)
510
- classified_fields = extra_data.get("classified_fields", [])
511
513
 
514
+ classified_fields = extra_data.get("classified_fields", [])
512
515
  entity = extra_data.get("entity")
513
516
  is_editing_existing_record = extra_data.get("edited") == True
514
517
 
515
518
  viaf_record = self._get_viaf_record(record, viaf_id, entity)
519
+ if viaf_record:
520
+ logger.debug(
521
+ f"linked VIAF record with ID {viaf_record.viaf_id} for entity '{entity}'"
522
+ )
523
+
516
524
  record = self._normalize_common(record, is_editing_existing_record, classified_fields)
517
525
 
518
526
  normalized_record = self._normalize_record(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.15
3
+ Version: 0.7.16
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -5,6 +5,7 @@ from rara_tools.constants import YYMMDD_FORMAT
5
5
  from rara_tools.normalizers import (BibRecordNormalizer, AuthoritiesRecordNormalizer)
6
6
  from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
7
7
  check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)
8
+ from rara_tools.normalizers.viaf import VIAFRecord
8
9
 
9
10
  from rara_tools.constants.linker import EntityType
10
11
 
@@ -842,3 +843,58 @@ def test_classified_data_with_multiple_records():
842
843
  assert len(record.get_fields("670")) == 1
843
844
  fields_670 = record.get_fields("670")[0]
844
845
  assert fields_670.get_subfields("a")[0] == "Teine kirjeldus, 2022"
846
+
847
+
848
+ def test_viaf_name_variations():
849
+ """ Test adding alternative name forms from VIAF to 4XX fields. Should skip some variants """
850
+
851
+ normalizer = AuthoritiesRecordNormalizer()
852
+ record = Record()
853
+
854
+ viaf_record: VIAFRecord = normalizer._get_viaf_record(
855
+ record,
856
+ entity="Jaan Kaplinski"
857
+ )
858
+
859
+ assert viaf_record is not None
860
+ assert len(viaf_record.name_variations) > 0
861
+
862
+ normalizer._add_author(record, viaf_record)
863
+
864
+ fields_4xx = record.get_fields("400") + record.get_fields("410") + record.get_fields("430")
865
+ assert len(fields_4xx) > 0
866
+
867
+ def test_existing_record_linked_to_viaf_record():
868
+ """ Test existing record linked to VIAF record - should enrich with VIAF data """
869
+
870
+ base_path = "tests/test_data/marc_records/json/"
871
+ with open(os.path.join(base_path, "imbi.json"), "r", encoding="utf-8") as f, \
872
+ open(os.path.join(base_path, "ernits.json"), "r", encoding="utf-8") as f2, \
873
+ open(os.path.join(base_path, "rowling.json"), "r", encoding="utf-8") as f3:
874
+ imbi = json.load(f)
875
+ ernits = json.load(f2)
876
+ rowling = json.load(f3)
877
+
878
+ linking_results = [
879
+ imbi,
880
+ ernits,
881
+ rowling
882
+ ]
883
+
884
+ normalizer = AuthoritiesRecordNormalizer(
885
+ linking_results=linking_results,
886
+ )
887
+
888
+ def get_viaf_url(record: Record):
889
+ field_024 = record.get_fields("024")
890
+ if len(field_024) == 0:
891
+ return None
892
+ return field_024[0].get_subfields("0")[0]
893
+
894
+ viaf_base_url = "http://viaf.org/viaf"
895
+ assert get_viaf_url(normalizer.get_record(0)) == f"{viaf_base_url}/167120147/"
896
+ assert get_viaf_url(normalizer.get_record(1)) == f"{viaf_base_url}/22458146/"
897
+ assert get_viaf_url(normalizer.get_record(1)) == f"{viaf_base_url}/22458146/"
898
+ assert get_viaf_url(normalizer.get_record(2)) == f"{viaf_base_url}/116796842/"
899
+
900
+
rara_tools-0.7.15/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.7.15
File without changes
File without changes
File without changes
File without changes