rara-tools 0.7.15__tar.gz → 0.7.16__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.7.15/rara_tools.egg-info → rara_tools-0.7.16}/PKG-INFO +1 -1
- rara_tools-0.7.16/VERSION +1 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/base.py +36 -28
- {rara_tools-0.7.15 → rara_tools-0.7.16/rara_tools.egg-info}/PKG-INFO +1 -1
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_normalization.py +56 -0
- rara_tools-0.7.15/VERSION +0 -1
- {rara_tools-0.7.15 → rara_tools-0.7.16}/LICENSE.md +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/README.md +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/pyproject.toml +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/language_evaluator.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/linker.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/meta_extractor.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/normalizers.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/parsers.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/constants/subject_indexer.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/converters.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/core_formatter.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/formatted_keyword.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/formatted_meta.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/core_formatters/formatted_object.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/decorators.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/digar_schema_converter.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/elastic.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/__init__.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/authorities.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/bibs.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/reader.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/normalizers/viaf.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/base_record.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/ems_record.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/organization_record.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/person_record.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/title_record.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/tools/marc_converter.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/s3.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/utils.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/SOURCES.txt +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/requires.txt +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/requirements.txt +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/setup.cfg +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_elastic.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_entity_normalizers.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_formatters.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_marc_parsers.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_sierra_converters.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_task_reporter.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_utils.py +0 -0
- {rara_tools-0.7.15 → rara_tools-0.7.16}/tests/test_viaf_client.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.7.16
|
|
@@ -51,10 +51,23 @@ class RecordNormalizer:
|
|
|
51
51
|
If one linked entity found, we create an updated record from the linked entity data.
|
|
52
52
|
"""
|
|
53
53
|
linked_records = []
|
|
54
|
+
|
|
55
|
+
def handle_create_new_record(entity, idx):
|
|
56
|
+
logger.info(f"No linked entities found for {entity}, Creating new record.")
|
|
57
|
+
linked_records.append({
|
|
58
|
+
"leader": self.DEFAULT_LEADER,
|
|
59
|
+
"fields": []
|
|
60
|
+
})
|
|
61
|
+
self.records_extra_data.append({
|
|
62
|
+
"entity": entity,
|
|
63
|
+
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
64
|
+
"edited": False,
|
|
65
|
+
})
|
|
54
66
|
|
|
55
67
|
for idx, linked in enumerate(linking_results or []):
|
|
56
68
|
|
|
57
69
|
if not isinstance(linked, dict):
|
|
70
|
+
logger.debug(f"Skipping invalid linked result: {linked}")
|
|
58
71
|
continue
|
|
59
72
|
|
|
60
73
|
entity = linked.get("original_entity")
|
|
@@ -62,46 +75,32 @@ class RecordNormalizer:
|
|
|
62
75
|
|
|
63
76
|
if not isinstance(linked_info, list) or not linked_info:
|
|
64
77
|
# No linked entities found, create new record
|
|
65
|
-
|
|
66
|
-
f"No linked entities found for {entity}, Creating new record.")
|
|
67
|
-
linked_records.append({
|
|
68
|
-
"leader": self.DEFAULT_LEADER,
|
|
69
|
-
"fields": []
|
|
70
|
-
})
|
|
71
|
-
self.records_extra_data.append({
|
|
72
|
-
"entity": entity,
|
|
73
|
-
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
74
|
-
"edited": False
|
|
75
|
-
})
|
|
78
|
+
handle_create_new_record(entity, idx)
|
|
76
79
|
continue
|
|
77
80
|
|
|
78
81
|
elif len(linked_info) > 1:
|
|
79
82
|
# Multiple linked entities found, create new record
|
|
80
|
-
|
|
81
|
-
f"Multiple linked entities found for {entity}. Creating new record.")
|
|
82
|
-
linked_records.append({
|
|
83
|
-
"leader": self.DEFAULT_LEADER,
|
|
84
|
-
"fields": []
|
|
85
|
-
})
|
|
86
|
-
self.records_extra_data.append({
|
|
87
|
-
"entity": entity,
|
|
88
|
-
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
89
|
-
"edited": False
|
|
90
|
-
})
|
|
83
|
+
handle_create_new_record(entity, idx)
|
|
91
84
|
continue
|
|
92
85
|
|
|
93
86
|
elif len(linked_info) == 1:
|
|
87
|
+
# one record match found, we update existing record
|
|
88
|
+
|
|
94
89
|
linked_item = linked_info[0]
|
|
95
90
|
if not isinstance(linked_item, dict):
|
|
96
91
|
continue
|
|
97
92
|
|
|
93
|
+
# handle case where we have linked an entity without a record
|
|
94
|
+
if not linked_item.get("json", None):
|
|
95
|
+
handle_create_new_record(entity, idx)
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
98
|
linked_records.append(linked_item.get("json", {}))
|
|
99
99
|
|
|
100
100
|
self.records_extra_data.append({
|
|
101
101
|
"entity": entity,
|
|
102
102
|
"viaf": linked_item.get("viaf", {}),
|
|
103
103
|
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
104
|
-
"type": "linked",
|
|
105
104
|
"edited": True
|
|
106
105
|
})
|
|
107
106
|
continue
|
|
@@ -109,7 +108,6 @@ class RecordNormalizer:
|
|
|
109
108
|
self.records_extra_data.extend(
|
|
110
109
|
{
|
|
111
110
|
"sierraID": obj.get("sierraID"),
|
|
112
|
-
"type": "sierra",
|
|
113
111
|
"edited": True
|
|
114
112
|
}
|
|
115
113
|
for obj in (sierra_data or [])
|
|
@@ -465,6 +463,8 @@ class RecordNormalizer:
|
|
|
465
463
|
verify=verify,
|
|
466
464
|
threshold=threshold
|
|
467
465
|
)
|
|
466
|
+
if viaf_record:
|
|
467
|
+
logger.debug(f"VIAF {search_term}, linked to ID: {viaf_record.viaf_id}")
|
|
468
468
|
|
|
469
469
|
except Exception as e:
|
|
470
470
|
logger.error(
|
|
@@ -473,7 +473,8 @@ class RecordNormalizer:
|
|
|
473
473
|
return viaf_record
|
|
474
474
|
|
|
475
475
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
476
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool,
|
|
476
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool,
|
|
477
|
+
original_entity: str) -> Record:
|
|
477
478
|
return record
|
|
478
479
|
|
|
479
480
|
def get_record(self, index: int) -> Record:
|
|
@@ -500,19 +501,26 @@ class RecordNormalizer:
|
|
|
500
501
|
return next(iter(self))
|
|
501
502
|
|
|
502
503
|
def __iter__(self) -> Iterator:
|
|
503
|
-
viaf_id_path = "viaf.queryResult.
|
|
504
|
+
# viaf_id_path = "viaf.original.queryResult.viafID"
|
|
505
|
+
viaf_id_path = "viaf.parsed.viaf_id"
|
|
506
|
+
|
|
504
507
|
sierra_id_path = "sierraID"
|
|
505
|
-
|
|
508
|
+
|
|
506
509
|
for record, extra_data in zip(self.records, self.records_extra_data):
|
|
507
510
|
|
|
508
511
|
sierra_id = glom(extra_data, sierra_id_path, default="")
|
|
509
512
|
viaf_id = glom(extra_data, viaf_id_path, default=None)
|
|
510
|
-
classified_fields = extra_data.get("classified_fields", [])
|
|
511
513
|
|
|
514
|
+
classified_fields = extra_data.get("classified_fields", [])
|
|
512
515
|
entity = extra_data.get("entity")
|
|
513
516
|
is_editing_existing_record = extra_data.get("edited") == True
|
|
514
517
|
|
|
515
518
|
viaf_record = self._get_viaf_record(record, viaf_id, entity)
|
|
519
|
+
if viaf_record:
|
|
520
|
+
logger.debug(
|
|
521
|
+
f"linked VIAF record with ID {viaf_record.viaf_id} for entity '{entity}'"
|
|
522
|
+
)
|
|
523
|
+
|
|
516
524
|
record = self._normalize_common(record, is_editing_existing_record, classified_fields)
|
|
517
525
|
|
|
518
526
|
normalized_record = self._normalize_record(
|
|
@@ -5,6 +5,7 @@ from rara_tools.constants import YYMMDD_FORMAT
|
|
|
5
5
|
from rara_tools.normalizers import (BibRecordNormalizer, AuthoritiesRecordNormalizer)
|
|
6
6
|
from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
|
|
7
7
|
check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)
|
|
8
|
+
from rara_tools.normalizers.viaf import VIAFRecord
|
|
8
9
|
|
|
9
10
|
from rara_tools.constants.linker import EntityType
|
|
10
11
|
|
|
@@ -842,3 +843,58 @@ def test_classified_data_with_multiple_records():
|
|
|
842
843
|
assert len(record.get_fields("670")) == 1
|
|
843
844
|
fields_670 = record.get_fields("670")[0]
|
|
844
845
|
assert fields_670.get_subfields("a")[0] == "Teine kirjeldus, 2022"
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
def test_viaf_name_variations():
|
|
849
|
+
""" Test adding alternative name forms from VIAF to 4XX fields. Should skip some variants """
|
|
850
|
+
|
|
851
|
+
normalizer = AuthoritiesRecordNormalizer()
|
|
852
|
+
record = Record()
|
|
853
|
+
|
|
854
|
+
viaf_record: VIAFRecord = normalizer._get_viaf_record(
|
|
855
|
+
record,
|
|
856
|
+
entity="Jaan Kaplinski"
|
|
857
|
+
)
|
|
858
|
+
|
|
859
|
+
assert viaf_record is not None
|
|
860
|
+
assert len(viaf_record.name_variations) > 0
|
|
861
|
+
|
|
862
|
+
normalizer._add_author(record, viaf_record)
|
|
863
|
+
|
|
864
|
+
fields_4xx = record.get_fields("400") + record.get_fields("410") + record.get_fields("430")
|
|
865
|
+
assert len(fields_4xx) > 0
|
|
866
|
+
|
|
867
|
+
def test_existing_record_linked_to_viaf_record():
|
|
868
|
+
""" Test existing record linked to VIAF record - should enrich with VIAF data """
|
|
869
|
+
|
|
870
|
+
base_path = "tests/test_data/marc_records/json/"
|
|
871
|
+
with open(os.path.join(base_path, "imbi.json"), "r", encoding="utf-8") as f, \
|
|
872
|
+
open(os.path.join(base_path, "ernits.json"), "r", encoding="utf-8") as f2, \
|
|
873
|
+
open(os.path.join(base_path, "rowling.json"), "r", encoding="utf-8") as f3:
|
|
874
|
+
imbi = json.load(f)
|
|
875
|
+
ernits = json.load(f2)
|
|
876
|
+
rowling = json.load(f3)
|
|
877
|
+
|
|
878
|
+
linking_results = [
|
|
879
|
+
imbi,
|
|
880
|
+
ernits,
|
|
881
|
+
rowling
|
|
882
|
+
]
|
|
883
|
+
|
|
884
|
+
normalizer = AuthoritiesRecordNormalizer(
|
|
885
|
+
linking_results=linking_results,
|
|
886
|
+
)
|
|
887
|
+
|
|
888
|
+
def get_viaf_url(record: Record):
|
|
889
|
+
field_024 = record.get_fields("024")
|
|
890
|
+
if len(field_024) == 0:
|
|
891
|
+
return None
|
|
892
|
+
return field_024[0].get_subfields("0")[0]
|
|
893
|
+
|
|
894
|
+
viaf_base_url = "http://viaf.org/viaf"
|
|
895
|
+
assert get_viaf_url(normalizer.get_record(0)) == f"{viaf_base_url}/167120147/"
|
|
896
|
+
assert get_viaf_url(normalizer.get_record(1)) == f"{viaf_base_url}/22458146/"
|
|
897
|
+
assert get_viaf_url(normalizer.get_record(1)) == f"{viaf_base_url}/22458146/"
|
|
898
|
+
assert get_viaf_url(normalizer.get_record(2)) == f"{viaf_base_url}/116796842/"
|
|
899
|
+
|
|
900
|
+
|
rara_tools-0.7.15/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.7.15
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_parsers/organization_parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.15 → rara_tools-0.7.16}/rara_tools/parsers/marc_records/organization_record.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|