rara-tools 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/normalizers/authorities.py +1 -1
- rara_tools/normalizers/base.py +53 -37
- rara_tools/normalizers/bibs.py +1 -1
- {rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/METADATA +1 -1
- {rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/RECORD +8 -8
- {rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/WHEEL +0 -0
- {rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/licenses/LICENSE.md +0 -0
- {rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/top_level.txt +0 -0
|
@@ -11,7 +11,7 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
11
11
|
""" Normalize authorities records """
|
|
12
12
|
|
|
13
13
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
14
|
-
classified_fields: List[
|
|
14
|
+
classified_fields: List[List[dict]] = [],
|
|
15
15
|
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
16
16
|
REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "667"]):
|
|
17
17
|
|
rara_tools/normalizers/base.py
CHANGED
|
@@ -34,7 +34,7 @@ class RecordNormalizer:
|
|
|
34
34
|
entities: List of Full names (str). If included, will use NormLinker to match with normalized records on KATA elastic.
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
|
-
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[
|
|
37
|
+
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[List[dict]] = [],
|
|
38
38
|
ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
39
39
|
|
|
40
40
|
# Include, if will replace existing field
|
|
@@ -44,17 +44,30 @@ class RecordNormalizer:
|
|
|
44
44
|
# leader applied to new records
|
|
45
45
|
self.DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
|
|
46
46
|
|
|
47
|
-
def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[
|
|
47
|
+
def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[List[dict]] = []) -> JSONReader:
|
|
48
48
|
"""Setup initial MARC records and data.
|
|
49
49
|
|
|
50
50
|
If no linked entities or more than one linked entity found, we create a new record.
|
|
51
51
|
If one linked entity found, we create an updated record from the linked entity data.
|
|
52
52
|
"""
|
|
53
|
-
|
|
54
53
|
linked_records = []
|
|
55
54
|
|
|
56
|
-
|
|
55
|
+
def handle_create_new_record(entity, idx):
|
|
56
|
+
logger.info(f"No linked entities found for {entity}, Creating new record.")
|
|
57
|
+
linked_records.append({
|
|
58
|
+
"leader": self.DEFAULT_LEADER,
|
|
59
|
+
"fields": []
|
|
60
|
+
})
|
|
61
|
+
self.records_extra_data.append({
|
|
62
|
+
"entity": entity,
|
|
63
|
+
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
64
|
+
"edited": False,
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
for idx, linked in enumerate(linking_results or []):
|
|
68
|
+
|
|
57
69
|
if not isinstance(linked, dict):
|
|
70
|
+
logger.debug(f"Skipping invalid linked result: {linked}")
|
|
58
71
|
continue
|
|
59
72
|
|
|
60
73
|
entity = linked.get("original_entity")
|
|
@@ -62,46 +75,32 @@ class RecordNormalizer:
|
|
|
62
75
|
|
|
63
76
|
if not isinstance(linked_info, list) or not linked_info:
|
|
64
77
|
# No linked entities found, create new record
|
|
65
|
-
|
|
66
|
-
f"No linked entities found for {entity}, Creating new record.")
|
|
67
|
-
linked_records.append({
|
|
68
|
-
"leader": self.DEFAULT_LEADER,
|
|
69
|
-
"fields": []
|
|
70
|
-
})
|
|
71
|
-
self.records_extra_data.append({
|
|
72
|
-
"entity": entity,
|
|
73
|
-
"classified_fields": classified_fields,
|
|
74
|
-
"edited": False
|
|
75
|
-
})
|
|
78
|
+
handle_create_new_record(entity, idx)
|
|
76
79
|
continue
|
|
77
80
|
|
|
78
|
-
|
|
81
|
+
elif len(linked_info) > 1:
|
|
79
82
|
# Multiple linked entities found, create new record
|
|
80
|
-
|
|
81
|
-
f"Multiple linked entities found for {entity}. Creating new record.")
|
|
82
|
-
linked_records.append({
|
|
83
|
-
"leader": self.DEFAULT_LEADER,
|
|
84
|
-
"fields": []
|
|
85
|
-
})
|
|
86
|
-
self.records_extra_data.append({
|
|
87
|
-
"entity": entity,
|
|
88
|
-
"classified_fields": classified_fields,
|
|
89
|
-
"edited": False
|
|
90
|
-
})
|
|
83
|
+
handle_create_new_record(entity, idx)
|
|
91
84
|
continue
|
|
92
85
|
|
|
93
86
|
elif len(linked_info) == 1:
|
|
87
|
+
# one record match found, we update existing record
|
|
88
|
+
|
|
94
89
|
linked_item = linked_info[0]
|
|
95
90
|
if not isinstance(linked_item, dict):
|
|
96
91
|
continue
|
|
97
92
|
|
|
93
|
+
# handle case where we have linked an entity without a record
|
|
94
|
+
if not linked_item.get("json", None):
|
|
95
|
+
handle_create_new_record(entity, idx)
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
98
|
linked_records.append(linked_item.get("json", {}))
|
|
99
99
|
|
|
100
100
|
self.records_extra_data.append({
|
|
101
101
|
"entity": entity,
|
|
102
102
|
"viaf": linked_item.get("viaf", {}),
|
|
103
|
-
"classified_fields": classified_fields,
|
|
104
|
-
"type": "linked",
|
|
103
|
+
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
105
104
|
"edited": True
|
|
106
105
|
})
|
|
107
106
|
continue
|
|
@@ -109,7 +108,6 @@ class RecordNormalizer:
|
|
|
109
108
|
self.records_extra_data.extend(
|
|
110
109
|
{
|
|
111
110
|
"sierraID": obj.get("sierraID"),
|
|
112
|
-
"type": "sierra",
|
|
113
111
|
"edited": True
|
|
114
112
|
}
|
|
115
113
|
for obj in (sierra_data or [])
|
|
@@ -359,10 +357,12 @@ class RecordNormalizer:
|
|
|
359
357
|
def _include_classified_fields(self, record: Record, classified_fields: list[dict]) -> None:
|
|
360
358
|
"""Include classified fields from core, if any.
|
|
361
359
|
e.g. classified_fields=[{'670': {'ind1': ' ', 'ind2': '0', 'subfields': [{'a': 'Päikesekiri, 2021'}]}}]
|
|
360
|
+
|
|
361
|
+
For each record, we need a list of dicts, to handle repeatable fields.
|
|
362
362
|
"""
|
|
363
363
|
if not classified_fields:
|
|
364
364
|
return
|
|
365
|
-
|
|
365
|
+
|
|
366
366
|
fields = [
|
|
367
367
|
Field(
|
|
368
368
|
tag=str(tag),
|
|
@@ -373,7 +373,6 @@ class RecordNormalizer:
|
|
|
373
373
|
for tag, v in field_dict.items()
|
|
374
374
|
]
|
|
375
375
|
|
|
376
|
-
logger.info(f"Adding classified fields: {[f.tag for f in fields]}")
|
|
377
376
|
self._add_fields_to_record(record, fields)
|
|
378
377
|
|
|
379
378
|
def _normalize_common(self, record: Record, is_editing_existing_record: bool, classified_fields: List[dict]) -> None:
|
|
@@ -464,6 +463,8 @@ class RecordNormalizer:
|
|
|
464
463
|
verify=verify,
|
|
465
464
|
threshold=threshold
|
|
466
465
|
)
|
|
466
|
+
if viaf_record:
|
|
467
|
+
logger.debug(f"VIAF {search_term}, linked to ID: {viaf_record.viaf_id}")
|
|
467
468
|
|
|
468
469
|
except Exception as e:
|
|
469
470
|
logger.error(
|
|
@@ -472,8 +473,16 @@ class RecordNormalizer:
|
|
|
472
473
|
return viaf_record
|
|
473
474
|
|
|
474
475
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
475
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool,
|
|
476
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool,
|
|
477
|
+
original_entity: str) -> Record:
|
|
476
478
|
return record
|
|
479
|
+
|
|
480
|
+
def get_record(self, index: int) -> Record:
|
|
481
|
+
"""Get normalized record by index."""
|
|
482
|
+
for idx, record in enumerate(self):
|
|
483
|
+
if idx == index:
|
|
484
|
+
return record
|
|
485
|
+
raise IndexError("Record index out of range.")
|
|
477
486
|
|
|
478
487
|
@property
|
|
479
488
|
def data(self) -> List[dict]:
|
|
@@ -490,21 +499,28 @@ class RecordNormalizer:
|
|
|
490
499
|
@property
|
|
491
500
|
def first(self) -> Record:
|
|
492
501
|
return next(iter(self))
|
|
493
|
-
|
|
502
|
+
|
|
494
503
|
def __iter__(self) -> Iterator:
|
|
495
|
-
viaf_id_path = "viaf.queryResult.
|
|
504
|
+
# viaf_id_path = "viaf.original.queryResult.viafID"
|
|
505
|
+
viaf_id_path = "viaf.parsed.viaf_id"
|
|
506
|
+
|
|
496
507
|
sierra_id_path = "sierraID"
|
|
497
|
-
|
|
508
|
+
|
|
498
509
|
for record, extra_data in zip(self.records, self.records_extra_data):
|
|
499
510
|
|
|
500
511
|
sierra_id = glom(extra_data, sierra_id_path, default="")
|
|
501
512
|
viaf_id = glom(extra_data, viaf_id_path, default=None)
|
|
502
|
-
classified_fields = extra_data.get("classified_fields", [])
|
|
503
513
|
|
|
514
|
+
classified_fields = extra_data.get("classified_fields", [])
|
|
504
515
|
entity = extra_data.get("entity")
|
|
505
516
|
is_editing_existing_record = extra_data.get("edited") == True
|
|
506
517
|
|
|
507
518
|
viaf_record = self._get_viaf_record(record, viaf_id, entity)
|
|
519
|
+
if viaf_record:
|
|
520
|
+
logger.debug(
|
|
521
|
+
f"linked VIAF record with ID {viaf_record.viaf_id} for entity '{entity}'"
|
|
522
|
+
)
|
|
523
|
+
|
|
508
524
|
record = self._normalize_common(record, is_editing_existing_record, classified_fields)
|
|
509
525
|
|
|
510
526
|
normalized_record = self._normalize_record(
|
rara_tools/normalizers/bibs.py
CHANGED
|
@@ -12,7 +12,7 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
12
12
|
""" Normalize bib records. """
|
|
13
13
|
|
|
14
14
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
15
|
-
classified_fields: List[
|
|
15
|
+
classified_fields: List[List[dict]] = [],
|
|
16
16
|
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
17
17
|
REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
18
18
|
super().__init__(linking_results, sierra_data, classified_fields)
|
|
@@ -20,9 +20,9 @@ rara_tools/core_formatters/formatted_keyword.py,sha256=hhi6wh4ErFionjBqYsEeKGbf1
|
|
|
20
20
|
rara_tools/core_formatters/formatted_meta.py,sha256=WEnMs8K0YeTLGjXn_mxQTpshxcz5_9YjvqcbRFa3M1g,5605
|
|
21
21
|
rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
|
|
22
22
|
rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
|
|
23
|
-
rara_tools/normalizers/authorities.py,sha256=
|
|
24
|
-
rara_tools/normalizers/base.py,sha256=
|
|
25
|
-
rara_tools/normalizers/bibs.py,sha256=
|
|
23
|
+
rara_tools/normalizers/authorities.py,sha256=iW3cYOqqVJKy4CcnG9_T6dN-1bBT1e-0jtLYvco-MyQ,5311
|
|
24
|
+
rara_tools/normalizers/base.py,sha256=DhMicY5p_N2SC_E3lbWUvSM77AOy_pBjQpbLSvYWDxM,20488
|
|
25
|
+
rara_tools/normalizers/bibs.py,sha256=s8NGoieCjiftASUb--1YvYZ0VzW6uBt2ZidhLi_wP9A,3938
|
|
26
26
|
rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
|
|
27
27
|
rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
|
|
28
28
|
rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
|
|
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
|
|
|
39
39
|
rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
|
|
40
40
|
rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
|
|
41
41
|
rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
|
|
42
|
-
rara_tools-0.7.
|
|
43
|
-
rara_tools-0.7.
|
|
44
|
-
rara_tools-0.7.
|
|
45
|
-
rara_tools-0.7.
|
|
46
|
-
rara_tools-0.7.
|
|
42
|
+
rara_tools-0.7.16.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
43
|
+
rara_tools-0.7.16.dist-info/METADATA,sha256=yBmOUFVy7V6RyZdN_qBGkY7M8zH7H7h7v_QLQeZ3bAM,4080
|
|
44
|
+
rara_tools-0.7.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
45
|
+
rara_tools-0.7.16.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
46
|
+
rara_tools-0.7.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|