rara-tools 0.7.10__py3-none-any.whl → 0.7.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/normalizers/authorities.py +6 -1
- rara_tools/normalizers/base.py +32 -8
- rara_tools/normalizers/bibs.py +52 -4
- {rara_tools-0.7.10.dist-info → rara_tools-0.7.12.dist-info}/METADATA +1 -1
- {rara_tools-0.7.10.dist-info → rara_tools-0.7.12.dist-info}/RECORD +8 -8
- {rara_tools-0.7.10.dist-info → rara_tools-0.7.12.dist-info}/WHEEL +0 -0
- {rara_tools-0.7.10.dist-info → rara_tools-0.7.12.dist-info}/licenses/LICENSE.md +0 -0
- {rara_tools-0.7.10.dist-info → rara_tools-0.7.12.dist-info}/top_level.txt +0 -0
|
@@ -59,6 +59,9 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
59
59
|
record, "046", "f", formatted_birth_date)
|
|
60
60
|
death_date = self.get_subfield(
|
|
61
61
|
record, "046", "g", formatted_death_date)
|
|
62
|
+
|
|
63
|
+
if not birth_date and not death_date:
|
|
64
|
+
return
|
|
62
65
|
|
|
63
66
|
subfields_046 = [
|
|
64
67
|
Subfield("f", birth_date),
|
|
@@ -117,7 +120,9 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
117
120
|
self._add_author(record, viaf_record)
|
|
118
121
|
|
|
119
122
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
120
|
-
viaf_record: VIAFRecord,
|
|
123
|
+
viaf_record: VIAFRecord,
|
|
124
|
+
is_editing_existing_record: bool,
|
|
125
|
+
original_entity: str) -> Record:
|
|
121
126
|
|
|
122
127
|
self._normalize_sierra(record, sierraID)
|
|
123
128
|
self._normalize_viaf(record, viaf_record)
|
rara_tools/normalizers/base.py
CHANGED
|
@@ -213,13 +213,33 @@ class RecordNormalizer:
|
|
|
213
213
|
)
|
|
214
214
|
|
|
215
215
|
def _add_fields_to_record(self, record: Record, fields: List[Field]) -> Record:
|
|
216
|
-
|
|
216
|
+
cleaned_fields = []
|
|
217
|
+
|
|
217
218
|
for field in fields:
|
|
218
|
-
field.subfields = [sub for sub in field.subfields if sub.value and sub.value not in ["0", 0]]
|
|
219
219
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
220
|
+
# Always assume control fields cleaned
|
|
221
|
+
if field.tag < "010" and field.tag.isdigit():
|
|
222
|
+
cleaned_fields.append(field)
|
|
223
|
+
continue
|
|
224
|
+
|
|
225
|
+
# filter out subfields that are empty or 0 (VIAF returns 0 for unknown dates)
|
|
226
|
+
field.subfields = [
|
|
227
|
+
sub for sub in field.subfields
|
|
228
|
+
if sub.value and sub.value not in ["0", 0]
|
|
229
|
+
]
|
|
230
|
+
|
|
231
|
+
# only keep the field if it still has subfields left
|
|
232
|
+
if field.subfields:
|
|
233
|
+
cleaned_fields.append(field)
|
|
234
|
+
|
|
235
|
+
if not cleaned_fields:
|
|
236
|
+
return record
|
|
237
|
+
|
|
238
|
+
self._handle_repeatable_fields(record, *cleaned_fields)
|
|
239
|
+
self._handle_editable_fields(record, *cleaned_fields)
|
|
240
|
+
self._handle_default_fields(record, *cleaned_fields)
|
|
241
|
+
|
|
242
|
+
return record
|
|
223
243
|
|
|
224
244
|
def _add_author(self, record: Record, viaf_record: VIAFRecord) -> Optional[Field]:
|
|
225
245
|
|
|
@@ -360,7 +380,7 @@ class RecordNormalizer:
|
|
|
360
380
|
return viaf_record
|
|
361
381
|
|
|
362
382
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
363
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
383
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
364
384
|
return record
|
|
365
385
|
|
|
366
386
|
@property
|
|
@@ -374,7 +394,11 @@ class RecordNormalizer:
|
|
|
374
394
|
logger.error(f"Failed to normalize record: {e}")
|
|
375
395
|
continue
|
|
376
396
|
return result
|
|
377
|
-
|
|
397
|
+
|
|
398
|
+
@property
|
|
399
|
+
def first(self) -> Record:
|
|
400
|
+
return next(iter(self))
|
|
401
|
+
|
|
378
402
|
def __iter__(self) -> Iterator:
|
|
379
403
|
viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
|
|
380
404
|
sierra_id_path = "sierraID"
|
|
@@ -391,7 +415,7 @@ class RecordNormalizer:
|
|
|
391
415
|
record = self._normalize_common(record, is_editing_existing_record)
|
|
392
416
|
|
|
393
417
|
normalized_record = self._normalize_record(
|
|
394
|
-
record, sierra_id, viaf_record, is_editing_existing_record)
|
|
418
|
+
record, sierra_id, viaf_record, is_editing_existing_record, original_entity=entity)
|
|
395
419
|
|
|
396
420
|
normalized_record.fields.sort(key=lambda field: field.tag)
|
|
397
421
|
|
rara_tools/normalizers/bibs.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from pymarc import (Field, Subfield, Record)
|
|
2
|
+
from typing import List, Optional
|
|
2
3
|
|
|
3
4
|
from rara_tools.constants import EMPTY_INDICATORS
|
|
4
5
|
from rara_tools.normalizers.viaf import VIAFRecord
|
|
@@ -34,10 +35,57 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
self._add_fields_to_record(record, fields)
|
|
38
|
+
|
|
39
|
+
def _include_name_variations(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
40
|
+
""" Include name variations from VIAF record as 400|t fields """
|
|
41
|
+
|
|
42
|
+
if not viaf_record or not viaf_record.name_variations:
|
|
43
|
+
return
|
|
44
|
+
|
|
45
|
+
existing_name_variations = record.get_fields("400")
|
|
46
|
+
existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("t")]
|
|
47
|
+
|
|
48
|
+
fields = []
|
|
49
|
+
|
|
50
|
+
for variation in viaf_record.name_variations:
|
|
51
|
+
if variation not in existing_variations:
|
|
52
|
+
fields.append(
|
|
53
|
+
Field(
|
|
54
|
+
tag="400",
|
|
55
|
+
indicators=EMPTY_INDICATORS,
|
|
56
|
+
subfields=[
|
|
57
|
+
Subfield("t", variation)
|
|
58
|
+
]
|
|
59
|
+
)
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
self._add_fields_to_record(record, fields)
|
|
63
|
+
|
|
64
|
+
def _add_author(self, record: Record, viaf_record: Optional[VIAFRecord], original_entity: str) -> Optional[Field]:
|
|
65
|
+
if record.get("100") or record.get("110") or record.get("111"):
|
|
66
|
+
return record
|
|
37
67
|
|
|
38
|
-
|
|
68
|
+
type_map = {
|
|
69
|
+
"Personal": "100",
|
|
70
|
+
"Corporate": "110",
|
|
71
|
+
"Collective": "111"
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
tag = type_map.get(getattr(viaf_record, "name_type", None), "100")
|
|
75
|
+
title = getattr(viaf_record, "name", None) or original_entity
|
|
39
76
|
|
|
77
|
+
fields = [Field(tag=tag, indicators=EMPTY_INDICATORS, subfields=[Subfield("t", title)])]
|
|
78
|
+
|
|
79
|
+
self._add_fields_to_record(record, fields)
|
|
80
|
+
|
|
81
|
+
if viaf_record:
|
|
82
|
+
self._include_name_variations(record, viaf_record)
|
|
83
|
+
|
|
84
|
+
def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord, original_entity: str) -> None:
|
|
85
|
+
|
|
40
86
|
if not viaf_record:
|
|
87
|
+
# viaf record not found, include original entity as 100|t
|
|
88
|
+
self._add_author(record, viaf_record=None, original_entity=original_entity)
|
|
41
89
|
return record
|
|
42
90
|
|
|
43
91
|
viaf_id = viaf_record.viaf_id
|
|
@@ -52,12 +100,12 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
52
100
|
]
|
|
53
101
|
|
|
54
102
|
self._add_fields_to_record(record, fields)
|
|
55
|
-
self._add_author(record, viaf_record)
|
|
103
|
+
self._add_author(record, viaf_record, original_entity=original_entity)
|
|
56
104
|
|
|
57
105
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
58
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
106
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
59
107
|
|
|
60
108
|
self._normalize_sierra(record)
|
|
61
|
-
self._normalize_viaf(record, viaf_record)
|
|
109
|
+
self._normalize_viaf(record, viaf_record, original_entity=original_entity)
|
|
62
110
|
|
|
63
111
|
return record
|
|
@@ -20,9 +20,9 @@ rara_tools/core_formatters/formatted_keyword.py,sha256=hhi6wh4ErFionjBqYsEeKGbf1
|
|
|
20
20
|
rara_tools/core_formatters/formatted_meta.py,sha256=r0RPG4eM-REPIR1DrIJnvYPQtQrzkgdvX9tvhNWjQ0Y,5250
|
|
21
21
|
rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
|
|
22
22
|
rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
|
|
23
|
-
rara_tools/normalizers/authorities.py,sha256=
|
|
24
|
-
rara_tools/normalizers/base.py,sha256=
|
|
25
|
-
rara_tools/normalizers/bibs.py,sha256=
|
|
23
|
+
rara_tools/normalizers/authorities.py,sha256=w7r2KIFSnmo57Pt9YGsxYYCH8AkdNkiBBFHhpf1GLJA,4794
|
|
24
|
+
rara_tools/normalizers/base.py,sha256=LzWdjzG_5zFGbzOCpUSOdmzrJJ7p23iODEtar8wDrrY,15942
|
|
25
|
+
rara_tools/normalizers/bibs.py,sha256=7H-spiqj6x8Xk4JQkBY5GK7q5pKjYUmXTRFf0RgzC-w,4040
|
|
26
26
|
rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
|
|
27
27
|
rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
|
|
28
28
|
rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
|
|
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
|
|
|
39
39
|
rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
|
|
40
40
|
rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
|
|
41
41
|
rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
|
|
42
|
-
rara_tools-0.7.
|
|
43
|
-
rara_tools-0.7.
|
|
44
|
-
rara_tools-0.7.
|
|
45
|
-
rara_tools-0.7.
|
|
46
|
-
rara_tools-0.7.
|
|
42
|
+
rara_tools-0.7.12.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
43
|
+
rara_tools-0.7.12.dist-info/METADATA,sha256=N8gNFTv8yIgSSp2DgZ7q97V_t_KC-YZHIjsfiAlTcPo,4080
|
|
44
|
+
rara_tools-0.7.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
45
|
+
rara_tools-0.7.12.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
46
|
+
rara_tools-0.7.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|