rara-tools: rara_tools-0.7.9-py3-none-any.whl → rara_tools-0.7.11-py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/normalizers/authorities.py +3 -3
- rara_tools/normalizers/base.py +20 -11
- rara_tools/normalizers/bibs.py +53 -5
- {rara_tools-0.7.9.dist-info → rara_tools-0.7.11.dist-info}/METADATA +1 -1
- {rara_tools-0.7.9.dist-info → rara_tools-0.7.11.dist-info}/RECORD +8 -8
- {rara_tools-0.7.9.dist-info → rara_tools-0.7.11.dist-info}/WHEEL +0 -0
- {rara_tools-0.7.9.dist-info → rara_tools-0.7.11.dist-info}/licenses/LICENSE.md +0 -0
- {rara_tools-0.7.9.dist-info → rara_tools-0.7.11.dist-info}/top_level.txt +0 -0
|
@@ -69,7 +69,6 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
69
69
|
record, [Field(tag="046", indicators=EMPTY_INDICATORS, subfields=subfields_046)])
|
|
70
70
|
|
|
71
71
|
def _add_viaf_url_or_isni(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
72
|
-
# TODO 024. will be used to store KRATT KATA ID. Just generate one?
|
|
73
72
|
viaf_url = f"https://viaf.org/viaf/{viaf_record.viaf_id}"
|
|
74
73
|
|
|
75
74
|
subfields = [Subfield("0", self.get_subfield(
|
|
@@ -109,7 +108,6 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
109
108
|
100, 110, 111 - non-repeatable field, attempts to add author type, if missing.
|
|
110
109
|
|
|
111
110
|
"""
|
|
112
|
-
# TODO: include KRATT KATA ID to 024 and remove on delete. Increment last elastic ID?
|
|
113
111
|
if not viaf_record:
|
|
114
112
|
return
|
|
115
113
|
|
|
@@ -119,7 +117,9 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
119
117
|
self._add_author(record, viaf_record)
|
|
120
118
|
|
|
121
119
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
122
|
-
viaf_record: VIAFRecord,
|
|
120
|
+
viaf_record: VIAFRecord,
|
|
121
|
+
is_editing_existing_record: bool,
|
|
122
|
+
original_entity: str) -> Record:
|
|
123
123
|
|
|
124
124
|
self._normalize_sierra(record, sierraID)
|
|
125
125
|
self._normalize_viaf(record, viaf_record)
|
rara_tools/normalizers/base.py
CHANGED
|
@@ -147,21 +147,26 @@ class RecordNormalizer:
|
|
|
147
147
|
return filter(lambda field: not self._field_in_record(field, record), fields)
|
|
148
148
|
|
|
149
149
|
def _format_date(self, value: str) -> str:
|
|
150
|
-
|
|
150
|
+
|
|
151
|
+
if not value:
|
|
151
152
|
return ""
|
|
152
153
|
|
|
153
154
|
if isinstance(value, (datetime, date)):
|
|
154
|
-
return value.strftime(
|
|
155
|
+
return value.strftime("%Y%m%d")
|
|
156
|
+
|
|
157
|
+
val = str(value).strip()
|
|
155
158
|
|
|
156
159
|
try:
|
|
157
|
-
dt = parser.parse(
|
|
158
|
-
|
|
159
|
-
logger.info(f"Formatted date '{formatted_date}' from value '{value}'")
|
|
160
|
-
return formatted_date
|
|
161
|
-
except Exception as e:
|
|
162
|
-
logger.info(f"Failed to format date string '{value}': {e}")
|
|
160
|
+
dt = parser.parse(val, fuzzy=False, default=datetime(1, 1, 1))
|
|
161
|
+
except Exception:
|
|
163
162
|
return ""
|
|
164
163
|
|
|
164
|
+
if len(val) == 4 and val.isdigit():
|
|
165
|
+
return dt.strftime("%Y") # YYYY
|
|
166
|
+
if len(val) in (6, 7): # YYYYMM or YYYY-MM
|
|
167
|
+
return dt.strftime("%Y%m") # YYYYMM
|
|
168
|
+
return dt.strftime("%Y%m%d") # YYYYMMDD
|
|
169
|
+
|
|
165
170
|
def get_subfield(self, record: Record, tag: str, subfield: str, default: str) -> str:
|
|
166
171
|
""" get record existing subfield value or assign a fallback value. """
|
|
167
172
|
|
|
@@ -355,7 +360,7 @@ class RecordNormalizer:
|
|
|
355
360
|
return viaf_record
|
|
356
361
|
|
|
357
362
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
358
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
363
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
359
364
|
return record
|
|
360
365
|
|
|
361
366
|
@property
|
|
@@ -369,7 +374,11 @@ class RecordNormalizer:
|
|
|
369
374
|
logger.error(f"Failed to normalize record: {e}")
|
|
370
375
|
continue
|
|
371
376
|
return result
|
|
372
|
-
|
|
377
|
+
|
|
378
|
+
@property
|
|
379
|
+
def first(self) -> Record:
|
|
380
|
+
return next(iter(self))
|
|
381
|
+
|
|
373
382
|
def __iter__(self) -> Iterator:
|
|
374
383
|
viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
|
|
375
384
|
sierra_id_path = "sierraID"
|
|
@@ -386,7 +395,7 @@ class RecordNormalizer:
|
|
|
386
395
|
record = self._normalize_common(record, is_editing_existing_record)
|
|
387
396
|
|
|
388
397
|
normalized_record = self._normalize_record(
|
|
389
|
-
record, sierra_id, viaf_record, is_editing_existing_record)
|
|
398
|
+
record, sierra_id, viaf_record, is_editing_existing_record, original_entity=entity)
|
|
390
399
|
|
|
391
400
|
normalized_record.fields.sort(key=lambda field: field.tag)
|
|
392
401
|
|
rara_tools/normalizers/bibs.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from pymarc import (Field,
|
|
1
|
+
from pymarc import (Field, Subfield, Record)
|
|
2
|
+
from typing import List, Optional
|
|
2
3
|
|
|
3
4
|
from rara_tools.constants import EMPTY_INDICATORS
|
|
4
5
|
from rara_tools.normalizers.viaf import VIAFRecord
|
|
@@ -34,10 +35,57 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
self._add_fields_to_record(record, fields)
|
|
38
|
+
|
|
39
|
+
def _include_name_variations(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
40
|
+
""" Include name variations from VIAF record as 400|t fields """
|
|
41
|
+
|
|
42
|
+
if not viaf_record or not viaf_record.name_variations:
|
|
43
|
+
return
|
|
44
|
+
|
|
45
|
+
existing_name_variations = record.get_fields("400")
|
|
46
|
+
existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("t")]
|
|
47
|
+
|
|
48
|
+
fields = []
|
|
49
|
+
|
|
50
|
+
for variation in viaf_record.name_variations:
|
|
51
|
+
if variation not in existing_variations:
|
|
52
|
+
fields.append(
|
|
53
|
+
Field(
|
|
54
|
+
tag="400",
|
|
55
|
+
indicators=EMPTY_INDICATORS,
|
|
56
|
+
subfields=[
|
|
57
|
+
Subfield("t", variation)
|
|
58
|
+
]
|
|
59
|
+
)
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
self._add_fields_to_record(record, fields)
|
|
63
|
+
|
|
64
|
+
def _add_author(self, record: Record, viaf_record: Optional[VIAFRecord], original_entity: str) -> Optional[Field]:
|
|
65
|
+
if record.get("100") or record.get("110") or record.get("111"):
|
|
66
|
+
return record
|
|
37
67
|
|
|
38
|
-
|
|
68
|
+
type_map = {
|
|
69
|
+
"Personal": "100",
|
|
70
|
+
"Corporate": "110",
|
|
71
|
+
"Collective": "111"
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
tag = type_map.get(getattr(viaf_record, "name_type", None), "100")
|
|
75
|
+
title = getattr(viaf_record, "name", None) or original_entity
|
|
39
76
|
|
|
77
|
+
fields = [Field(tag=tag, indicators=EMPTY_INDICATORS, subfields=[Subfield("t", title)])]
|
|
78
|
+
|
|
79
|
+
self._add_fields_to_record(record, fields)
|
|
80
|
+
|
|
81
|
+
if viaf_record:
|
|
82
|
+
self._include_name_variations(record, viaf_record)
|
|
83
|
+
|
|
84
|
+
def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord, original_entity: str) -> None:
|
|
85
|
+
|
|
40
86
|
if not viaf_record:
|
|
87
|
+
# viaf record not found, include original entity as 100|t
|
|
88
|
+
self._add_author(record, viaf_record=None, original_entity=original_entity)
|
|
41
89
|
return record
|
|
42
90
|
|
|
43
91
|
viaf_id = viaf_record.viaf_id
|
|
@@ -52,12 +100,12 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
52
100
|
]
|
|
53
101
|
|
|
54
102
|
self._add_fields_to_record(record, fields)
|
|
55
|
-
self._add_author(record, viaf_record)
|
|
103
|
+
self._add_author(record, viaf_record, original_entity=original_entity)
|
|
56
104
|
|
|
57
105
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
58
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
106
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
59
107
|
|
|
60
108
|
self._normalize_sierra(record)
|
|
61
|
-
self._normalize_viaf(record, viaf_record)
|
|
109
|
+
self._normalize_viaf(record, viaf_record, original_entity=original_entity)
|
|
62
110
|
|
|
63
111
|
return record
|
|
@@ -20,9 +20,9 @@ rara_tools/core_formatters/formatted_keyword.py,sha256=hhi6wh4ErFionjBqYsEeKGbf1
|
|
|
20
20
|
rara_tools/core_formatters/formatted_meta.py,sha256=r0RPG4eM-REPIR1DrIJnvYPQtQrzkgdvX9tvhNWjQ0Y,5250
|
|
21
21
|
rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
|
|
22
22
|
rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
|
|
23
|
-
rara_tools/normalizers/authorities.py,sha256=
|
|
24
|
-
rara_tools/normalizers/base.py,sha256=
|
|
25
|
-
rara_tools/normalizers/bibs.py,sha256=
|
|
23
|
+
rara_tools/normalizers/authorities.py,sha256=0R1YpZCVtwKj1fgzLvPYfw-R4v_hBocB9dHwMF2JR7c,4720
|
|
24
|
+
rara_tools/normalizers/base.py,sha256=wiza01rldAhdH5rszE0z7ehE8EHSeKnNb-RmetFiZYE,15388
|
|
25
|
+
rara_tools/normalizers/bibs.py,sha256=7H-spiqj6x8Xk4JQkBY5GK7q5pKjYUmXTRFf0RgzC-w,4040
|
|
26
26
|
rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
|
|
27
27
|
rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
|
|
28
28
|
rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
|
|
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
|
|
|
39
39
|
rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
|
|
40
40
|
rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
|
|
41
41
|
rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
|
|
42
|
-
rara_tools-0.7.
|
|
43
|
-
rara_tools-0.7.
|
|
44
|
-
rara_tools-0.7.
|
|
45
|
-
rara_tools-0.7.
|
|
46
|
-
rara_tools-0.7.
|
|
42
|
+
rara_tools-0.7.11.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
43
|
+
rara_tools-0.7.11.dist-info/METADATA,sha256=OvVJG6looJ5yhNhMJ-jUV74t1u6aymlzajNqMokUa54,4080
|
|
44
|
+
rara_tools-0.7.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
45
|
+
rara_tools-0.7.11.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
46
|
+
rara_tools-0.7.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|