rara-tools 0.7.8__py3-none-any.whl → 0.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/constants/normalizers.py +3 -1
- rara_tools/normalizers/authorities.py +32 -27
- rara_tools/normalizers/base.py +116 -88
- rara_tools/normalizers/bibs.py +14 -27
- rara_tools/normalizers/reader.py +1 -1
- rara_tools/normalizers/viaf.py +5 -2
- {rara_tools-0.7.8.dist-info → rara_tools-0.7.10.dist-info}/METADATA +1 -1
- {rara_tools-0.7.8.dist-info → rara_tools-0.7.10.dist-info}/RECORD +11 -11
- {rara_tools-0.7.8.dist-info → rara_tools-0.7.10.dist-info}/WHEEL +0 -0
- {rara_tools-0.7.8.dist-info → rara_tools-0.7.10.dist-info}/licenses/LICENSE.md +0 -0
- {rara_tools-0.7.8.dist-info → rara_tools-0.7.10.dist-info}/top_level.txt +0 -0
|
@@ -11,13 +11,15 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
11
11
|
""" Normalize authorities records """
|
|
12
12
|
|
|
13
13
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
14
|
-
ALLOW_EDIT_FIELDS: List[str] = [
|
|
15
|
-
|
|
16
|
-
REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "670"]):
|
|
14
|
+
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
15
|
+
REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "670", "667"]):
|
|
17
16
|
|
|
18
17
|
super().__init__(linking_results, sierra_data)
|
|
19
18
|
self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
|
|
20
19
|
self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
|
|
20
|
+
self.records_extra_data = []
|
|
21
|
+
self.sierra_data = sierra_data
|
|
22
|
+
self.records = self._setup_records(linking_results, sierra_data)
|
|
21
23
|
|
|
22
24
|
def _normalize_sierra(self, record: Record, sierraID: str) -> None:
|
|
23
25
|
|
|
@@ -26,7 +28,6 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
26
28
|
fields = [
|
|
27
29
|
Field(
|
|
28
30
|
tag="008",
|
|
29
|
-
indicators=EMPTY_INDICATORS,
|
|
30
31
|
data=f"{self.current_timestamp()}{suffix_008}"
|
|
31
32
|
),
|
|
32
33
|
|
|
@@ -50,22 +51,24 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
50
51
|
return record
|
|
51
52
|
|
|
52
53
|
def _add_birth_and_death_dates(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
54
|
+
|
|
55
|
+
formatted_birth_date = self._format_date(viaf_record.birth_date)
|
|
56
|
+
formatted_death_date = self._format_date(viaf_record.death_date) if viaf_record.death_date != 0 else ""
|
|
57
|
+
|
|
58
|
+
birth_date = self.get_subfield(
|
|
59
|
+
record, "046", "f", formatted_birth_date)
|
|
60
|
+
death_date = self.get_subfield(
|
|
61
|
+
record, "046", "g", formatted_death_date)
|
|
62
|
+
|
|
53
63
|
subfields_046 = [
|
|
54
|
-
Subfield("f",
|
|
55
|
-
|
|
56
|
-
Subfield("g", self.get_subfield(
|
|
57
|
-
record, "046", "g", viaf_record.death_date)),
|
|
58
|
-
Subfield("s", self.get_subfield(
|
|
59
|
-
record, "046", "s", viaf_record.activity_start)),
|
|
60
|
-
Subfield("t", self.get_subfield(
|
|
61
|
-
record, "046", "t", viaf_record.activity_end)),
|
|
64
|
+
Subfield("f", birth_date),
|
|
65
|
+
Subfield("g", death_date),
|
|
62
66
|
]
|
|
63
67
|
|
|
64
68
|
self._add_fields_to_record(
|
|
65
69
|
record, [Field(tag="046", indicators=EMPTY_INDICATORS, subfields=subfields_046)])
|
|
66
70
|
|
|
67
|
-
def
|
|
68
|
-
# TODO 024. will be used to store KRATT KATA ID. Just generate one?
|
|
71
|
+
def _add_viaf_url_or_isni(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
69
72
|
viaf_url = f"https://viaf.org/viaf/{viaf_record.viaf_id}"
|
|
70
73
|
|
|
71
74
|
subfields = [Subfield("0", self.get_subfield(
|
|
@@ -80,17 +83,20 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
80
83
|
self._add_fields_to_record(record, [field])
|
|
81
84
|
|
|
82
85
|
def _add_nationality(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
86
|
+
""" Non-repeatable field 043 - adds ee only if is estonian nationality and
|
|
87
|
+
the record does not have the field already."""
|
|
88
|
+
|
|
89
|
+
is_person_est = self._is_person_est_nationality(viaf_record)
|
|
90
|
+
|
|
91
|
+
if is_person_est:
|
|
92
|
+
fields = [
|
|
93
|
+
Field(
|
|
94
|
+
tag="043",
|
|
95
|
+
indicators=EMPTY_INDICATORS,
|
|
96
|
+
subfields=[Subfield("c", "ee")])
|
|
97
|
+
]
|
|
83
98
|
|
|
84
|
-
|
|
85
|
-
Field(
|
|
86
|
-
tag="043",
|
|
87
|
-
indicators=EMPTY_INDICATORS,
|
|
88
|
-
subfields=[
|
|
89
|
-
Subfield("c", "ee")
|
|
90
|
-
] if self._is_person_est_nationality(viaf_record) else []
|
|
91
|
-
)]
|
|
92
|
-
|
|
93
|
-
self._add_fields_to_record(record, fields)
|
|
99
|
+
self._add_fields_to_record(record, fields)
|
|
94
100
|
|
|
95
101
|
def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
96
102
|
""""
|
|
@@ -102,18 +108,17 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
102
108
|
100, 110, 111 - non-repeatable field, attempts to add author type, if missing.
|
|
103
109
|
|
|
104
110
|
"""
|
|
105
|
-
# TODO: include KRATT KATA ID to 024 and remove on delete. Increment last elastic ID?
|
|
106
111
|
if not viaf_record:
|
|
107
112
|
return
|
|
108
113
|
|
|
109
114
|
self._add_nationality(record, viaf_record)
|
|
110
|
-
self.
|
|
115
|
+
self._add_viaf_url_or_isni(record, viaf_record)
|
|
111
116
|
self._add_birth_and_death_dates(record, viaf_record)
|
|
112
117
|
self._add_author(record, viaf_record)
|
|
113
118
|
|
|
114
119
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
115
120
|
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
116
|
-
|
|
121
|
+
|
|
117
122
|
self._normalize_sierra(record, sierraID)
|
|
118
123
|
self._normalize_viaf(record, viaf_record)
|
|
119
124
|
|
rara_tools/normalizers/base.py
CHANGED
|
@@ -7,9 +7,12 @@ from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
|
|
|
7
7
|
from rara_tools.constants.normalizers import (
|
|
8
8
|
DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
|
|
9
9
|
VIAF_SIMILARITY_THRESHOLD, VERIFY_VIAF_RECORD, MAX_VIAF_RECORDS_TO_VERIFY,
|
|
10
|
-
EMPTY_INDICATORS
|
|
10
|
+
EMPTY_INDICATORS, YYMMDD_FORMAT, YY_DD_FORMAT
|
|
11
11
|
)
|
|
12
12
|
from glom import glom
|
|
13
|
+
from dateutil import parser
|
|
14
|
+
from datetime import date
|
|
15
|
+
|
|
13
16
|
import logging
|
|
14
17
|
import json
|
|
15
18
|
|
|
@@ -18,7 +21,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
21
|
|
|
19
22
|
class RecordNormalizer:
|
|
20
23
|
"""
|
|
21
|
-
Base class
|
|
24
|
+
Base class. For normalizing different record types, corresponding classes have been created.
|
|
22
25
|
By default existing record fields will not be changed, unless included in ALLOW_EDIT_FIELDS. If a field
|
|
23
26
|
included in the normalization is not present, it will be added to the record. If under REPEATABLE_FIELDS,
|
|
24
27
|
a new record field is added.
|
|
@@ -30,72 +33,15 @@ class RecordNormalizer:
|
|
|
30
33
|
"""
|
|
31
34
|
|
|
32
35
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
33
|
-
ALLOW_EDIT_FIELDS: List[str] = ["
|
|
36
|
+
ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
34
37
|
|
|
35
38
|
# Include, if will replace existing field
|
|
36
39
|
self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
|
|
37
40
|
# include, if should be added alongside existing fields
|
|
38
41
|
self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
|
|
39
|
-
|
|
40
|
-
self.
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _setup_records(self, linking_results: List[dict], sierra_data: List[dict]) -> JSONReader:
|
|
44
|
-
"""Setup initial MARC records and data.
|
|
45
|
-
|
|
46
|
-
For linked entities:
|
|
47
|
-
1. Try to get single linked normalized record from KATA elastic. If more than one found, skip.
|
|
48
|
-
2. If 0 matches, search from VIAF and if 1 result found, create a new authority record from the data.
|
|
49
|
-
3. If none or more than one responses found, use only Classificator data (coming from Linker?).
|
|
50
|
-
|
|
51
|
-
for SIERRA records: normalize.
|
|
52
|
-
"""
|
|
53
|
-
linked_records = []
|
|
54
|
-
|
|
55
|
-
for linked in linking_results:
|
|
56
|
-
entity = linked.get("original_entity")
|
|
57
|
-
try:
|
|
58
|
-
linked_info = linked.get("linked_info", [])
|
|
59
|
-
linked_num = len(linked_info)
|
|
60
|
-
|
|
61
|
-
if not linked_info:
|
|
62
|
-
# new record will be created
|
|
63
|
-
logger.info(
|
|
64
|
-
f"No linked entities found for {entity}")
|
|
65
|
-
continue
|
|
66
|
-
|
|
67
|
-
if linked_num == 1:
|
|
68
|
-
linked = linked_info[0]
|
|
69
|
-
linked_records.append(linked.get("json", {}))
|
|
70
|
-
self.records_extra_data.append({
|
|
71
|
-
"entity": entity,
|
|
72
|
-
"viaf": linked.get("viaf", {}),
|
|
73
|
-
"type": "linked",
|
|
74
|
-
"edited": True
|
|
75
|
-
})
|
|
76
|
-
else:
|
|
77
|
-
# new record will be created
|
|
78
|
-
logger.info(
|
|
79
|
-
f"Multiple linked entities found for {entity}")
|
|
80
|
-
|
|
81
|
-
except Exception as e:
|
|
82
|
-
logger.error(f"Error processing entity {entity}: {e}")
|
|
83
|
-
|
|
84
|
-
self.records_extra_data.extend(
|
|
85
|
-
{
|
|
86
|
-
"sierraID": obj.get("sierraID"),
|
|
87
|
-
"type": "sierra",
|
|
88
|
-
"edited": True
|
|
89
|
-
}
|
|
90
|
-
for obj in (sierra_data or [])
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
all_records = linked_records + (sierra_data or [])
|
|
94
|
-
|
|
95
|
-
return SafeJSONReader(
|
|
96
|
-
json.dumps(all_records, ensure_ascii=False),
|
|
97
|
-
)
|
|
98
|
-
|
|
42
|
+
# leader applied to new records
|
|
43
|
+
self.DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
|
|
44
|
+
|
|
99
45
|
def _setup_records(self, linking_results: List[dict], sierra_data: List[dict]) -> JSONReader:
|
|
100
46
|
"""Setup initial MARC records and data.
|
|
101
47
|
|
|
@@ -114,9 +60,34 @@ class RecordNormalizer:
|
|
|
114
60
|
linked_info = linked.get("linked_info", [])
|
|
115
61
|
|
|
116
62
|
if not isinstance(linked_info, list) or not linked_info:
|
|
63
|
+
# No linked entities found, create new record
|
|
64
|
+
logger.info(
|
|
65
|
+
f"No linked entities found for {entity}, Creating new record.")
|
|
66
|
+
linked_records.append({
|
|
67
|
+
"leader": self.DEFAULT_LEADER,
|
|
68
|
+
"fields": []
|
|
69
|
+
})
|
|
70
|
+
self.records_extra_data.append({
|
|
71
|
+
"entity": entity,
|
|
72
|
+
"edited": False
|
|
73
|
+
})
|
|
117
74
|
continue
|
|
75
|
+
|
|
76
|
+
if len(linked_info) > 1:
|
|
77
|
+
# Multiple linked entities found, create new record
|
|
78
|
+
logger.info(
|
|
79
|
+
f"Multiple linked entities found for {entity}. Creating new record.")
|
|
80
|
+
linked_records.append({
|
|
81
|
+
"leader": self.DEFAULT_LEADER,
|
|
82
|
+
"fields": []
|
|
83
|
+
})
|
|
84
|
+
self.records_extra_data.append({
|
|
85
|
+
"entity": entity,
|
|
86
|
+
"edited": False
|
|
87
|
+
})
|
|
88
|
+
continue
|
|
118
89
|
|
|
119
|
-
|
|
90
|
+
elif len(linked_info) == 1:
|
|
120
91
|
linked_item = linked_info[0]
|
|
121
92
|
if not isinstance(linked_item, dict):
|
|
122
93
|
continue
|
|
@@ -128,7 +99,8 @@ class RecordNormalizer:
|
|
|
128
99
|
"type": "linked",
|
|
129
100
|
"edited": True
|
|
130
101
|
})
|
|
131
|
-
|
|
102
|
+
continue
|
|
103
|
+
|
|
132
104
|
self.records_extra_data.extend(
|
|
133
105
|
{
|
|
134
106
|
"sierraID": obj.get("sierraID"),
|
|
@@ -138,25 +110,25 @@ class RecordNormalizer:
|
|
|
138
110
|
for obj in (sierra_data or [])
|
|
139
111
|
if isinstance(obj, dict)
|
|
140
112
|
)
|
|
141
|
-
|
|
113
|
+
|
|
142
114
|
all_records = linked_records + (sierra_data or [])
|
|
143
|
-
|
|
115
|
+
|
|
144
116
|
return SafeJSONReader(json.dumps(all_records, ensure_ascii=False))
|
|
145
117
|
|
|
146
118
|
@staticmethod
|
|
147
119
|
def current_timestamp():
|
|
148
|
-
"""6 digit timestamp
|
|
149
|
-
return datetime.now().strftime(
|
|
120
|
+
"""6 digit timestamp, format YYMMDD"""
|
|
121
|
+
return datetime.now().strftime(YYMMDD_FORMAT)
|
|
150
122
|
|
|
151
123
|
@staticmethod
|
|
152
124
|
def current_yyyy_dd():
|
|
153
125
|
"""format of 2025-03"""
|
|
154
|
-
return datetime.now().strftime(
|
|
126
|
+
return datetime.now().strftime(YY_DD_FORMAT)
|
|
155
127
|
|
|
156
128
|
@staticmethod
|
|
157
129
|
def _is_person_est_nationality(viaf_record: VIAFRecord) -> bool:
|
|
158
|
-
return viaf_record.nationality == "ee"
|
|
159
|
-
|
|
130
|
+
return hasattr(viaf_record, 'nationality') and viaf_record.nationality == "ee"
|
|
131
|
+
|
|
160
132
|
def _is_nxx(self, field: Field, n: str):
|
|
161
133
|
""" Check if fields tag is in nxx range. """
|
|
162
134
|
return field.tag.startswith(n)
|
|
@@ -173,6 +145,27 @@ class RecordNormalizer:
|
|
|
173
145
|
def _filter_equivalent_field_not_in_record(self, record: Record, fields: List[Field]) -> bool:
|
|
174
146
|
""" filter out fields, that do not have an equivalent in the record. """
|
|
175
147
|
return filter(lambda field: not self._field_in_record(field, record), fields)
|
|
148
|
+
|
|
149
|
+
def _format_date(self, value: str) -> str:
|
|
150
|
+
|
|
151
|
+
if not value:
|
|
152
|
+
return ""
|
|
153
|
+
|
|
154
|
+
if isinstance(value, (datetime, date)):
|
|
155
|
+
return value.strftime("%Y%m%d")
|
|
156
|
+
|
|
157
|
+
val = str(value).strip()
|
|
158
|
+
|
|
159
|
+
try:
|
|
160
|
+
dt = parser.parse(val, fuzzy=False, default=datetime(1, 1, 1))
|
|
161
|
+
except Exception:
|
|
162
|
+
return ""
|
|
163
|
+
|
|
164
|
+
if len(val) == 4 and val.isdigit():
|
|
165
|
+
return dt.strftime("%Y") # YYYY
|
|
166
|
+
if len(val) in (6, 7): # YYYYMM or YYYY-MM
|
|
167
|
+
return dt.strftime("%Y%m") # YYYYMM
|
|
168
|
+
return dt.strftime("%Y%m%d") # YYYYMMDD
|
|
176
169
|
|
|
177
170
|
def get_subfield(self, record: Record, tag: str, subfield: str, default: str) -> str:
|
|
178
171
|
""" get record existing subfield value or assign a fallback value. """
|
|
@@ -220,7 +213,10 @@ class RecordNormalizer:
|
|
|
220
213
|
)
|
|
221
214
|
|
|
222
215
|
def _add_fields_to_record(self, record: Record, fields: List[Field]) -> Record:
|
|
223
|
-
|
|
216
|
+
# filter out subfields that are empty, or 0, as VIAF returns 0 for unknown dates
|
|
217
|
+
for field in fields:
|
|
218
|
+
field.subfields = [sub for sub in field.subfields if sub.value and sub.value not in ["0", 0]]
|
|
219
|
+
|
|
224
220
|
self._handle_repeatable_fields(record, *fields)
|
|
225
221
|
self._handle_editable_fields(record, *fields)
|
|
226
222
|
self._handle_default_fields(record, *fields)
|
|
@@ -247,31 +243,63 @@ class RecordNormalizer:
|
|
|
247
243
|
indicators=EMPTY_INDICATORS,
|
|
248
244
|
subfields=[
|
|
249
245
|
Subfield("a", viaf_record.name),
|
|
250
|
-
Subfield("b", viaf_record.name_type), # Is this correct??
|
|
251
|
-
Subfield("c", viaf_record.name_type) # Is this correct??
|
|
252
246
|
]
|
|
253
247
|
)
|
|
254
248
|
]
|
|
255
249
|
|
|
256
250
|
self._add_fields_to_record(record, fields)
|
|
251
|
+
|
|
252
|
+
def _move680_fields_to_667(self, record: Record) -> None:
|
|
253
|
+
""" Move existing 680 fields to 667, if any. """
|
|
254
|
+
fields_680 = record.get_fields("680")
|
|
255
|
+
if not fields_680:
|
|
256
|
+
return
|
|
257
|
+
|
|
258
|
+
fields_667 = [
|
|
259
|
+
Field(
|
|
260
|
+
tag="667",
|
|
261
|
+
indicators=EMPTY_INDICATORS,
|
|
262
|
+
subfields=field.subfields
|
|
263
|
+
) for field in fields_680
|
|
264
|
+
]
|
|
257
265
|
|
|
258
|
-
|
|
259
|
-
|
|
266
|
+
record.remove_fields("680")
|
|
267
|
+
self._add_fields_to_record(record, fields_667)
|
|
260
268
|
|
|
269
|
+
def _normalize_common(self, record: Record, is_editing_existing_record: bool) -> None:
|
|
270
|
+
"""Common logic for all normalizations.
|
|
271
|
+
- Includes note about record being created/edited.
|
|
272
|
+
- include date note with a different subfield, depending on if record is new or edited.
|
|
273
|
+
- move existing 680 fields to 667
|
|
274
|
+
"""
|
|
275
|
+
# before adding new notes
|
|
276
|
+
self._move680_fields_to_667(record)
|
|
277
|
+
|
|
261
278
|
note = "Muudetud AI poolt" if is_editing_existing_record else "Loodud AI poolt"
|
|
262
279
|
date_note = f"KRATT {self.current_yyyy_dd()}"
|
|
263
280
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
281
|
+
field_667 = Field(tag="667",
|
|
282
|
+
indicators=EMPTY_INDICATORS,
|
|
283
|
+
subfields=[Subfield("a", note)])
|
|
284
|
+
|
|
285
|
+
fields = [field_667]
|
|
286
|
+
|
|
287
|
+
if is_editing_existing_record:
|
|
288
|
+
field_925 = Field(tag="925",
|
|
289
|
+
indicators=EMPTY_INDICATORS,
|
|
290
|
+
subfields=[
|
|
291
|
+
Subfield("p", self.get_subfield(record, "925", "p", date_note))
|
|
292
|
+
])
|
|
293
|
+
fields.append(field_925)
|
|
294
|
+
|
|
295
|
+
else:
|
|
296
|
+
field_925 = Field(tag="925",
|
|
297
|
+
indicators=EMPTY_INDICATORS,
|
|
298
|
+
subfields=[
|
|
299
|
+
Subfield("t", self.get_subfield(record, "925", "t", date_note))
|
|
300
|
+
])
|
|
301
|
+
fields.append(field_925)
|
|
302
|
+
|
|
275
303
|
self._add_fields_to_record(record, fields)
|
|
276
304
|
|
|
277
305
|
return record
|
rara_tools/normalizers/bibs.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pymarc import (Field,
|
|
1
|
+
from pymarc import (Field, Subfield, Record)
|
|
2
2
|
|
|
3
3
|
from rara_tools.constants import EMPTY_INDICATORS
|
|
4
4
|
from rara_tools.normalizers.viaf import VIAFRecord
|
|
@@ -11,32 +11,25 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
11
11
|
""" Normalize bib records. """
|
|
12
12
|
|
|
13
13
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
14
|
-
ALLOW_EDIT_FIELDS: List[str] = ["
|
|
15
|
-
REPEATABLE_FIELDS: List[str] = []):
|
|
14
|
+
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
15
|
+
REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
16
16
|
super().__init__(linking_results, sierra_data)
|
|
17
|
+
self.DEFAULT_LEADER = "00399nz a2200145n 4500" # must be 24 digits
|
|
17
18
|
self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
|
|
18
19
|
self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
|
|
20
|
+
|
|
21
|
+
self.records_extra_data = []
|
|
22
|
+
self.sierra_data = sierra_data
|
|
23
|
+
self.records = self._setup_records(linking_results, sierra_data)
|
|
19
24
|
|
|
20
25
|
def _normalize_sierra(self, record: Record) -> Record:
|
|
26
|
+
|
|
27
|
+
suffix_008 = "|||aznnnaabn || ||| "
|
|
28
|
+
|
|
21
29
|
fields = [
|
|
22
30
|
Field(
|
|
23
31
|
tag="008",
|
|
24
|
-
|
|
25
|
-
data=f"{self.current_timestamp()} | | | aznnnaabn | | | | |"
|
|
26
|
-
),
|
|
27
|
-
Field(
|
|
28
|
-
tag="046",
|
|
29
|
-
indicators=EMPTY_INDICATORS,
|
|
30
|
-
subfields=[
|
|
31
|
-
Subfield("k", "Pub date")
|
|
32
|
-
]
|
|
33
|
-
),
|
|
34
|
-
Field(
|
|
35
|
-
tag="245",
|
|
36
|
-
indicators=Indicators("1", "0"),
|
|
37
|
-
subfields=[
|
|
38
|
-
Subfield("a", "Title")
|
|
39
|
-
]
|
|
32
|
+
data=f"{self.current_timestamp()}{suffix_008}"
|
|
40
33
|
),
|
|
41
34
|
]
|
|
42
35
|
|
|
@@ -55,14 +48,8 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
55
48
|
subfields=[
|
|
56
49
|
Subfield("a", viaf_id)
|
|
57
50
|
]
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
tag="100",
|
|
61
|
-
indicators=EMPTY_INDICATORS,
|
|
62
|
-
subfields=[
|
|
63
|
-
Subfield("a", "?")
|
|
64
|
-
]
|
|
65
|
-
)]
|
|
51
|
+
)
|
|
52
|
+
]
|
|
66
53
|
|
|
67
54
|
self._add_fields_to_record(record, fields)
|
|
68
55
|
self._add_author(record, viaf_record)
|
rara_tools/normalizers/reader.py
CHANGED
rara_tools/normalizers/viaf.py
CHANGED
|
@@ -10,6 +10,7 @@ from rara_tools.constants.normalizers import (
|
|
|
10
10
|
DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
|
|
11
11
|
VIAF_SIMILARITY_THRESHOLD, VIAF_ALLOWED_SOURCES
|
|
12
12
|
)
|
|
13
|
+
from glom import glom
|
|
13
14
|
|
|
14
15
|
import logging
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
@@ -598,11 +599,13 @@ class VIAFClient:
|
|
|
598
599
|
"""
|
|
599
600
|
logger.debug("Extracting VIAF IDs from VIAF search query results.")
|
|
600
601
|
try:
|
|
601
|
-
|
|
602
|
+
res_json = search_query_response.json()
|
|
603
|
+
records = glom(res_json, "queryResult.records.record", default=[])
|
|
604
|
+
|
|
602
605
|
except Exception as e:
|
|
603
606
|
logger.error(
|
|
604
607
|
f"Parsing records from search query " \
|
|
605
|
-
f"
|
|
608
|
+
f"failed with error: {e}."
|
|
606
609
|
)
|
|
607
610
|
records = []
|
|
608
611
|
viaf_ids = []
|
|
@@ -12,7 +12,7 @@ rara_tools/constants/general.py,sha256=dLomRopLiHv_J_liSIGzK1A3XByydsKGIyVN8KuuN
|
|
|
12
12
|
rara_tools/constants/language_evaluator.py,sha256=3sCSaoS-zXQRY0vJ7UUMuZqbtYQD_quVVbdpgvJjE7I,124
|
|
13
13
|
rara_tools/constants/linker.py,sha256=WnOmJFTkoBMZUbBaW1uY45NTQB7FGG-dc9a_6qYTtwk,3381
|
|
14
14
|
rara_tools/constants/meta_extractor.py,sha256=iVyxycKScbrjFWLv50dRmdeHfTLOKbdyEhgUF3DyBrY,1053
|
|
15
|
-
rara_tools/constants/normalizers.py,sha256=
|
|
15
|
+
rara_tools/constants/normalizers.py,sha256=Qyi6eSCp4Gnz45xF-vOPExGXasyAoVimOAAlLj1t74s,1383
|
|
16
16
|
rara_tools/constants/parsers.py,sha256=L6nh1Itget9_9DMsliDkh6T25z78eMFPWVkbaU08DwU,5561
|
|
17
17
|
rara_tools/constants/subject_indexer.py,sha256=0snyyB8IMCWXOYPXR_c0Kavq4nBiww559rdNOKjawx8,2133
|
|
18
18
|
rara_tools/core_formatters/core_formatter.py,sha256=u_Cdgv9qBcyF-XddjaRGUqAFik9OMAdSzAulXpYR7vE,4997
|
|
@@ -20,11 +20,11 @@ rara_tools/core_formatters/formatted_keyword.py,sha256=hhi6wh4ErFionjBqYsEeKGbf1
|
|
|
20
20
|
rara_tools/core_formatters/formatted_meta.py,sha256=r0RPG4eM-REPIR1DrIJnvYPQtQrzkgdvX9tvhNWjQ0Y,5250
|
|
21
21
|
rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
|
|
22
22
|
rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
|
|
23
|
-
rara_tools/normalizers/authorities.py,sha256=
|
|
24
|
-
rara_tools/normalizers/base.py,sha256=
|
|
25
|
-
rara_tools/normalizers/bibs.py,sha256=
|
|
26
|
-
rara_tools/normalizers/reader.py,sha256=
|
|
27
|
-
rara_tools/normalizers/viaf.py,sha256=
|
|
23
|
+
rara_tools/normalizers/authorities.py,sha256=7YlUFlhQTCJOT7umcrZE2Td_w1YIpBxVqW0b7rBDTbU,4645
|
|
24
|
+
rara_tools/normalizers/base.py,sha256=BpRI2BSA9qKW0ux3rFJMTQsxjPB8gYObHtcvDoJwMOU,15256
|
|
25
|
+
rara_tools/normalizers/bibs.py,sha256=5NZooArBYaRE_MCG2g3A4kxiRzMoXAqGkx_hCbPzKlM,2018
|
|
26
|
+
rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
|
|
27
|
+
rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
|
|
28
28
|
rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
|
|
29
29
|
rara_tools/parsers/marc_parsers/ems_parser.py,sha256=LFuhZcVwmHMcJknX9p4ZkO8RdjPdQZ4APGbw8KV6BIs,2024
|
|
30
30
|
rara_tools/parsers/marc_parsers/location_parser.py,sha256=dSU9dQoGV5z0ajhLI1bn3AAghkOr79qKIrX7sO0_4lA,1873
|
|
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
|
|
|
39
39
|
rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
|
|
40
40
|
rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
|
|
41
41
|
rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
|
|
42
|
-
rara_tools-0.7.
|
|
43
|
-
rara_tools-0.7.
|
|
44
|
-
rara_tools-0.7.
|
|
45
|
-
rara_tools-0.7.
|
|
46
|
-
rara_tools-0.7.
|
|
42
|
+
rara_tools-0.7.10.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
43
|
+
rara_tools-0.7.10.dist-info/METADATA,sha256=hQFvtq4KzNHR_aDzynXw2WJH-HbdKJshP0oYoy1kmss,4080
|
|
44
|
+
rara_tools-0.7.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
45
|
+
rara_tools-0.7.10.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
46
|
+
rara_tools-0.7.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|