rara-tools 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/constants/normalizers.py +0 -11
- rara_tools/constants/parsers.py +152 -0
- rara_tools/normalizers/__init__.py +4 -0
- rara_tools/normalizers/authorities.py +120 -0
- rara_tools/normalizers/base.py +290 -0
- rara_tools/normalizers/bibs.py +76 -0
- rara_tools/normalizers/viaf.py +204 -0
- rara_tools/parsers/marc_parsers/base_parser.py +50 -0
- rara_tools/parsers/marc_parsers/ems_parser.py +49 -0
- rara_tools/parsers/marc_parsers/location_parser.py +46 -0
- rara_tools/parsers/marc_parsers/organization_parser.py +44 -0
- rara_tools/parsers/marc_parsers/person_parser.py +45 -0
- rara_tools/parsers/marc_parsers/title_parser.py +1 -0
- rara_tools/parsers/marc_records/base_record.py +112 -0
- rara_tools/parsers/marc_records/ems_record.py +267 -0
- rara_tools/parsers/marc_records/organization_record.py +245 -0
- rara_tools/parsers/marc_records/person_record.py +217 -0
- rara_tools/parsers/marc_records/title_record.py +1 -0
- rara_tools/parsers/tools/entity_normalizers.py +256 -0
- rara_tools/parsers/tools/marc_converter.py +15 -0
- rara_tools/parsers/tools/russian_transliterator.py +248 -0
- {rara_tools-0.2.0.dist-info → rara_tools-0.4.0.dist-info}/METADATA +5 -2
- rara_tools-0.4.0.dist-info/RECORD +37 -0
- rara_tools-0.2.0.dist-info/RECORD +0 -17
- {rara_tools-0.2.0.dist-info → rara_tools-0.4.0.dist-info}/WHEEL +0 -0
- {rara_tools-0.2.0.dist-info → rara_tools-0.4.0.dist-info}/licenses/LICENSE.md +0 -0
- {rara_tools-0.2.0.dist-info → rara_tools-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
from typing import List, NoReturn
|
|
2
|
+
from pymarc.record import Record
|
|
3
|
+
from rara_tools.parsers.tools.entity_normalizers import KeywordNormalizer
|
|
4
|
+
from rara_tools.parsers.marc_records.base_record import BaseRecord
|
|
5
|
+
from rara_tools.constants.parsers import (
|
|
6
|
+
EMSMarcIDs, KeywordType,
|
|
7
|
+
EN_SUBJECT_FIELDS, ET_SUBJECT_FIELDS
|
|
8
|
+
)
|
|
9
|
+
import regex as re
|
|
10
|
+
import json
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EMSRecord(BaseRecord):
    """ Generates a simplified EMS JSON record
    from a pymarc MARC record.

    Every public property is computed lazily through ``get_values`` of the
    base class and stored in a private attribute. An empty value doubles
    as the "not computed yet" marker, so properties whose result is empty
    are re-evaluated on every access.
    """

    def __init__(self, record: Record, add_variations: bool = False) -> None:
        """ Initializes EMSRecord object.

        Parameters
        -----------
        record: Record
            pymarc.record.Record object.
        add_variations: bool
            If enabled, constructs an additional variations field, which
            combines the content of multiple fields + adds some generated
            variations. If the output is uploaded into Elastic and used
            via rara-norm-linker, it is necessary to enable this.
        """
        super().__init__(record=record, add_variations=add_variations)

        # Lookup tables translating subject field IDs into labels.
        self.__en_subject_field_mapping: dict = EN_SUBJECT_FIELDS
        self.__et_subject_field_mapping: dict = ET_SUBJECT_FIELDS

        self.__keyword: str = ""
        self.__keyword_en: str = ""
        self.__keyword_type: str = ""
        self.__keyword_variations: List[str] = []

        # MARC field ID -> keyword type; also passed as `marc_ids` when
        # looking up the keyword itself.
        self.__keyword_fields = {
            EMSMarcIDs.TIME_KEYWORD: KeywordType.TIME,
            EMSMarcIDs.TOPIC_KEYWORD: KeywordType.TOPIC,
            EMSMarcIDs.LOC_KEYWORD: KeywordType.LOC,
            EMSMarcIDs.GENRE_KEYWORD: KeywordType.GENRE
        }

        # MARC field IDs consulted by the individual properties.
        self.__ems_url_ids = EMSMarcIDs.URL
        self.__synonym_ids = EMSMarcIDs.SYNONYMS
        self.__related_ids = EMSMarcIDs.RELATED
        self.__category_ids = EMSMarcIDs.CATEGORY
        self.__notes_ids = EMSMarcIDs.NOTES

        # Lazily filled caches backing the public properties.
        self.__synonyms: List[str] = []
        self.__synonyms_en: List[str] = []
        self.__subject_field_ids: List[str] = []

        self.__subject_fields_et: List[str] = []
        self.__subject_fields_en: List[str] = []
        self.__ems_url: str = ""
        self.__narrower: List[str] = []
        self.__broader: List[str] = []
        self.__related: List[str] = []
        self.__narrower_ems_urls: List[str] = []
        self.__broader_ems_urls: List[str] = []
        self.__related_ems_urls: List[str] = []
        self.__variations: List[str] = []
        self.__variations_en: List[str] = []
        # None means "not evaluated yet"; see `use_with_others`.
        self.__use_with_others: bool | None = None
        self.__full_record: dict = {}

    @property
    def keyword(self) -> str:
        """ First subfield "a" value of the keyword fields.

        Returns an empty string if the record contains no such value.
        """
        if not self.__keyword:
            values = self.get_values(
                marc_ids=self.__keyword_fields,
                subfield_id="a"
            )
            # Records without a keyword field yield "" rather than an
            # IndexError, mirroring how `keyword_en` treats missing data.
            self.__keyword = values[0] if values else ""
        return self.__keyword

    @property
    def keyword_en(self) -> str:
        """ First entry of `synonyms_en`, or an empty string if there is none.
        """
        if not self.__keyword_en:
            self.__keyword_en = self.synonyms_en[0] if self.synonyms_en else ""
        return self.__keyword_en

    @property
    def synonyms(self) -> List[str]:
        """ Subfield "a" values of the synonym fields.
        """
        if not self.__synonyms:
            self.__synonyms = self.get_values(
                marc_ids=self.__synonym_ids,
                subfield_id="a"
            )
        return self.__synonyms

    @property
    def synonyms_en(self) -> List[str]:
        """ Subfield "a" values of the synonym fields, requested with ind2="9".
        """
        if not self.__synonyms_en:
            self.__synonyms_en = self.get_values(
                marc_ids=self.__synonym_ids,
                subfield_id="a", ind2="9"
            )
        return self.__synonyms_en

    @property
    def subject_field_ids(self) -> List[str]:
        """ Subfield "a" values of the category fields, requested with ind2="7".
        """
        if not self.__subject_field_ids:
            self.__subject_field_ids = self.get_values(
                marc_ids=self.__category_ids,
                subfield_id="a",
                ind2="7"
            )
        return self.__subject_field_ids

    @property
    def subject_fields_et(self) -> List[str]:
        """ `subject_field_ids` translated through ET_SUBJECT_FIELDS.

        Raises KeyError if an ID is missing from the mapping.
        """
        if not self.__subject_fields_et:
            self.__subject_fields_et = [
                self.__et_subject_field_mapping[_id]
                for _id in self.subject_field_ids
            ]
        return self.__subject_fields_et

    @property
    def subject_fields_en(self) -> List[str]:
        """ `subject_field_ids` translated through EN_SUBJECT_FIELDS.

        Raises KeyError if an ID is missing from the mapping.
        """
        if not self.__subject_fields_en:
            self.__subject_fields_en = [
                self.__en_subject_field_mapping[_id]
                for _id in self.subject_field_ids
            ]
        return self.__subject_fields_en

    @property
    def ems_url(self) -> str:
        """ First subfield "0" value of the URL fields, requested with ind2="8".

        Returns an empty string if the record contains no such value.
        """
        if not self.__ems_url:
            values = self.get_values(
                marc_ids=self.__ems_url_ids,
                subfield_id="0",
                ind2="8"
            )
            # Same guard as in `keyword`: no URL field -> "" instead of
            # an IndexError.
            self.__ems_url = values[0] if values else ""
        return self.__ems_url

    @property
    def broader(self) -> List[str]:
        """ Subfield "a" values of the related fields, requested with the
        subfield restriction ("w", "g").
        """
        if not self.__broader:
            self.__broader = self.get_values(
                marc_ids=self.__related_ids,
                subfield_id="a",
                subfield_restriction=("w", "g")
            )
        return self.__broader

    @property
    def narrower(self) -> List[str]:
        """ Subfield "a" values of the related fields, requested with the
        subfield restriction ("w", "h").
        """
        if not self.__narrower:
            self.__narrower = self.get_values(
                marc_ids=self.__related_ids,
                subfield_id="a",
                subfield_restriction=("w", "h")
            )
        return self.__narrower

    @property
    def related(self) -> List[str]:
        """ Subfield "a" values of the related fields, requested with
        subfield_to_ignore="w".
        """
        if not self.__related:
            self.__related = self.get_values(
                marc_ids=self.__related_ids,
                subfield_id="a",
                subfield_to_ignore="w"
            )
        return self.__related

    @property
    def broader_ems_urls(self) -> List[str]:
        """ Subfield "0" values of the related fields, requested with the
        subfield restriction ("w", "g").
        """
        if not self.__broader_ems_urls:
            self.__broader_ems_urls = self.get_values(
                marc_ids=self.__related_ids,
                subfield_id="0",
                subfield_restriction=("w", "g")
            )
        return self.__broader_ems_urls

    @property
    def narrower_ems_urls(self) -> List[str]:
        """ Subfield "0" values of the related fields, requested with the
        subfield restriction ("w", "h").
        """
        if not self.__narrower_ems_urls:
            self.__narrower_ems_urls = self.get_values(
                marc_ids=self.__related_ids,
                subfield_id="0",
                subfield_restriction=("w", "h")
            )
        return self.__narrower_ems_urls

    @property
    def related_ems_urls(self) -> List[str]:
        """ Subfield "0" values of the related fields, requested with
        subfield_to_ignore="w".
        """
        if not self.__related_ems_urls:
            self.__related_ems_urls = self.get_values(
                marc_ids=self.__related_ids,
                subfield_id="0",
                subfield_to_ignore="w"
            )
        return self.__related_ems_urls

    @property
    def keyword_type(self) -> str:
        """ Keyword type derived from the keyword field present in the record.

        Scans every entry of `dict_record`; if several keyword fields are
        present, the type of the last one wins.
        """
        if not self.__keyword_type:
            for field in self.dict_record:
                # Each entry is keyed by its MARC field ID.
                field_id = list(field.keys())[0]
                if field_id in self.__keyword_fields:
                    self.__keyword_type = self.__keyword_fields[field_id]
        return self.__keyword_type

    @property
    def use_with_others(self) -> bool:
        """ True if the first subfield "i" note contains
        "Kasutada koos teise", False otherwise.
        """
        if self.__use_with_others is None:
            notes = self.get_values(marc_ids=self.__notes_ids, subfield_id="i")
            # Plain substring test; the searched phrase has no regex
            # metacharacters, so this matches what a regex search finds.
            self.__use_with_others = (
                bool(notes) and "Kasutada koos teise" in notes[0]
            )
        return self.__use_with_others

    @property
    def variations(self) -> List[str]:
        """ Lower-cased, de-duplicated variations of the keyword and its
        synonyms, as produced by KeywordNormalizer, in sorted order.
        """
        if not self.__variations:
            # Skip empty strings (e.g. a record without a keyword).
            base_keywords = [kw for kw in self.synonyms + [self.keyword] if kw]
            generated = []
            for kw in base_keywords:
                normalizer = KeywordNormalizer(kw, keyword_type=self.keyword_type)
                generated.extend(normalizer.variations)
            # Lower-case BEFORE de-duplicating so that variants differing
            # only in case collapse into one entry; sort for stable output.
            self.__variations = sorted({v.lower() for v in generated})
        return self.__variations

    @property
    def variations_en(self) -> List[str]:
        """ English variations.

        NOTE: nothing in this class populates the backing list, so this
        currently always returns an empty list.
        """
        return self.__variations_en

    @property
    def full_record(self) -> dict:
        """ The simplified record as a dict.

        Includes the raw MARC record both as text ("full_record_marc") and
        as a JSON string ("full_record_json"). "link_variations" is added
        only if `add_variations` is enabled.
        """
        if not self.__full_record:
            self.__full_record = {
                "keyword": self.keyword,
                "keyword_en": self.keyword_en,
                "keyword_type": self.keyword_type,
                "use_with_others": self.use_with_others,
                "subject_field_ids": self.subject_field_ids,
                "subject_fields_et": self.subject_fields_et,
                "subject_fields_en": self.subject_fields_en,
                "synonyms": self.synonyms,
                "synonyms_en": self.synonyms_en,
                "narrower": self.narrower,
                "broader": self.broader,
                "related": self.related,
                "narrower_ems_urls": self.narrower_ems_urls,
                "broader_ems_urls": self.broader_ems_urls,
                "related_ems_urls": self.related_ems_urls,
                "ems_id": self.identifier,
                "ems_url": self.ems_url,
                "identifier_source": self.identifier_source,
                "full_record_marc": str(self.marc_record),
                "full_record_json": json.dumps(self.marc_json_record)
            }
            if self.add_variations:
                self.__full_record.update(
                    {"link_variations": self.variations}
                )
        return self.__full_record
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
from typing import List, NoReturn
|
|
2
|
+
from pymarc.record import Record
|
|
3
|
+
from rara_tools.parsers.marc_records.base_record import BaseRecord
|
|
4
|
+
from rara_tools.constants.parsers import OrganizationMarcIDs
|
|
5
|
+
import regex as re
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
# TODO: indicators (ind1) on fields 100 and 400?
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class OrganizationRecord(BaseRecord):
    """ Generates a simplified organization JSON record
    from a pymarc MARC record.

    Every public property is computed lazily through ``get_values`` of the
    base class and stored in a private attribute. An empty value doubles
    as the "not computed yet" marker, so properties whose result is empty
    are re-evaluated on every access.
    """

    def __init__(self, record: Record, add_variations: bool = False) -> None:
        """ Initializes OrganizationRecord object.

        Parameters
        -----------
        record: Record
            pymarc.record.Record object.
        add_variations: bool
            If enabled, constructs an additional variations field, which
            combines the content of multiple fields + adds some generated
            variations. If the output is uploaded into Elastic and used
            via rara-norm-linker, it is necessary to enable this.
        """
        super().__init__(record=record, add_variations=add_variations)

        # MARC field IDs consulted by the individual properties.
        self.__name_field_id: List[str] = OrganizationMarcIDs.NAME
        self.__name_variations_field_id: List[str] = OrganizationMarcIDs.NAME_VARIATIONS
        self.__related_names_field_id: List[str] = OrganizationMarcIDs.RELATED_NAMES
        self.__source_field_id: List[str] = OrganizationMarcIDs.SOURCE
        self.__description_field_id: List[str] = OrganizationMarcIDs.DESCRIPTION
        self.__area_code_id: List[str] = OrganizationMarcIDs.AREA_CODE
        self.__default_year: int | None = None

        # Lazily filled caches backing the public properties.
        self.__name: str = ""
        self.__original_name: dict = {}
        self.__name_specification: str = ""
        self.__life_years: str = ""
        self.__birth_year: int = -1
        self.__death_year: int = -1
        self.__name_variations: List[str] = []
        self.__source: str = ""
        self.__description: str = ""
        self.__area_code: str = ""
        self.__acronyms: List[str] = []
        self.__alternative_names: List[str] = []
        self.__related_acronyms: List[str] = []
        self.__old_names: List[str] = []
        self.__new_names: List[str] = []
        self.__related_old_names: List[str] = []
        self.__related_new_names: List[str] = []
        self.__full_record: dict = {}
        self.__variations: List[str] = []

    def _clean_value(self, value: str) -> str:
        """ Strips leading/trailing dots, commas and spaces from `value`.
        """
        return value.strip("., ")

    def _merge_and_clean(self, value: dict, keys: List[str]) -> str:
        """ Joins the cleaned, non-empty values of `keys` with single spaces.

        Parameters
        -----------
        value: dict
            Mapping of subfield ID to raw subfield text.
        keys: List[str]
            Subfield IDs to merge, in output order. Missing keys are skipped.

        Returns
        -----------
        str
            The merged string; empty if none of the keys has content.
        """
        cleaned = (self._clean_value(value.get(key, "")) for key in keys)
        return " ".join(part for part in cleaned if part)

    @property
    def original_name(self) -> dict:
        """ Cleaned subfields "a" and "b" of the first name field.

        Returns a dict with the keys "a" and "b", or an empty dict if the
        record has no name field (a warning is logged in that case).
        """
        if not self.__original_name:
            values = self.get_values(
                marc_ids=self.__name_field_id,
                subfield_id=["a", "b"]
            )
            if values:
                first = values[0]
                self.__original_name = {
                    "a": self._clean_value(first.get("a", "")),
                    "b": self._clean_value(first.get("b", ""))
                }
            else:
                # Report the nameless record through logging instead of
                # dumping it on stdout. Local import keeps this block
                # independent of the module's import section.
                import logging
                logging.getLogger(__name__).warning(
                    "No name field found in MARC record: %s", self.marc_record
                )
        return self.__original_name

    @property
    def name(self) -> str:
        """ Subfields "a" and "b" of `original_name` merged into one string.
        """
        if not self.__name:
            self.__name = self._merge_and_clean(self.original_name, ["a", "b"])
        return self.__name

    @property
    def acronyms(self) -> List[str]:
        """ Cleaned subfield "a" values of the name variation fields,
        requested with the subfield restriction ("w", "d").
        """
        if not self.__acronyms:
            values = self.get_values(
                marc_ids=self.__name_variations_field_id,
                subfield_id="a",
                subfield_restriction=("w", "d")
            )
            self.__acronyms = [self._clean_value(value) for value in values]
        return self.__acronyms

    @property
    def new_names(self) -> List[str]:
        """ Merged "a"+"b" values of the name variation fields, requested
        with the subfield restriction ("w", "b").
        """
        if not self.__new_names:
            values = self.get_values(
                marc_ids=self.__name_variations_field_id,
                subfield_id=["a", "b"],
                subfield_restriction=("w", "b")
            )
            self.__new_names = [
                self._merge_and_clean(value, ["a", "b"]) for value in values
            ]
        return self.__new_names

    @property
    def old_names(self) -> List[str]:
        """ Merged "a"+"b" values of the name variation fields, requested
        with the subfield restriction ("w", "a").
        """
        if not self.__old_names:
            values = self.get_values(
                marc_ids=self.__name_variations_field_id,
                subfield_id=["a", "b"],
                subfield_restriction=("w", "a")
            )
            self.__old_names = [
                self._merge_and_clean(value, ["a", "b"]) for value in values
            ]
        return self.__old_names

    @property
    def alternative_names(self) -> List[str]:
        """ Merged "a"+"b" values of the name variation fields, requested
        with subfield_to_ignore="w".
        """
        if not self.__alternative_names:
            values = self.get_values(
                marc_ids=self.__name_variations_field_id,
                subfield_id=["a", "b"],
                subfield_to_ignore="w"
            )
            self.__alternative_names = [
                self._merge_and_clean(value, ["a", "b"]) for value in values
            ]
        return self.__alternative_names

    @property
    def related_acronyms(self) -> List[str]:
        """ Cleaned subfield "a" values of the related name fields,
        requested with the subfield restriction ("w", "d").
        """
        if not self.__related_acronyms:
            values = self.get_values(
                marc_ids=self.__related_names_field_id,
                subfield_id="a",
                subfield_restriction=("w", "d")
            )
            self.__related_acronyms = [
                self._clean_value(value) for value in values
            ]
        return self.__related_acronyms

    @property
    def related_new_names(self) -> List[str]:
        """ Merged "a"+"b" values of the related name fields, requested
        with the subfield restriction ("w", "b").
        """
        if not self.__related_new_names:
            values = self.get_values(
                marc_ids=self.__related_names_field_id,
                subfield_id=["a", "b"],
                subfield_restriction=("w", "b")
            )
            self.__related_new_names = [
                self._merge_and_clean(value, ["a", "b"]) for value in values
            ]
        return self.__related_new_names

    @property
    def related_old_names(self) -> List[str]:
        """ Merged "a"+"b" values of the related name fields, requested
        with the subfield restriction ("w", "a").
        """
        if not self.__related_old_names:
            values = self.get_values(
                marc_ids=self.__related_names_field_id,
                subfield_id=["a", "b"],
                subfield_restriction=("w", "a")
            )
            self.__related_old_names = [
                self._merge_and_clean(value, ["a", "b"]) for value in values
            ]
        return self.__related_old_names

    @property
    def source(self) -> str:
        """ Cleaned first subfield "a" value of the source fields, or "".
        """
        if not self.__source:
            values = self.get_values(
                marc_ids=self.__source_field_id,
                subfield_id="a"
            )
            self.__source = self._clean_value(values[0]) if values else ""
        return self.__source

    @property
    def area_code(self) -> str:
        """ Cleaned first subfield "c" value of the area code fields, or "".
        """
        if not self.__area_code:
            values = self.get_values(
                marc_ids=self.__area_code_id,
                subfield_id="c"
            )
            self.__area_code = self._clean_value(values[0]) if values else ""
        return self.__area_code

    @property
    def description(self) -> str:
        """ Cleaned first subfield "i" value of the description fields, or "".
        """
        if not self.__description:
            values = self.get_values(
                marc_ids=self.__description_field_id,
                subfield_id="i"
            )
            self.__description = self._clean_value(values[0]) if values else ""
        return self.__description

    @property
    def variations(self) -> List[str]:
        """ Lower-cased, de-duplicated union of the name, new/old names,
        alternative names and related old/new names, in sorted order.
        """
        if not self.__variations:
            candidates = [self.name]
            candidates.extend(self.new_names)
            candidates.extend(self.old_names)
            candidates.extend(self.alternative_names)
            candidates.extend(self.related_old_names)
            candidates.extend(self.related_new_names)
            # Lower-case BEFORE de-duplicating so that variants differing
            # only in case collapse into one entry, and drop empty strings
            # (e.g. a record without a name); sort for stable output.
            self.__variations = sorted({v.lower() for v in candidates if v})

        return self.__variations

    @property
    def full_record(self) -> dict:
        """ The simplified record as a dict.

        Includes the raw MARC record both as text ("full_record_marc") and
        as a JSON string ("full_record_json"). "link_variations" and
        "link_acronyms" are added only if `add_variations` is enabled.
        """
        if not self.__full_record:
            self.__full_record = {
                "name": self.name,
                "original_name": self.original_name,
                "acronyms": self.acronyms,
                "new_names": self.new_names,
                "old_names": self.old_names,
                "source": self.source,
                "description": self.description,
                "area_code": self.area_code,
                "alternative_names": self.alternative_names,
                # NOTE(review): key is misspelled ("acryonyms"); kept as-is
                # because consumers of the output may depend on it.
                "related_acryonyms": self.related_acronyms,
                "related_new_names": self.related_new_names,
                "related_old_names": self.related_old_names,
                "identifier": self.identifier,
                "identifier_source": self.identifier_source,
                "full_record_marc": str(self.marc_record),
                "full_record_json": json.dumps(self.marc_json_record)
            }
            if self.add_variations:
                self.__full_record.update(
                    {
                        "link_variations": self.variations,
                        "link_acronyms": [a.lower() for a in self.acronyms]
                    }
                )
        return self.__full_record
|