rara-tools 0.7.9__tar.gz → 0.7.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.7.9/rara_tools.egg-info → rara_tools-0.7.11}/PKG-INFO +1 -1
- rara_tools-0.7.11/VERSION +1 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/normalizers/authorities.py +3 -3
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/normalizers/base.py +20 -11
- rara_tools-0.7.11/rara_tools/normalizers/bibs.py +111 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11/rara_tools.egg-info}/PKG-INFO +1 -1
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_normalization.py +65 -26
- rara_tools-0.7.9/VERSION +0 -1
- rara_tools-0.7.9/rara_tools/normalizers/bibs.py +0 -63
- {rara_tools-0.7.9 → rara_tools-0.7.11}/LICENSE.md +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/README.md +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/pyproject.toml +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/language_evaluator.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/linker.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/meta_extractor.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/normalizers.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/parsers.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/constants/subject_indexer.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/converters.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/core_formatters/core_formatter.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/core_formatters/formatted_keyword.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/core_formatters/formatted_meta.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/core_formatters/formatted_object.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/decorators.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/digar_schema_converter.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/elastic.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/normalizers/__init__.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/normalizers/reader.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/normalizers/viaf.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/base_record.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/ems_record.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/organization_record.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/person_record.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/title_record.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/tools/marc_converter.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/s3.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/utils.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools.egg-info/SOURCES.txt +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools.egg-info/requires.txt +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/requirements.txt +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/setup.cfg +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_elastic.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_entity_normalizers.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_formatters.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_marc_parsers.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_sierra_converters.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_task_reporter.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_utils.py +0 -0
- {rara_tools-0.7.9 → rara_tools-0.7.11}/tests/test_viaf_client.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.7.11
|
|
@@ -69,7 +69,6 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
69
69
|
record, [Field(tag="046", indicators=EMPTY_INDICATORS, subfields=subfields_046)])
|
|
70
70
|
|
|
71
71
|
def _add_viaf_url_or_isni(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
72
|
-
# TODO 024. will be used to store KRATT KATA ID. Just generate one?
|
|
73
72
|
viaf_url = f"https://viaf.org/viaf/{viaf_record.viaf_id}"
|
|
74
73
|
|
|
75
74
|
subfields = [Subfield("0", self.get_subfield(
|
|
@@ -109,7 +108,6 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
109
108
|
100, 110, 111 - non-repeatable field, attempts to add author type, if missing.
|
|
110
109
|
|
|
111
110
|
"""
|
|
112
|
-
# TODO: include KRATT KATA ID to 024 and remove on delete. Increment last elastic ID?
|
|
113
111
|
if not viaf_record:
|
|
114
112
|
return
|
|
115
113
|
|
|
@@ -119,7 +117,9 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
119
117
|
self._add_author(record, viaf_record)
|
|
120
118
|
|
|
121
119
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
122
|
-
viaf_record: VIAFRecord,
|
|
120
|
+
viaf_record: VIAFRecord,
|
|
121
|
+
is_editing_existing_record: bool,
|
|
122
|
+
original_entity: str) -> Record:
|
|
123
123
|
|
|
124
124
|
self._normalize_sierra(record, sierraID)
|
|
125
125
|
self._normalize_viaf(record, viaf_record)
|
|
@@ -147,21 +147,26 @@ class RecordNormalizer:
|
|
|
147
147
|
return filter(lambda field: not self._field_in_record(field, record), fields)
|
|
148
148
|
|
|
149
149
|
def _format_date(self, value: str) -> str:
|
|
150
|
-
|
|
150
|
+
|
|
151
|
+
if not value:
|
|
151
152
|
return ""
|
|
152
153
|
|
|
153
154
|
if isinstance(value, (datetime, date)):
|
|
154
|
-
return value.strftime(
|
|
155
|
+
return value.strftime("%Y%m%d")
|
|
156
|
+
|
|
157
|
+
val = str(value).strip()
|
|
155
158
|
|
|
156
159
|
try:
|
|
157
|
-
dt = parser.parse(
|
|
158
|
-
|
|
159
|
-
logger.info(f"Formatted date '{formatted_date}' from value '{value}'")
|
|
160
|
-
return formatted_date
|
|
161
|
-
except Exception as e:
|
|
162
|
-
logger.info(f"Failed to format date string '{value}': {e}")
|
|
160
|
+
dt = parser.parse(val, fuzzy=False, default=datetime(1, 1, 1))
|
|
161
|
+
except Exception:
|
|
163
162
|
return ""
|
|
164
163
|
|
|
164
|
+
if len(val) == 4 and val.isdigit():
|
|
165
|
+
return dt.strftime("%Y") # YYYY
|
|
166
|
+
if len(val) in (6, 7): # YYYYMM or YYYY-MM
|
|
167
|
+
return dt.strftime("%Y%m") # YYYYMM
|
|
168
|
+
return dt.strftime("%Y%m%d") # YYYYMMDD
|
|
169
|
+
|
|
165
170
|
def get_subfield(self, record: Record, tag: str, subfield: str, default: str) -> str:
|
|
166
171
|
""" get record existing subfield value or assign a fallback value. """
|
|
167
172
|
|
|
@@ -355,7 +360,7 @@ class RecordNormalizer:
|
|
|
355
360
|
return viaf_record
|
|
356
361
|
|
|
357
362
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
358
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
363
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
359
364
|
return record
|
|
360
365
|
|
|
361
366
|
@property
|
|
@@ -369,7 +374,11 @@ class RecordNormalizer:
|
|
|
369
374
|
logger.error(f"Failed to normalize record: {e}")
|
|
370
375
|
continue
|
|
371
376
|
return result
|
|
372
|
-
|
|
377
|
+
|
|
378
|
+
@property
|
|
379
|
+
def first(self) -> Record:
|
|
380
|
+
return next(iter(self))
|
|
381
|
+
|
|
373
382
|
def __iter__(self) -> Iterator:
|
|
374
383
|
viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
|
|
375
384
|
sierra_id_path = "sierraID"
|
|
@@ -386,7 +395,7 @@ class RecordNormalizer:
|
|
|
386
395
|
record = self._normalize_common(record, is_editing_existing_record)
|
|
387
396
|
|
|
388
397
|
normalized_record = self._normalize_record(
|
|
389
|
-
record, sierra_id, viaf_record, is_editing_existing_record)
|
|
398
|
+
record, sierra_id, viaf_record, is_editing_existing_record, original_entity=entity)
|
|
390
399
|
|
|
391
400
|
normalized_record.fields.sort(key=lambda field: field.tag)
|
|
392
401
|
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from pymarc import (Field, Subfield, Record)
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from rara_tools.constants import EMPTY_INDICATORS
|
|
5
|
+
from rara_tools.normalizers.viaf import VIAFRecord
|
|
6
|
+
from rara_tools.normalizers import RecordNormalizer
|
|
7
|
+
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BibRecordNormalizer(RecordNormalizer):
|
|
12
|
+
""" Normalize bib records. """
|
|
13
|
+
|
|
14
|
+
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
15
|
+
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
16
|
+
REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
17
|
+
super().__init__(linking_results, sierra_data)
|
|
18
|
+
self.DEFAULT_LEADER = "00399nz a2200145n 4500" # must be 24 digits
|
|
19
|
+
self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
|
|
20
|
+
self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
|
|
21
|
+
|
|
22
|
+
self.records_extra_data = []
|
|
23
|
+
self.sierra_data = sierra_data
|
|
24
|
+
self.records = self._setup_records(linking_results, sierra_data)
|
|
25
|
+
|
|
26
|
+
def _normalize_sierra(self, record: Record) -> Record:
|
|
27
|
+
|
|
28
|
+
suffix_008 = "|||aznnnaabn || ||| "
|
|
29
|
+
|
|
30
|
+
fields = [
|
|
31
|
+
Field(
|
|
32
|
+
tag="008",
|
|
33
|
+
data=f"{self.current_timestamp()}{suffix_008}"
|
|
34
|
+
),
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
self._add_fields_to_record(record, fields)
|
|
38
|
+
|
|
39
|
+
def _include_name_variations(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
40
|
+
""" Include name variations from VIAF record as 400|t fields """
|
|
41
|
+
|
|
42
|
+
if not viaf_record or not viaf_record.name_variations:
|
|
43
|
+
return
|
|
44
|
+
|
|
45
|
+
existing_name_variations = record.get_fields("400")
|
|
46
|
+
existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("t")]
|
|
47
|
+
|
|
48
|
+
fields = []
|
|
49
|
+
|
|
50
|
+
for variation in viaf_record.name_variations:
|
|
51
|
+
if variation not in existing_variations:
|
|
52
|
+
fields.append(
|
|
53
|
+
Field(
|
|
54
|
+
tag="400",
|
|
55
|
+
indicators=EMPTY_INDICATORS,
|
|
56
|
+
subfields=[
|
|
57
|
+
Subfield("t", variation)
|
|
58
|
+
]
|
|
59
|
+
)
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
self._add_fields_to_record(record, fields)
|
|
63
|
+
|
|
64
|
+
def _add_author(self, record: Record, viaf_record: Optional[VIAFRecord], original_entity: str) -> Optional[Field]:
|
|
65
|
+
if record.get("100") or record.get("110") or record.get("111"):
|
|
66
|
+
return record
|
|
67
|
+
|
|
68
|
+
type_map = {
|
|
69
|
+
"Personal": "100",
|
|
70
|
+
"Corporate": "110",
|
|
71
|
+
"Collective": "111"
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
tag = type_map.get(getattr(viaf_record, "name_type", None), "100")
|
|
75
|
+
title = getattr(viaf_record, "name", None) or original_entity
|
|
76
|
+
|
|
77
|
+
fields = [Field(tag=tag, indicators=EMPTY_INDICATORS, subfields=[Subfield("t", title)])]
|
|
78
|
+
|
|
79
|
+
self._add_fields_to_record(record, fields)
|
|
80
|
+
|
|
81
|
+
if viaf_record:
|
|
82
|
+
self._include_name_variations(record, viaf_record)
|
|
83
|
+
|
|
84
|
+
def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord, original_entity: str) -> None:
|
|
85
|
+
|
|
86
|
+
if not viaf_record:
|
|
87
|
+
# viaf record not found, include original entity as 100|t
|
|
88
|
+
self._add_author(record, viaf_record=None, original_entity=original_entity)
|
|
89
|
+
return record
|
|
90
|
+
|
|
91
|
+
viaf_id = viaf_record.viaf_id
|
|
92
|
+
fields = [
|
|
93
|
+
Field(
|
|
94
|
+
tag="035",
|
|
95
|
+
indicators=EMPTY_INDICATORS,
|
|
96
|
+
subfields=[
|
|
97
|
+
Subfield("a", viaf_id)
|
|
98
|
+
]
|
|
99
|
+
)
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
self._add_fields_to_record(record, fields)
|
|
103
|
+
self._add_author(record, viaf_record, original_entity=original_entity)
|
|
104
|
+
|
|
105
|
+
def _normalize_record(self, record: Record, sierraID: str,
|
|
106
|
+
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
107
|
+
|
|
108
|
+
self._normalize_sierra(record)
|
|
109
|
+
self._normalize_viaf(record, viaf_record, original_entity=original_entity)
|
|
110
|
+
|
|
111
|
+
return record
|
|
@@ -385,7 +385,7 @@ def test_add_birth_and_death_dates():
|
|
|
385
385
|
normalizer._add_birth_and_death_dates(record, viaf_record)
|
|
386
386
|
|
|
387
387
|
field_046 = str(record.get_fields("046")[0])
|
|
388
|
-
assert field_046 == "=046 \\\\$
|
|
388
|
+
assert field_046 == "=046 \\\\$f19160107$g19750605"
|
|
389
389
|
|
|
390
390
|
# Case two: viaf record has birth date, but no death date (author still alive)
|
|
391
391
|
viaf_record = normalizer._get_viaf_record(
|
|
@@ -402,7 +402,7 @@ def test_add_birth_and_death_dates():
|
|
|
402
402
|
|
|
403
403
|
field_046 = str(record.get_fields("046")[0])
|
|
404
404
|
# empty indicators represented with \
|
|
405
|
-
assert field_046 == "=046 \\\\$
|
|
405
|
+
assert field_046 == "=046 \\\\$f19700817"
|
|
406
406
|
|
|
407
407
|
def test_add_nationality():
|
|
408
408
|
""" Test adding nationality from VIAF record to 043 field """
|
|
@@ -456,7 +456,7 @@ def test_add_nationality():
|
|
|
456
456
|
# Case 4 - 043 field already exists - should not get edited (not in ALLOW_EDIT_FIELDS)
|
|
457
457
|
linking_results = [{
|
|
458
458
|
"original_entity": "Eduard Vilde",
|
|
459
|
-
"entity_type":
|
|
459
|
+
"entity_type": EntityType.PER,
|
|
460
460
|
"linked_info": [
|
|
461
461
|
{
|
|
462
462
|
"json": {
|
|
@@ -482,12 +482,7 @@ def test_add_nationality():
|
|
|
482
482
|
normalizer = AuthoritiesRecordNormalizer(
|
|
483
483
|
linking_results=linking_results,
|
|
484
484
|
)
|
|
485
|
-
|
|
486
|
-
json.dumps(normalizer.data, ensure_ascii=False)
|
|
487
|
-
)
|
|
488
|
-
|
|
489
|
-
record = next(iter(records))
|
|
490
|
-
|
|
485
|
+
record = normalizer.first
|
|
491
486
|
# mock run add nationality with foreign VIAF record
|
|
492
487
|
viaf_record = normalizer._get_viaf_record(
|
|
493
488
|
record,
|
|
@@ -519,9 +514,7 @@ def test_create_new_normrecord():
|
|
|
519
514
|
data = normalizer.data
|
|
520
515
|
assert len(data) == 1
|
|
521
516
|
|
|
522
|
-
record =
|
|
523
|
-
json.dumps(normalizer.data, ensure_ascii=False)
|
|
524
|
-
)))
|
|
517
|
+
record = normalizer.first
|
|
525
518
|
leader = str(record.leader)
|
|
526
519
|
assert leader == "01682nz a2200349n 4500"
|
|
527
520
|
assert len(leader) == 24
|
|
@@ -534,9 +527,7 @@ def test_create_new_normrecord():
|
|
|
534
527
|
normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
|
|
535
528
|
data = normalizer.data
|
|
536
529
|
assert len(data) == 1
|
|
537
|
-
record =
|
|
538
|
-
json.dumps(normalizer.data, ensure_ascii=False)
|
|
539
|
-
)))
|
|
530
|
+
record = normalizer.first
|
|
540
531
|
|
|
541
532
|
_validate_new_record_notes(record)
|
|
542
533
|
# validate leader
|
|
@@ -552,9 +543,7 @@ def test_create_new_normrecord():
|
|
|
552
543
|
normalizer = BibRecordNormalizer(linking_results=linking_results)
|
|
553
544
|
data = normalizer.data
|
|
554
545
|
assert len(data) == 1
|
|
555
|
-
record =
|
|
556
|
-
json.dumps(normalizer.data, ensure_ascii=False)
|
|
557
|
-
)))
|
|
546
|
+
record = normalizer.first
|
|
558
547
|
|
|
559
548
|
_validate_new_record_notes(record)
|
|
560
549
|
# validate leader
|
|
@@ -571,9 +560,7 @@ def test_create_new_normrecord():
|
|
|
571
560
|
normalizer = BibRecordNormalizer(linking_results=linking_results)
|
|
572
561
|
data = normalizer.data
|
|
573
562
|
assert len(data) == 1
|
|
574
|
-
record =
|
|
575
|
-
json.dumps(normalizer.data, ensure_ascii=False)
|
|
576
|
-
)))
|
|
563
|
+
record = normalizer.first
|
|
577
564
|
|
|
578
565
|
def test_680_field_on_existing_record_moved_to_667():
|
|
579
566
|
""" 680 Should not be added for new, if exists on existing record, should be moved to 667 """
|
|
@@ -604,15 +591,67 @@ def test_680_field_on_existing_record_moved_to_667():
|
|
|
604
591
|
normalizer = AuthoritiesRecordNormalizer(
|
|
605
592
|
linking_results=linking_results
|
|
606
593
|
)
|
|
607
|
-
record =
|
|
608
|
-
json.dumps(normalizer.data, ensure_ascii=False)
|
|
609
|
-
)))
|
|
594
|
+
record = normalizer.first
|
|
610
595
|
|
|
611
596
|
fields_680 = record.get_fields("680")
|
|
612
597
|
assert len(fields_680) == 0
|
|
613
598
|
fields_667 = record.get_fields("667")
|
|
614
599
|
assert len(fields_667) == 3 # original + moved from 680 + new note
|
|
615
600
|
|
|
601
|
+
def test_date_formatting():
|
|
602
|
+
normalizer = AuthoritiesRecordNormalizer()
|
|
603
|
+
|
|
604
|
+
dates = {
|
|
605
|
+
"19700712": "19700712",
|
|
606
|
+
"1970": "1970",
|
|
607
|
+
"1970-07": "197007",
|
|
608
|
+
"2001-12-31": "20011231",
|
|
609
|
+
"1999-01": "199901",
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
for input_date, expected in dates.items():
|
|
613
|
+
assert normalizer._format_date(input_date) == expected
|
|
614
|
+
|
|
615
|
+
# invalid date formats - should return empty string
|
|
616
|
+
invalid_dates = ["abcd", "199A0101"]
|
|
617
|
+
for date in invalid_dates:
|
|
618
|
+
assert normalizer._format_date(date) == ""
|
|
619
|
+
|
|
620
|
+
def test_new_bibrecord_title_included():
|
|
621
|
+
""" normrecord for bibs has to always have the 1XX|t field filled """
|
|
622
|
+
|
|
623
|
+
# Case 1 No linker response, & Viaf record found
|
|
624
|
+
linking_results = [{
|
|
625
|
+
"original_entity": "Lord of the Rings",
|
|
626
|
+
"entity_type": EntityType.TITLE,
|
|
627
|
+
"linked_info": []
|
|
628
|
+
}]
|
|
616
629
|
|
|
617
|
-
|
|
618
|
-
|
|
630
|
+
normalizer = BibRecordNormalizer(
|
|
631
|
+
linking_results=linking_results,
|
|
632
|
+
)
|
|
633
|
+
data = normalizer.data
|
|
634
|
+
assert len(data) == 1 # should enrich existing record
|
|
635
|
+
record = normalizer.first
|
|
636
|
+
|
|
637
|
+
_validate_new_record_notes(record)
|
|
638
|
+
fields_100 = record.get_fields("100")
|
|
639
|
+
assert len(fields_100) == 1
|
|
640
|
+
assert fields_100[0].get_subfields("t")[0] == "Lord of the rings"
|
|
641
|
+
|
|
642
|
+
# Case 2 - Viaf record not found - should use original entity
|
|
643
|
+
linking_results = [{
|
|
644
|
+
"original_entity": "Roolijoodiku katastroofiline jõulusõit",
|
|
645
|
+
"entity_type": EntityType.TITLE,
|
|
646
|
+
"linked_info": []
|
|
647
|
+
}]
|
|
648
|
+
normalizer = BibRecordNormalizer(
|
|
649
|
+
linking_results=linking_results,
|
|
650
|
+
)
|
|
651
|
+
record = normalizer.first
|
|
652
|
+
data = normalizer.data
|
|
653
|
+
assert len(data) == 1 # should enrich existing record
|
|
654
|
+
|
|
655
|
+
fields_100 = record.get_fields("100")
|
|
656
|
+
assert len(fields_100) == 1
|
|
657
|
+
assert fields_100[0].get_subfields("t")[0] == "Roolijoodiku katastroofiline jõulusõit"
|
rara_tools-0.7.9/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.7.9
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
from pymarc import (Field, Indicators, Subfield, Record)
|
|
2
|
-
|
|
3
|
-
from rara_tools.constants import EMPTY_INDICATORS
|
|
4
|
-
from rara_tools.normalizers.viaf import VIAFRecord
|
|
5
|
-
from rara_tools.normalizers import RecordNormalizer
|
|
6
|
-
|
|
7
|
-
from typing import List
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class BibRecordNormalizer(RecordNormalizer):
|
|
11
|
-
""" Normalize bib records. """
|
|
12
|
-
|
|
13
|
-
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
14
|
-
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
15
|
-
REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
16
|
-
super().__init__(linking_results, sierra_data)
|
|
17
|
-
self.DEFAULT_LEADER = "00399nz a2200145n 4500" # must be 24 digits
|
|
18
|
-
self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
|
|
19
|
-
self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
|
|
20
|
-
|
|
21
|
-
self.records_extra_data = []
|
|
22
|
-
self.sierra_data = sierra_data
|
|
23
|
-
self.records = self._setup_records(linking_results, sierra_data)
|
|
24
|
-
|
|
25
|
-
def _normalize_sierra(self, record: Record) -> Record:
|
|
26
|
-
|
|
27
|
-
suffix_008 = "|||aznnnaabn || ||| "
|
|
28
|
-
|
|
29
|
-
fields = [
|
|
30
|
-
Field(
|
|
31
|
-
tag="008",
|
|
32
|
-
data=f"{self.current_timestamp()}{suffix_008}"
|
|
33
|
-
),
|
|
34
|
-
]
|
|
35
|
-
|
|
36
|
-
self._add_fields_to_record(record, fields)
|
|
37
|
-
|
|
38
|
-
def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord) -> None:
|
|
39
|
-
|
|
40
|
-
if not viaf_record:
|
|
41
|
-
return record
|
|
42
|
-
|
|
43
|
-
viaf_id = viaf_record.viaf_id
|
|
44
|
-
fields = [
|
|
45
|
-
Field(
|
|
46
|
-
tag="035",
|
|
47
|
-
indicators=EMPTY_INDICATORS,
|
|
48
|
-
subfields=[
|
|
49
|
-
Subfield("a", viaf_id)
|
|
50
|
-
]
|
|
51
|
-
)
|
|
52
|
-
]
|
|
53
|
-
|
|
54
|
-
self._add_fields_to_record(record, fields)
|
|
55
|
-
self._add_author(record, viaf_record)
|
|
56
|
-
|
|
57
|
-
def _normalize_record(self, record: Record, sierraID: str,
|
|
58
|
-
viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
|
|
59
|
-
|
|
60
|
-
self._normalize_sierra(record)
|
|
61
|
-
self._normalize_viaf(record, viaf_record)
|
|
62
|
-
|
|
63
|
-
return record
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/organization_parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.9 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/organization_record.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|