rara-tools 0.7.14__tar.gz → 0.7.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.7.14/rara_tools.egg-info → rara_tools-0.7.15}/PKG-INFO +1 -1
- rara_tools-0.7.15/VERSION +1 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/authorities.py +1 -1
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/base.py +20 -12
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/bibs.py +1 -1
- {rara_tools-0.7.14 → rara_tools-0.7.15/rara_tools.egg-info}/PKG-INFO +1 -1
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_normalization.py +66 -1
- rara_tools-0.7.14/VERSION +0 -1
- {rara_tools-0.7.14 → rara_tools-0.7.15}/LICENSE.md +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/README.md +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/pyproject.toml +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/language_evaluator.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/linker.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/meta_extractor.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/normalizers.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/parsers.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/subject_indexer.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/converters.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/core_formatter.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/formatted_keyword.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/formatted_meta.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/formatted_object.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/decorators.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/digar_schema_converter.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/elastic.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/__init__.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/reader.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/viaf.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/base_record.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/ems_record.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/organization_record.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/person_record.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/title_record.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/tools/marc_converter.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/s3.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/utils.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/SOURCES.txt +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/requires.txt +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/requirements.txt +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/setup.cfg +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_elastic.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_entity_normalizers.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_formatters.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_marc_parsers.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_sierra_converters.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_task_reporter.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_utils.py +0 -0
- {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_viaf_client.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.7.15
|
|
@@ -11,7 +11,7 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
|
|
|
11
11
|
""" Normalize authorities records """
|
|
12
12
|
|
|
13
13
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
14
|
-
classified_fields: List[
|
|
14
|
+
classified_fields: List[List[dict]] = [],
|
|
15
15
|
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
16
16
|
REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "667"]):
|
|
17
17
|
|
|
@@ -34,7 +34,7 @@ class RecordNormalizer:
|
|
|
34
34
|
entities: List of Full names (str). If included, will use NormLinker to match with normalized records on KATA elastic.
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
|
-
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[
|
|
37
|
+
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[List[dict]] = [],
|
|
38
38
|
ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
39
39
|
|
|
40
40
|
# Include, if will replace existing field
|
|
@@ -44,16 +44,16 @@ class RecordNormalizer:
|
|
|
44
44
|
# leader applied to new records
|
|
45
45
|
self.DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
|
|
46
46
|
|
|
47
|
-
def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[
|
|
47
|
+
def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[List[dict]] = []) -> JSONReader:
|
|
48
48
|
"""Setup initial MARC records and data.
|
|
49
49
|
|
|
50
50
|
If no linked entities or more than one linked entity found, we create a new record.
|
|
51
51
|
If one linked entity found, we create an updated record from the linked entity data.
|
|
52
52
|
"""
|
|
53
|
-
|
|
54
53
|
linked_records = []
|
|
55
|
-
|
|
56
|
-
for linked in linking_results or []:
|
|
54
|
+
|
|
55
|
+
for idx, linked in enumerate(linking_results or []):
|
|
56
|
+
|
|
57
57
|
if not isinstance(linked, dict):
|
|
58
58
|
continue
|
|
59
59
|
|
|
@@ -70,12 +70,12 @@ class RecordNormalizer:
|
|
|
70
70
|
})
|
|
71
71
|
self.records_extra_data.append({
|
|
72
72
|
"entity": entity,
|
|
73
|
-
"classified_fields": classified_fields,
|
|
73
|
+
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
74
74
|
"edited": False
|
|
75
75
|
})
|
|
76
76
|
continue
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
elif len(linked_info) > 1:
|
|
79
79
|
# Multiple linked entities found, create new record
|
|
80
80
|
logger.info(
|
|
81
81
|
f"Multiple linked entities found for {entity}. Creating new record.")
|
|
@@ -85,7 +85,7 @@ class RecordNormalizer:
|
|
|
85
85
|
})
|
|
86
86
|
self.records_extra_data.append({
|
|
87
87
|
"entity": entity,
|
|
88
|
-
"classified_fields": classified_fields,
|
|
88
|
+
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
89
89
|
"edited": False
|
|
90
90
|
})
|
|
91
91
|
continue
|
|
@@ -100,7 +100,7 @@ class RecordNormalizer:
|
|
|
100
100
|
self.records_extra_data.append({
|
|
101
101
|
"entity": entity,
|
|
102
102
|
"viaf": linked_item.get("viaf", {}),
|
|
103
|
-
"classified_fields": classified_fields,
|
|
103
|
+
"classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
|
|
104
104
|
"type": "linked",
|
|
105
105
|
"edited": True
|
|
106
106
|
})
|
|
@@ -359,10 +359,12 @@ class RecordNormalizer:
|
|
|
359
359
|
def _include_classified_fields(self, record: Record, classified_fields: list[dict]) -> None:
|
|
360
360
|
"""Include classified fields from core, if any.
|
|
361
361
|
e.g. classified_fields=[{'670': {'ind1': ' ', 'ind2': '0', 'subfields': [{'a': 'Päikesekiri, 2021'}]}}]
|
|
362
|
+
|
|
363
|
+
For each record, we need a list of dicts, to handle repeatable fields.
|
|
362
364
|
"""
|
|
363
365
|
if not classified_fields:
|
|
364
366
|
return
|
|
365
|
-
|
|
367
|
+
|
|
366
368
|
fields = [
|
|
367
369
|
Field(
|
|
368
370
|
tag=str(tag),
|
|
@@ -373,7 +375,6 @@ class RecordNormalizer:
|
|
|
373
375
|
for tag, v in field_dict.items()
|
|
374
376
|
]
|
|
375
377
|
|
|
376
|
-
logger.info(f"Adding classified fields: {[f.tag for f in fields]}")
|
|
377
378
|
self._add_fields_to_record(record, fields)
|
|
378
379
|
|
|
379
380
|
def _normalize_common(self, record: Record, is_editing_existing_record: bool, classified_fields: List[dict]) -> None:
|
|
@@ -474,6 +475,13 @@ class RecordNormalizer:
|
|
|
474
475
|
def _normalize_record(self, record: Record, sierraID: str,
|
|
475
476
|
viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
|
|
476
477
|
return record
|
|
478
|
+
|
|
479
|
+
def get_record(self, index: int) -> Record:
|
|
480
|
+
"""Get normalized record by index."""
|
|
481
|
+
for idx, record in enumerate(self):
|
|
482
|
+
if idx == index:
|
|
483
|
+
return record
|
|
484
|
+
raise IndexError("Record index out of range.")
|
|
477
485
|
|
|
478
486
|
@property
|
|
479
487
|
def data(self) -> List[dict]:
|
|
@@ -490,7 +498,7 @@ class RecordNormalizer:
|
|
|
490
498
|
@property
|
|
491
499
|
def first(self) -> Record:
|
|
492
500
|
return next(iter(self))
|
|
493
|
-
|
|
501
|
+
|
|
494
502
|
def __iter__(self) -> Iterator:
|
|
495
503
|
viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
|
|
496
504
|
sierra_id_path = "sierraID"
|
|
@@ -12,7 +12,7 @@ class BibRecordNormalizer(RecordNormalizer):
|
|
|
12
12
|
""" Normalize bib records. """
|
|
13
13
|
|
|
14
14
|
def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
|
|
15
|
-
classified_fields: List[
|
|
15
|
+
classified_fields: List[List[dict]] = [],
|
|
16
16
|
ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
|
|
17
17
|
REPEATABLE_FIELDS: List[str] = ["667"]):
|
|
18
18
|
super().__init__(linking_results, sierra_data, classified_fields)
|
|
@@ -741,6 +741,7 @@ def test_classified_fields_added_to_linked_record():
|
|
|
741
741
|
""" Test that classified fields can be passed to normalizer & added to linked record """
|
|
742
742
|
|
|
743
743
|
classified_fields = [
|
|
744
|
+
[
|
|
744
745
|
{
|
|
745
746
|
"670": {
|
|
746
747
|
"ind1": " ",
|
|
@@ -753,6 +754,7 @@ def test_classified_fields_added_to_linked_record():
|
|
|
753
754
|
}
|
|
754
755
|
}
|
|
755
756
|
]
|
|
757
|
+
]
|
|
756
758
|
# Case 1 - no 670 exists, should be added to linked record
|
|
757
759
|
for normalizer in (AuthoritiesRecordNormalizer, BibRecordNormalizer):
|
|
758
760
|
linking_results = [MOCK_LINKER_NOT_FOUND]
|
|
@@ -776,4 +778,67 @@ def test_classified_fields_added_to_linked_record():
|
|
|
776
778
|
record = normalizer.first
|
|
777
779
|
fields_670 = record.get_fields("670")
|
|
778
780
|
assert len(fields_670) == 1
|
|
779
|
-
assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
|
|
781
|
+
assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
|
|
782
|
+
|
|
783
|
+
def test_classified_data_with_multiple_records():
|
|
784
|
+
""" Test classified data with multiple records - classified fields should be matched to records by position (index) in linking_results """
|
|
785
|
+
|
|
786
|
+
classified_fields = [
|
|
787
|
+
[{
|
|
788
|
+
"670": {
|
|
789
|
+
"ind1": " ",
|
|
790
|
+
"ind2": "0",
|
|
791
|
+
"subfields": [
|
|
792
|
+
{
|
|
793
|
+
"a": "Päikesekiri, 2021"
|
|
794
|
+
}
|
|
795
|
+
]
|
|
796
|
+
},
|
|
797
|
+
"111": {
|
|
798
|
+
"ind1": "2",
|
|
799
|
+
"ind2": " ",
|
|
800
|
+
"subfields": [
|
|
801
|
+
{
|
|
802
|
+
"a": "Eesti Kirjandusmuuseum"
|
|
803
|
+
}
|
|
804
|
+
]
|
|
805
|
+
}
|
|
806
|
+
}],
|
|
807
|
+
[],
|
|
808
|
+
[{
|
|
809
|
+
"670": {
|
|
810
|
+
"ind1": " ",
|
|
811
|
+
"ind2": "0",
|
|
812
|
+
"subfields": [
|
|
813
|
+
{
|
|
814
|
+
"a": "Teine kirjeldus, 2022"
|
|
815
|
+
}
|
|
816
|
+
]
|
|
817
|
+
}
|
|
818
|
+
}],
|
|
819
|
+
]
|
|
820
|
+
|
|
821
|
+
# Three linking results (not found, one found, not found) - each is paired with the classified fields at the same index
|
|
822
|
+
for normalizer in (AuthoritiesRecordNormalizer, BibRecordNormalizer):
|
|
823
|
+
linking_results = [MOCK_LINKER_NOT_FOUND, MOCK_LINKER_ONE_FOUND, MOCK_LINKER_NOT_FOUND]
|
|
824
|
+
normalizer = normalizer(linking_results=linking_results, classified_fields=classified_fields)
|
|
825
|
+
|
|
826
|
+
# Check first record - should have 670 & 111 from classified data
|
|
827
|
+
record = normalizer.first
|
|
828
|
+
assert len(record.get_fields("670")) == 1
|
|
829
|
+
fields_670 = record.get_fields("670")[0]
|
|
830
|
+
fields_111 = record.get_fields("111")[0]
|
|
831
|
+
assert fields_670.get_subfields("a")[0] == "Päikesekiri, 2021"
|
|
832
|
+
assert fields_111.get_subfields("a")[0] == "Eesti Kirjandusmuuseum"
|
|
833
|
+
|
|
834
|
+
# Check second record - should not have 670 from classified data
|
|
835
|
+
record = normalizer.get_record(1)
|
|
836
|
+
assert len(record.get_fields("670")) == 1
|
|
837
|
+
fields_670 = record.get_fields("670")[0]
|
|
838
|
+
assert fields_670.get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
|
|
839
|
+
|
|
840
|
+
# Check third record - should have 670 from classified data
|
|
841
|
+
record = normalizer.get_record(2)
|
|
842
|
+
assert len(record.get_fields("670")) == 1
|
|
843
|
+
fields_670 = record.get_fields("670")[0]
|
|
844
|
+
assert fields_670.get_subfields("a")[0] == "Teine kirjeldus, 2022"
|
rara_tools-0.7.14/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.7.14
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/organization_parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/organization_record.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|