rara-tools 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic.
- {rara_tools-0.1.0/rara_tools.egg-info → rara_tools-0.3.0}/PKG-INFO +3 -1
- rara_tools-0.3.0/VERSION +1 -0
- rara_tools-0.3.0/rara_tools/constants/__init__.py +1 -0
- rara_tools-0.3.0/rara_tools/constants/normalizers.py +6 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/converters.py +42 -33
- {rara_tools-0.1.0 → rara_tools-0.3.0/rara_tools.egg-info}/PKG-INFO +3 -1
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/SOURCES.txt +6 -2
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/requires.txt +2 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/requirements.txt +2 -0
- rara_tools-0.3.0/tests/test_normalization.py +323 -0
- rara_tools-0.3.0/tests/test_sierra_converters.py +101 -0
- rara_tools-0.3.0/tests/test_utils.py +76 -0
- rara_tools-0.3.0/tests/test_viaf_client.py +19 -0
- rara_tools-0.1.0/VERSION +0 -1
- rara_tools-0.1.0/rara_tools/constants/__init__.py +0 -0
- rara_tools-0.1.0/tests/test_converters.py +0 -127
- {rara_tools-0.1.0 → rara_tools-0.3.0}/LICENSE.md +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/README.md +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/pyproject.toml +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/decorators.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/digar_schema_converter.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/elastic.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/s3.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/utils.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/setup.cfg +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_elastic.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_task_reporter.py +0 -0
{rara_tools-0.1.0/rara_tools.egg-info → rara_tools-0.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.1.0
+Version: 0.3.0
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -15,6 +15,8 @@ Requires-Dist: elasticsearch_dsl==8.*
 Requires-Dist: minio==7.*
 Requires-Dist: requests
 Requires-Dist: iso639-lang
+Requires-Dist: pymarc
+Requires-Dist: glom
 Provides-Extra: testing
 Requires-Dist: pytest>=8.0; extra == "testing"
 Requires-Dist: pytest-order; extra == "testing"
rara_tools-0.3.0/VERSION ADDED

@@ -0,0 +1 @@
0.3.0
rara_tools-0.3.0/rara_tools/constants/__init__.py ADDED

@@ -0,0 +1 @@
from .normalizers import *
{rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/converters.py

@@ -1,19 +1,22 @@
-from .exceptions import SierraResponseConverterException
+from rara_tools.exceptions import SierraResponseConverterException
 
 
 class SierraResponseConverter:
     """Converts a JSON response from the Sierra API to MARC-in-JSON format."""
-
+
     def __init__(self, response: dict):
         if not isinstance(response, dict):
-            raise SierraResponseConverterException(
+            raise SierraResponseConverterException(
+                "Please provide a valid JSON response.")
         self.response = response
-
-
-
+
+    @staticmethod
+    def _map_control_fields(field: dict) -> dict:
+        # for tags < 010, no subfields, instead one str value in "value"
         return {field["tag"]: field["value"]}
-
-
+
+    @staticmethod
+    def _map_data_fields(field: dict) -> dict:
         """ Maps marc fields > 010.
 
         Args:
@@ -22,60 +25,66 @@
         Returns:
             dict: standardised marc-in-json format.
         """
-
+
         data = field["data"]
-
+
         # Order matters ind1, in2, subfields
         field_data = {
             "ind1": data.get("ind1", " "),
             "ind2": data.get("ind2", " "),
             "subfields": data.get("subfields", [])
         }
-
+
         return {field["tag"]: field_data}
-
-
+
+    @staticmethod
+    def _is_marc21structured(field: dict) -> bool:
         """Checks if the field is already structured according to MARC21 in JSON"""
         return any(key.isdigit() for key in field.keys())
-
-
+
     def _handle_field_type(self, field: dict) -> dict:
-
+
         if self._is_marc21structured(field):
             return field
-
+
         if field.get("data"):
             return self._map_data_fields(field)
-
+
         tag = field.get("tag")
-
+
         if not tag:
-            raise SierraResponseConverterException(
-
+            raise SierraResponseConverterException(
+                "Field is missing MARC21 tag.")
+
         if tag < "010":
             return self._map_control_fields(field)
         else:
             return self._map_data_fields(field)
-
+
     def _convert_response(self) -> list:
         entries = self.response.get("entries")
         if not entries:
-            raise SierraResponseConverterException(
-
+            raise SierraResponseConverterException(
+                "No entries found in the response.")
+
         try:
-            return
-            {
-
+            return [
+                {
+                    "sierraID": str(e["id"]),
+                    "leader": e["marc"]["leader"],
+                    "fields": [
+                        self._handle_field_type(f) for f in e["marc"]["fields"]
                 ]}
                 for e in entries
-            ]
-
+            ]
+
         except KeyError as e:
-            raise SierraResponseConverterException(
-
-
+            raise SierraResponseConverterException(
+                f"Malformed response: missing key {e}")
+
     def convert(self) -> list:
         try:
             return self._convert_response()
         except Exception as e:
-            raise SierraResponseConverterException(
+            raise SierraResponseConverterException(
+                f"An unexpected error occurred: {e}")
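For orientation, the snippet below is a minimal usage sketch of the rewritten converter, mirroring how the new tests drive it: a Sierra API response dict goes in, a list of MARC-in-JSON records comes out. The response is an abbreviated stand-in; the "001" control field and its value are illustrative rather than taken from the package's fixtures.

from rara_tools.converters import SierraResponseConverter

# Abbreviated Sierra API response (see example_res in the tests for a fuller one).
response = {
    "entries": [
        {
            "id": 1126963,
            "marc": {
                "leader": "00000nz a2200145n 4500",
                "fields": [
                    # control field (tag < 010): single string in "value" (illustrative)
                    {"tag": "001", "value": "b12345678"},
                    # data field (tag >= 010): indicators and subfields under "data"
                    {"tag": "100", "data": {"ind1": "1", "ind2": " ",
                                            "subfields": [{"code": "a", "data": "Viggor, Signe,"}]}},
                ],
            },
        }
    ]
}

converter = SierraResponseConverter(response)
records = converter.convert()
# -> [{"sierraID": "1126963", "leader": "00000nz a2200145n 4500",
#      "fields": [{"001": "b12345678"},
#                 {"100": {"ind1": "1", "ind2": " ", "subfields": [...]}}]}]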
{rara_tools-0.1.0 → rara_tools-0.3.0/rara_tools.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.1.0
+Version: 0.3.0
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -15,6 +15,8 @@ Requires-Dist: elasticsearch_dsl==8.*
 Requires-Dist: minio==7.*
 Requires-Dist: requests
 Requires-Dist: iso639-lang
+Requires-Dist: pymarc
+Requires-Dist: glom
 Provides-Extra: testing
 Requires-Dist: pytest>=8.0; extra == "testing"
 Requires-Dist: pytest-order; extra == "testing"
{rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/SOURCES.txt

@@ -19,10 +19,14 @@ rara_tools.egg-info/top_level.txt
 rara_tools/constants/__init__.py
 rara_tools/constants/digitizer.py
 rara_tools/constants/general.py
-
+rara_tools/constants/normalizers.py
 tests/test_digar_schema_converter.py
 tests/test_elastic.py
 tests/test_elastic_vector_and_search_operations.py
+tests/test_normalization.py
 tests/test_s3_exceptions.py
 tests/test_s3_file_operations.py
-tests/
+tests/test_sierra_converters.py
+tests/test_task_reporter.py
+tests/test_utils.py
+tests/test_viaf_client.py
rara_tools-0.3.0/tests/test_normalization.py ADDED

@@ -0,0 +1,323 @@
from rara_tools.normalizers import BibRecordNormalizer, AuthoritiesRecordNormalizer
from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
                              check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)


from pymarc import Record

import os

TEST_LEVEL = os.getenv("TEST_LEVEL", "unit")

EMPTY_SIERRA_RECORDS = [
    {
        "sierraID": "1",
        "leader": "00000nz a2200000n 4500",
        "fields": []
    },
]

REQUIRED_FIELDS = ["667", "925"]  # always included after normalization
MOCK_LINKER_ONE_FOUND = get_linker_res_example(
    "oneFound.json")
MOCK_LINKER_MULTIPLE_FOUND = get_linker_res_example(
    "multipleFound.json")
MOCK_LINKER_NOT_FOUND = get_linker_res_example(
    "notFound.json")


def test_normalizers_OK():
    """ Test field editing logic & internals """

    linking_results = [MOCK_LINKER_ONE_FOUND,
                       MOCK_LINKER_MULTIPLE_FOUND]

    test_sierra_data = get_formatted_sierra_response("authorities.json")

    normalizer = AuthoritiesRecordNormalizer(
        linking_results=linking_results,
        sierra_data=test_sierra_data,
    )

    assert len(normalizer.records_extra_data) == len(normalizer.data)

    normalizer = BibRecordNormalizer(
        linking_results=linking_results,
        sierra_data=test_sierra_data,
    )
    assert len(normalizer.records_extra_data) == len(normalizer.data)

    data = [
        {
            "sierraID": "1",
            "leader": "00000nz a2200000n 4500",
            "fields": [
                {
                    "667": {
                        "ind1": " ",
                        "ind2": " ",
                        "subfields": [
                            {
                                "a": "Val"
                            }
                        ]
                    }
                },
            ]
        },
    ]

    # default behavior - added if not in record &
    normalizer = AuthoritiesRecordNormalizer(
        sierra_data=data,
        ALLOW_EDIT_FIELDS=[],
        REPEATABLE_FIELDS=[],
    )
    for r in normalizer:
        assert r.get_fields("667")[0].get_subfields("a")[0] == "Val"

    # not edited if exists
    normalizer = AuthoritiesRecordNormalizer(
        sierra_data=data,
        ALLOW_EDIT_FIELDS=[],
        REPEATABLE_FIELDS=[]
    )
    for r in normalizer:
        assert r.get_fields("667")[0].get_subfields("a")[0] == "Val"

    # allow repeatable, new field will be added
    normalizer = AuthoritiesRecordNormalizer(
        sierra_data=data,
        ALLOW_EDIT_FIELDS=[],
        REPEATABLE_FIELDS=["667"]
    )
    for r in normalizer:
        fields_667 = r.get_fields("667")
        assert len(fields_667) == 2
        assert fields_667[0].get_subfields("a")[0] == "Val"
        assert fields_667[1].get_subfields("a")[0] == "Muudetud AI poolt"

    # allow editing, field will be edited
    normalizer = AuthoritiesRecordNormalizer(
        sierra_data=data,
        ALLOW_EDIT_FIELDS=["667"],
        REPEATABLE_FIELDS=[]
    )
    for r in normalizer:
        fields_667 = r.get_fields("667")
        assert len(fields_667) == 1
        assert fields_667[0].get_subfields("a")[0] == "Muudetud AI poolt"


def validate_bibrecord_normalized(record: Record, has_viaf_data=False):
    # source notes
    assert record.get_fields("667")[0].get_subfields("a")[
        0] == "Muudetud AI poolt"


def validate_authorities_record_normalized(record: Record, has_viaf_data=False):

    field_667 = record.get_fields("667")[0].get_subfields("a")[0]
    assert field_667 == "Muudetud AI poolt" or field_667 == "Loodud AI poolt"

    field_040_subfields = record.get_fields("040")[0]

    # check that a, b & c subfields have values (can have default or unique)
    assert len(field_040_subfields.get_subfields("a")) > 0
    assert len(field_040_subfields.get_subfields("b")) > 0
    assert len(field_040_subfields.get_subfields("c")) > 0

    # check that 008 field has a value of length 40
    field_008 = record.get_fields("008")[0].data
    assert len(field_008) == 40

    if has_viaf_data:
        field_043 = record.get_fields("043")[0].get_subfields(
            "c")[0]  # check that 043 has subfield c with value "ee"
        assert field_043 == "ee"

        field_024 = record.get_fields("024")
        for f in field_024:
            assert len(f.get_subfields("0")) > 0  # VIAF url

        field_046 = record.get_fields("046")[0]
        assert len(field_046.get_subfields("f")) > 0  # birth date
        assert len(field_046.get_subfields("g")) > 0  # death date
        # assert len(field_046.get_subfields("s")) > 0 # activity start
        # assert len(field_046.get_subfields("t")) > 0 # activity end


def test_missing_fields_created_bibrecord_normalization():
    linking_results = [MOCK_LINKER_ONE_FOUND]

    normalizer_entities_only = BibRecordNormalizer(
        linking_results=linking_results,
    )

    normalizer_sierra_data_only = BibRecordNormalizer(
        sierra_data=EMPTY_SIERRA_RECORDS,
    )

    for record in normalizer_entities_only:
        check_record_tags_have_values(
            record, ["008", "046", "245",  # Sierra related, always with bibs
                     "035", "100",  # VIAf enriched
                     ] + REQUIRED_FIELDS
        )
        validate_bibrecord_normalized(record, has_viaf_data=True)

    for record in normalizer_sierra_data_only:
        check_record_tags_have_values(
            record, ["008", "046", "245",  # Sierra related, always with bibs
                     ] + REQUIRED_FIELDS)
        validate_bibrecord_normalized(record)


def test_missing_fields_created_authorities_normalization():

    linking_results = [MOCK_LINKER_ONE_FOUND]

    normalizer_entities_only = AuthoritiesRecordNormalizer(
        linking_results=linking_results,  # find one match
    )

    normalizer_sierra_data_only = AuthoritiesRecordNormalizer(
        sierra_data=EMPTY_SIERRA_RECORDS,
    )

    for r in normalizer_entities_only:
        check_record_tags_have_values(r, ["008", "040",  # SIERRA related
                                          "024", "043", "046"  # VIAF enriched
                                          ] + REQUIRED_FIELDS)

        validate_authorities_record_normalized(r, True)

    for r in normalizer_sierra_data_only:
        check_record_tags_have_values(
            r, ["040"] + REQUIRED_FIELDS)
        validate_authorities_record_normalized(r)


def test_normalized_fields_sorted():

    unsorted_bibdata = [
        {
            "sierraID": "1",
            "leader": "00000nz a2200000n 4500",
            "fields": [
                {
                    "035": {
                        "ind1": " ",
                        "ind2": " ",
                        "subfields": [
                            {
                                "a": "(ErESTER)<1>"
                            }
                        ]
                    }
                },
                {
                    "008": "220805|||aznnnaabn || ||| nz n "
                },
                {
                    "046": {
                        "ind1": " ",
                        "ind2": " ",
                        "subfields": [
                            {
                                "k": "1912"
                            }

                        ]
                    }
                },
            ]
        }
    ]

    normalizers = (BibRecordNormalizer, AuthoritiesRecordNormalizer)

    for normalizer in normalizers:
        normalizer = normalizer(
            linking_results=[],
            sierra_data=unsorted_bibdata
        )

        for r in normalizer:
            check_no_dupe_tag_values(r)
            check_record_tags_sorted(r)


def test_authority_normrecord_found_in_es_and_normalized():
    """ A single match is found among the KATA Elasticsearch authority records & the normalizer enriches the found authority record with VIAF data.
    - validate the normalization mapping from the author table: the enriched fields and the data from VIAF
    - validate that the note was added (TODO) """
    # Presume, author name identified and sent to linker
    linker_res = get_linker_res_example(
        "oneFound.json")  # single result
    linking_results = [linker_res]

    # 1 result found
    normalizer = AuthoritiesRecordNormalizer(
        linking_results=linking_results
    )

    data = normalizer.data

    assert len(data) == 1

    for r in normalizer:
        check_record_tags_have_values(r, ["040"] + REQUIRED_FIELDS)
        validate_authorities_record_normalized(r, has_viaf_data=True)


def test_authority_normrecord_not_found_in_es_and_viaf():
    """No match is found among the KATA Elasticsearch authority records & the linker runs a search against VIAF
    - one match found: a new authority record is created
    - no matches found, or several matches found: a new record is created from the AI-detected information (TODO)
    """
    linker_res = get_linker_res_example(
        "oneFound.json")
    linking_results = [linker_res]

    normalizer = AuthoritiesRecordNormalizer(
        linking_results=linking_results)

    data = normalizer.data

    assert len(data) == 1  # should create new normalized record

    # Entities not found, es & VIAF
    linking_results = [MOCK_LINKER_NOT_FOUND]
    normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
    data = normalizer.data
    # should create new normalized record in the future, none for now
    assert len(data) == 0

    linker_res = get_linker_res_example(
        "multipleFound.json")
    linking_results = [linker_res]
    normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
    data = normalizer.data
    # should create new normalized record in the future, none for now
    assert len(data) == 0


def test_matching_sierra_record_viaf_id_found():
    """A VIAF ID is found on the authority record and, where needed, the place in the record that contains this ID."""
    pass


def test_matching_sierra_record_viaf_id_not_found():
    """No VIAF ID was found on the record; run a search against VIAF to find the _responsible person_?. Create a new record based on the search result."""
    pass


def test_authorities_normalizer_checks():
    """
    - check whether the detected name is present in the 1XX, 4XX fields of the match found in SIERRA. If not, add it to a 4XX field.
    - check whether the name forms from the VIAF data are present in the authority record. If not, add them to 4XX fields.
    - Check whether the VIAF entry has birth and death dates and whether they match the ones in the authority record. If not, move the whole 1XX field of the authority record to a 4XX field, then create a new 1XX field with the new dates.
    - Check that the dates in field 046 match exactly what was added to the 1xx field. If the data was changed, mark that the authority record has changed in the database.
    """
    pass
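As a quick orientation to the new normalizers exercised above, here is a minimal sketch of how they appear to be used, based solely on the test code in this diff: the constructor arguments, the .data attribute, and iteration yielding pymarc Record objects are taken from the tests, while the record literal itself is illustrative.

from rara_tools.normalizers import AuthoritiesRecordNormalizer

# One MARC-in-JSON record in the shape produced by SierraResponseConverter.convert().
sierra_data = [
    {
        "sierraID": "1",
        "leader": "00000nz a2200000n 4500",
        "fields": [
            {"667": {"ind1": " ", "ind2": " ", "subfields": [{"a": "Val"}]}},
        ],
    },
]

normalizer = AuthoritiesRecordNormalizer(
    sierra_data=sierra_data,
    ALLOW_EDIT_FIELDS=[],       # tags the normalizer may overwrite in place
    REPEATABLE_FIELDS=["667"],  # tags that may be added again even if already present
)

# Iteration yields pymarc.Record objects; per the tests, required fields
# (667, 925, 008, 040, ...) are created when missing and the rest are kept.
for record in normalizer:
    print([f.tag for f in record.get_fields()])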
rara_tools-0.3.0/tests/test_sierra_converters.py ADDED

@@ -0,0 +1,101 @@
import os

import pytest
from rara_tools.converters import SierraResponseConverter
from rara_tools.exceptions import SierraResponseConverterException

from tests.const import SIERRA_OUTPUT_DIR
from tests.test_utils import (read_json_file, get_formatted_sierra_response, compare_results)


example_res = {
    "total": 100,
    "start": 50000,
    "entries": [
        {
            "id": 1126963,
            "updatedDate": "2016-02-09T08:42:52Z",
            "createdDate": "2014-05-17T17:22:00Z",
            "deleted": False,
            "suppressed": False,
            "marc": {
                "leader": "00000nz a2200145n 4500",
                "fields": [
                    {
                        # "tag": "100",
                        "data": {
                            "ind1": "1",
                            "ind2": " ",
                            "subfields": [
                                {
                                    "code": "a",
                                    "data": "Viggor, Signe,"
                                },
                                {
                                    "code": "d",
                                    "data": "1975-"
                                }
                            ]
                        }
                    },
                ]}}]}


def test_convert_bibs_response():

    data = get_formatted_sierra_response("bibs.json")

    expected = read_json_file(os.path.join(SIERRA_OUTPUT_DIR, "bibs.json"))

    assert compare_results(expected, data)


def test_convert_keywords_response():

    data = get_formatted_sierra_response("keywords.json")

    expected = read_json_file(os.path.join(SIERRA_OUTPUT_DIR, "keywords.json"))

    assert compare_results(expected, data)


def test_convert_authorities_response():

    data = get_formatted_sierra_response("authorities.json")

    expected = read_json_file(os.path.join(
        SIERRA_OUTPUT_DIR, "authorities.json"))

    assert compare_results(expected, data)


def test_converter_handles_marc_in_json_response():
    """ Gracefully handle entries already in MARC-in-JSON format """
    data = get_formatted_sierra_response("bibsmarc.json")

    expected = read_json_file(os.path.join(SIERRA_OUTPUT_DIR, "bibsmarc.json"))

    assert compare_results(expected, data)


def test_convert_with_wrong_format():
    with pytest.raises(SierraResponseConverterException):
        SierraResponseConverter("$")


def test_convert_missing_tag():
    with pytest.raises(SierraResponseConverterException):
        response = example_res.copy()
        response["entries"][0]["marc"]["fields"][0].pop("tag", None)

        converter = SierraResponseConverter(response)
        converter.convert()


def test_no_entries_in_response():
    with pytest.raises(SierraResponseConverterException):
        response = example_res.copy()
        response.pop("entries", [])

        converter = SierraResponseConverter(response)
        converter.convert()
rara_tools-0.3.0/tests/test_utils.py ADDED

@@ -0,0 +1,76 @@
from tests.const import SIERRA_INPUT_DIR, LINKER_DIR

from rara_tools.converters import SierraResponseConverter
from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient

from pymarc import Record
from typing import List

import json
import os


def read_json_file(path: str):
    with open(path, "r") as f:
        data = f.read()
        return json.loads(data)


def check_record_tags_sorted(record: Record):
    record_tags = [field.tag for field in record.get_fields()]
    assert record_tags == sorted(record_tags)


def check_no_dupe_tag_values(record: Record):
    repetable_tags = ["024", "035", "400", "670"]
    record_tags = [field.tag for field in record.get_fields()
                   if field.tag not in repetable_tags]
    assert len(record_tags) == len(set(record_tags))


def check_record_tags_have_values(record: Record, tags: List[str]):
    for tag in tags:
        assert record[tag] is not None


def get_record_field_value(record: Record, tag: str):
    """ handle control & variable fields """
    return record.get_fields(tag)[0].value()


def compare_results(expected: dict, results: dict):
    return json.dumps(expected) == json.dumps(results)


def get_formatted_sierra_response(fname: str):
    """ Reads a mock Sierra response file and converts it to MARC in json."""

    response = read_json_file(os.path.join(SIERRA_INPUT_DIR, fname))

    converter = SierraResponseConverter(response)
    return converter.convert()


def get_viaf_record(id: str, allowed_sources: list):
    """ Fetches VIAF record by ID and returns a VIAFRecord object """

    client = VIAFClient()
    response = client.get_records_by_viaf_id(id)

    viaf_record = VIAFRecord(
        response, allowed_sources=allowed_sources)
    return viaf_record


def search_viaf_record(search_term: str, allowed_sources: list):
    """ Fetches VIAF record by name and returns a VIAFRecord object """
    client = VIAFClient()
    response = client.get_records_by_search_term(search_term)

    return VIAFRecord(response, allowed_sources=allowed_sources)


def get_linker_res_example(fname: str):
    with open(os.path.join(LINKER_DIR, fname), "r") as f:
        data = f.read()
        return json.loads(data)
rara_tools-0.3.0/tests/test_viaf_client.py ADDED

@@ -0,0 +1,19 @@
from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient


def test_fetch_clusters_by_id_list():
    viaf_ids = ["7432247", "456"]
    client = VIAFClient()

    results = client.fetch_viaf_clusters(viaf_ids)
    assert len(results) == 2
    assert results["456"] == {}
    assert len(results["7432247"]) > 0


def test_fetch_viaf_results_for_normalizer():
    viaf_ids = ["7432247", "456"]
    client = VIAFClient()

    results = client.get_normalized_data(viaf_ids)
    assert len(results) == 2
rara_tools-0.1.0/VERSION DELETED

@@ -1 +0,0 @@
0.1.0
rara_tools-0.1.0/tests/test_converters.py DELETED

@@ -1,127 +0,0 @@
import json
import os

import pytest
from rara_tools.converters import SierraResponseConverter
from rara_tools.exceptions import SierraResponseConverterException

import json

root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

SIERRA_TEST_DATA_DIR = os.path.join(root, "tests", "test_data", "sierra")
INPUT_DIR = os.path.join(SIERRA_TEST_DATA_DIR, "input")
OUTPUT_DIR = os.path.join(SIERRA_TEST_DATA_DIR, "output")

def compare_results(expected, converted):
    return json.dumps(expected) == json.dumps(converted)

def read_json_file(file_path):
    with open(file_path, "r") as f:
        data = f.read()
        return json.loads(data)

example_res = {
    "total": 100,
    "start": 50000,
    "entries": [
        {
            "id": 1126963,
            "updatedDate": "2016-02-09T08:42:52Z",
            "createdDate": "2014-05-17T17:22:00Z",
            "deleted": False,
            "suppressed": False,
            "marc": {
                "leader": "00000nz a2200145n 4500",
                "fields": [
                    {
                        # "tag": "100",
                        "data": {
                            "ind1": "1",
                            "ind2": " ",
                            "subfields": [
                                {
                                    "code": "a",
                                    "data": "Viggor, Signe,"
                                },
                                {
                                    "code": "d",
                                    "data": "1975-"
                                }
                            ]
                        }
                    },
                ]}}]}




def test_convert_bibs_response():
    response = read_json_file(os.path.join(INPUT_DIR, "bibs.json"))

    converter = SierraResponseConverter(response)
    data = converter.convert()

    expected = read_json_file(os.path.join(OUTPUT_DIR, "bibs.json"))

    assert compare_results(expected, data)


def test_convert_keywords_response():
    with open(os.path.join(INPUT_DIR, "keywords.json"), "r") as f:
        response = f.read()
        response = json.loads(response)

    converter = SierraResponseConverter(response)
    data = converter.convert()


    expected = read_json_file(os.path.join(OUTPUT_DIR, "keywords.json"))

    assert compare_results(expected, data)


def test_convert_authorities_response():
    with open(os.path.join(INPUT_DIR, "authorities.json"), "r") as f:
        response = f.read()
        response = json.loads(response)

    converter = SierraResponseConverter(response)
    data = converter.convert()

    expected = read_json_file(os.path.join(OUTPUT_DIR, "authorities.json"))

    assert compare_results(expected, data)

def test_converter_handles_marc_in_json_response():
    """ Gracefully handle entries already in MARC-in-JSON format """
    with open(os.path.join(INPUT_DIR, "bibsmarc.json"), "r") as f:
        response = f.read()
        response = json.loads(response)

    converter = SierraResponseConverter(response)
    data = converter.convert()

    expected = read_json_file(os.path.join(OUTPUT_DIR, "bibsmarc.json"))

    assert compare_results(expected, data)

def test_convert_with_wrong_format():
    with pytest.raises(SierraResponseConverterException):
        SierraResponseConverter("$")

def test_convert_missing_tag():
    with pytest.raises(SierraResponseConverterException):
        response = example_res.copy()
        response["entries"][0]["marc"]["fields"][0].pop("tag", None)

        converter = SierraResponseConverter(response)
        converter.convert()

def test_no_entries_in_response():
    with pytest.raises(SierraResponseConverterException):
        response = example_res.copy()
        response.pop("entries", [])

        converter = SierraResponseConverter(response)
        converter.convert()
All remaining files listed above (+0 -0) are unchanged between 0.1.0 and 0.3.0.