rara-tools 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- {rara_tools-0.2.0/rara_tools.egg-info → rara_tools-0.3.0}/PKG-INFO +1 -2
- rara_tools-0.3.0/VERSION +1 -0
- rara_tools-0.3.0/rara_tools/constants/normalizers.py +6 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0/rara_tools.egg-info}/PKG-INFO +1 -2
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/requires.txt +0 -1
- {rara_tools-0.2.0 → rara_tools-0.3.0}/requirements.txt +0 -1
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_normalization.py +41 -33
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_utils.py +8 -9
- rara_tools-0.2.0/VERSION +0 -1
- rara_tools-0.2.0/rara_tools/constants/normalizers.py +0 -17
- {rara_tools-0.2.0 → rara_tools-0.3.0}/LICENSE.md +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/README.md +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/pyproject.toml +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/constants/__init__.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/constants/digitizer.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/constants/general.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/converters.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/decorators.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/digar_schema_converter.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/elastic.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/exceptions.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/s3.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/task_reporter.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/utils.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/SOURCES.txt +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/dependency_links.txt +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/top_level.txt +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/setup.cfg +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_digar_schema_converter.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_elastic.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_elastic_vector_and_search_operations.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_s3_exceptions.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_s3_file_operations.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_sierra_converters.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_task_reporter.py +0 -0
- {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_viaf_client.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rara-tools
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Tools to support Kata's work.
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -13,7 +13,6 @@ License-File: LICENSE.md
|
|
|
13
13
|
Requires-Dist: elasticsearch==8.*
|
|
14
14
|
Requires-Dist: elasticsearch_dsl==8.*
|
|
15
15
|
Requires-Dist: minio==7.*
|
|
16
|
-
Requires-Dist: rara-norm-linker==1.*
|
|
17
16
|
Requires-Dist: requests
|
|
18
17
|
Requires-Dist: iso639-lang
|
|
19
18
|
Requires-Dist: pymarc
|
rara_tools-0.3.0/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.3.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rara-tools
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Tools to support Kata's work.
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -13,7 +13,6 @@ License-File: LICENSE.md
|
|
|
13
13
|
Requires-Dist: elasticsearch==8.*
|
|
14
14
|
Requires-Dist: elasticsearch_dsl==8.*
|
|
15
15
|
Requires-Dist: minio==7.*
|
|
16
|
-
Requires-Dist: rara-norm-linker==1.*
|
|
17
16
|
Requires-Dist: requests
|
|
18
17
|
Requires-Dist: iso639-lang
|
|
19
18
|
Requires-Dist: pymarc
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
from rara_tools.normalizers import BibRecordNormalizer, AuthoritiesRecordNormalizer
|
|
2
|
-
from tests.test_utils import (get_formatted_sierra_response,
|
|
2
|
+
from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
|
|
3
3
|
check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)
|
|
4
4
|
|
|
5
|
-
from pymarc import Record
|
|
6
5
|
|
|
7
|
-
import pytest
|
|
6
|
+
from pymarc import Record
|
|
8
7
|
|
|
9
8
|
import os
|
|
10
9
|
|
|
@@ -19,29 +18,31 @@ EMPTY_SIERRA_RECORDS = [
|
|
|
19
18
|
]
|
|
20
19
|
|
|
21
20
|
REQUIRED_FIELDS = ["667", "925"] # always included after normalization
|
|
22
|
-
|
|
21
|
+
MOCK_LINKER_ONE_FOUND = get_linker_res_example(
|
|
22
|
+
"oneFound.json")
|
|
23
|
+
MOCK_LINKER_MULTIPLE_FOUND = get_linker_res_example(
|
|
24
|
+
"multipleFound.json")
|
|
25
|
+
MOCK_LINKER_NOT_FOUND = get_linker_res_example(
|
|
26
|
+
"notFound.json")
|
|
23
27
|
|
|
24
28
|
|
|
25
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
26
29
|
def test_normalizers_OK():
|
|
27
|
-
""" Test field editing logic & internals"""
|
|
30
|
+
""" Test field editing logic & internals """
|
|
28
31
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
"Anton Hansen Tammsaare",
|
|
32
|
-
"GIBBBERRISH",
|
|
33
|
-
]
|
|
32
|
+
linking_results = [MOCK_LINKER_ONE_FOUND,
|
|
33
|
+
MOCK_LINKER_MULTIPLE_FOUND]
|
|
34
34
|
|
|
35
35
|
test_sierra_data = get_formatted_sierra_response("authorities.json")
|
|
36
36
|
|
|
37
37
|
normalizer = AuthoritiesRecordNormalizer(
|
|
38
|
-
|
|
38
|
+
linking_results=linking_results,
|
|
39
39
|
sierra_data=test_sierra_data,
|
|
40
40
|
)
|
|
41
|
+
|
|
41
42
|
assert len(normalizer.records_extra_data) == len(normalizer.data)
|
|
42
43
|
|
|
43
44
|
normalizer = BibRecordNormalizer(
|
|
44
|
-
|
|
45
|
+
linking_results=linking_results,
|
|
45
46
|
sierra_data=test_sierra_data,
|
|
46
47
|
)
|
|
47
48
|
assert len(normalizer.records_extra_data) == len(normalizer.data)
|
|
@@ -146,11 +147,11 @@ def validate_authorities_record_normalized(record: Record, has_viaf_data=False):
|
|
|
146
147
|
# assert len(field_046.get_subfields("t")) > 0 # activity end
|
|
147
148
|
|
|
148
149
|
|
|
149
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
150
150
|
def test_missing_fields_created_bibrecord_normalization():
|
|
151
|
+
linking_results = [MOCK_LINKER_ONE_FOUND]
|
|
151
152
|
|
|
152
153
|
normalizer_entities_only = BibRecordNormalizer(
|
|
153
|
-
|
|
154
|
+
linking_results=linking_results,
|
|
154
155
|
)
|
|
155
156
|
|
|
156
157
|
normalizer_sierra_data_only = BibRecordNormalizer(
|
|
@@ -172,11 +173,12 @@ def test_missing_fields_created_bibrecord_normalization():
|
|
|
172
173
|
validate_bibrecord_normalized(record)
|
|
173
174
|
|
|
174
175
|
|
|
175
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
176
176
|
def test_missing_fields_created_authorities_normalization():
|
|
177
177
|
|
|
178
|
+
linking_results = [MOCK_LINKER_ONE_FOUND]
|
|
179
|
+
|
|
178
180
|
normalizer_entities_only = AuthoritiesRecordNormalizer(
|
|
179
|
-
|
|
181
|
+
linking_results=linking_results, # find one match
|
|
180
182
|
)
|
|
181
183
|
|
|
182
184
|
normalizer_sierra_data_only = AuthoritiesRecordNormalizer(
|
|
@@ -187,6 +189,7 @@ def test_missing_fields_created_authorities_normalization():
|
|
|
187
189
|
check_record_tags_have_values(r, ["008", "040", # SIERRA related
|
|
188
190
|
"024", "043", "046" # VIAF enriched
|
|
189
191
|
] + REQUIRED_FIELDS)
|
|
192
|
+
|
|
190
193
|
validate_authorities_record_normalized(r, True)
|
|
191
194
|
|
|
192
195
|
for r in normalizer_sierra_data_only:
|
|
@@ -195,7 +198,6 @@ def test_missing_fields_created_authorities_normalization():
|
|
|
195
198
|
validate_authorities_record_normalized(r)
|
|
196
199
|
|
|
197
200
|
|
|
198
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
199
201
|
def test_normalized_fields_sorted():
|
|
200
202
|
|
|
201
203
|
unsorted_bibdata = [
|
|
@@ -237,7 +239,7 @@ def test_normalized_fields_sorted():
|
|
|
237
239
|
|
|
238
240
|
for normalizer in normalizers:
|
|
239
241
|
normalizer = normalizer(
|
|
240
|
-
|
|
242
|
+
linking_results=[],
|
|
241
243
|
sierra_data=unsorted_bibdata
|
|
242
244
|
)
|
|
243
245
|
|
|
@@ -246,16 +248,18 @@ def test_normalized_fields_sorted():
|
|
|
246
248
|
check_record_tags_sorted(r)
|
|
247
249
|
|
|
248
250
|
|
|
249
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
250
251
|
def test_authority_normrecord_found_in_es_and_normalized():
|
|
251
252
|
""" KATA elastic normkirjete seast leitakse 1 vaste & normaliseerija täiendab leitud normkirjet VIAF infoga.
|
|
252
253
|
- valideeri normaliseerimise mapping, mis autori tabelis. Täiendatud väljad ja VIAFist info
|
|
253
254
|
- Valideeri märge lisatud (TODO) """
|
|
254
255
|
# Presume, author name identified and sent to linker
|
|
255
|
-
|
|
256
|
+
linker_res = get_linker_res_example(
|
|
257
|
+
"oneFound.json") # single result
|
|
258
|
+
linking_results = [linker_res]
|
|
256
259
|
|
|
260
|
+
# 1 result found
|
|
257
261
|
normalizer = AuthoritiesRecordNormalizer(
|
|
258
|
-
|
|
262
|
+
linking_results=linking_results
|
|
259
263
|
)
|
|
260
264
|
|
|
261
265
|
data = normalizer.data
|
|
@@ -267,44 +271,48 @@ def test_authority_normrecord_found_in_es_and_normalized():
|
|
|
267
271
|
validate_authorities_record_normalized(r, has_viaf_data=True)
|
|
268
272
|
|
|
269
273
|
|
|
270
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
271
274
|
def test_authority_normrecord_not_found_in_es_and_viaf():
|
|
272
275
|
"""KATA elastic normkirjete seast vastet ei leitud & linkija sooritab VIAFisse otsingu
|
|
273
276
|
- Üks vaste leiti - luuakse uus normkirje
|
|
274
277
|
- Ei leitud ühtegi vastet, või on leitud vasteid mitu - AI tuvastatud info põhjal uue kirje loomine(TODO)
|
|
275
278
|
"""
|
|
279
|
+
linker_res = get_linker_res_example(
|
|
280
|
+
"oneFound.json")
|
|
281
|
+
linking_results = [linker_res]
|
|
276
282
|
|
|
277
|
-
|
|
278
|
-
|
|
283
|
+
normalizer = AuthoritiesRecordNormalizer(
|
|
284
|
+
linking_results=linking_results)
|
|
279
285
|
|
|
280
286
|
data = normalizer.data
|
|
281
287
|
|
|
282
288
|
assert len(data) == 1 # should create new normalized record
|
|
283
289
|
|
|
284
290
|
# Entities not found, es & VIAF
|
|
285
|
-
|
|
291
|
+
linking_results = [MOCK_LINKER_NOT_FOUND]
|
|
292
|
+
normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
|
|
286
293
|
data = normalizer.data
|
|
287
|
-
|
|
294
|
+
# should create new normalized record in the future, none for now
|
|
295
|
+
assert len(data) == 0
|
|
288
296
|
|
|
289
|
-
|
|
290
|
-
|
|
297
|
+
linker_res = get_linker_res_example(
|
|
298
|
+
"multipleFound.json")
|
|
299
|
+
linking_results = [linker_res]
|
|
300
|
+
normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
|
|
291
301
|
data = normalizer.data
|
|
292
|
-
|
|
302
|
+
# should create new normalized record in the future, none for now
|
|
303
|
+
assert len(data) == 0
|
|
293
304
|
|
|
294
305
|
|
|
295
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
296
306
|
def test_matching_sierra_record_viaf_id_found():
|
|
297
307
|
"""normkirjelt leitakse VIAF ID, vajadusel normi asukoht, kus see ID sisaldub."""
|
|
298
308
|
pass
|
|
299
309
|
|
|
300
310
|
|
|
301
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
302
311
|
def test_matching_sierra_record_viaf_id_not_found():
|
|
303
312
|
"""kirjelt VIAF IDd ei leitud, soorita otsing VIAFi pihta, et leida _vastutav isik_?. Loo uus vastavalt otsingu tulemusele."""
|
|
304
313
|
pass
|
|
305
314
|
|
|
306
315
|
|
|
307
|
-
@pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
|
|
308
316
|
def test_authorities_normalizer_checks():
|
|
309
317
|
"""
|
|
310
318
|
- kontrolli kas tuvastatud nimi on SIERRAst leitud vaste 1XX, 4XX väljadel. Kui pole, siis lisa 4XX väljale.
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
from tests.const import SIERRA_INPUT_DIR,
|
|
2
|
-
from rara_tools.constants import VIAF_ALLOWED_SOURCES
|
|
1
|
+
from tests.const import SIERRA_INPUT_DIR, LINKER_DIR
|
|
3
2
|
|
|
4
3
|
from rara_tools.converters import SierraResponseConverter
|
|
5
4
|
from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
|
|
6
5
|
|
|
7
|
-
from rara_linker.linkers.linker import Linker
|
|
8
|
-
|
|
9
6
|
from pymarc import Record
|
|
10
7
|
from typing import List
|
|
11
8
|
|
|
@@ -25,8 +22,9 @@ def check_record_tags_sorted(record: Record):
|
|
|
25
22
|
|
|
26
23
|
|
|
27
24
|
def check_no_dupe_tag_values(record: Record):
|
|
28
|
-
repetable_tags = ["024", "035", "400", "670"]
|
|
29
|
-
record_tags = [field.tag for field in record.get_fields()
|
|
25
|
+
repetable_tags = ["024", "035", "400", "670"]
|
|
26
|
+
record_tags = [field.tag for field in record.get_fields()
|
|
27
|
+
if field.tag not in repetable_tags]
|
|
30
28
|
assert len(record_tags) == len(set(record_tags))
|
|
31
29
|
|
|
32
30
|
|
|
@@ -56,7 +54,7 @@ def get_formatted_sierra_response(fname: str):
|
|
|
56
54
|
def get_viaf_record(id: str, allowed_sources: list):
|
|
57
55
|
""" Fetches VIAF record by ID and returns a VIAFRecord object """
|
|
58
56
|
|
|
59
|
-
client = VIAFClient()
|
|
57
|
+
client = VIAFClient()
|
|
60
58
|
response = client.get_records_by_viaf_id(id)
|
|
61
59
|
|
|
62
60
|
viaf_record = VIAFRecord(
|
|
@@ -71,7 +69,8 @@ def search_viaf_record(search_term: str, allowed_sources: list):
|
|
|
71
69
|
|
|
72
70
|
return VIAFRecord(response, allowed_sources=allowed_sources)
|
|
73
71
|
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
|
|
73
|
+
def get_linker_res_example(fname: str):
|
|
74
|
+
with open(os.path.join(LINKER_DIR, fname), "r") as f:
|
|
76
75
|
data = f.read()
|
|
77
76
|
return json.loads(data)
|
rara_tools-0.2.0/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.2.0
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from pymarc import Indicators
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
EMPTY_INDICATORS = Indicators(" ", " ")
|
|
5
|
-
VIAF_ALLOWED_SOURCES = ["LC", "DNB", "LNB", "NLL",
|
|
6
|
-
"ERRR", "J9U"]
|
|
7
|
-
|
|
8
|
-
ES_HOST = os.getenv("ELASTIC_TEST_URL", "http://localhost:9200")
|
|
9
|
-
|
|
10
|
-
LINKER_CONFIG = {
|
|
11
|
-
"add_viaf_info": True,
|
|
12
|
-
"vectorizer_data_path": "./vectorizer_data",
|
|
13
|
-
"per_config": {"es_host": ES_HOST},
|
|
14
|
-
"org_config": {"es_host": ES_HOST},
|
|
15
|
-
"loc_config": {"es_host": ES_HOST},
|
|
16
|
-
"ems_config": {"es_host": ES_HOST},
|
|
17
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|