rara-tools 0.2.0__tar.gz → 0.3.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

Files changed (36)
  1. {rara_tools-0.2.0/rara_tools.egg-info → rara_tools-0.3.0}/PKG-INFO +1 -2
  2. rara_tools-0.3.0/VERSION +1 -0
  3. rara_tools-0.3.0/rara_tools/constants/normalizers.py +6 -0
  4. {rara_tools-0.2.0 → rara_tools-0.3.0/rara_tools.egg-info}/PKG-INFO +1 -2
  5. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/requires.txt +0 -1
  6. {rara_tools-0.2.0 → rara_tools-0.3.0}/requirements.txt +0 -1
  7. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_normalization.py +41 -33
  8. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_utils.py +8 -9
  9. rara_tools-0.2.0/VERSION +0 -1
  10. rara_tools-0.2.0/rara_tools/constants/normalizers.py +0 -17
  11. {rara_tools-0.2.0 → rara_tools-0.3.0}/LICENSE.md +0 -0
  12. {rara_tools-0.2.0 → rara_tools-0.3.0}/README.md +0 -0
  13. {rara_tools-0.2.0 → rara_tools-0.3.0}/pyproject.toml +0 -0
  14. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/constants/__init__.py +0 -0
  15. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/constants/digitizer.py +0 -0
  16. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/constants/general.py +0 -0
  17. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/converters.py +0 -0
  18. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/decorators.py +0 -0
  19. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/digar_schema_converter.py +0 -0
  20. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/elastic.py +0 -0
  21. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/exceptions.py +0 -0
  22. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/s3.py +0 -0
  23. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/task_reporter.py +0 -0
  24. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools/utils.py +0 -0
  25. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/SOURCES.txt +0 -0
  26. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/dependency_links.txt +0 -0
  27. {rara_tools-0.2.0 → rara_tools-0.3.0}/rara_tools.egg-info/top_level.txt +0 -0
  28. {rara_tools-0.2.0 → rara_tools-0.3.0}/setup.cfg +0 -0
  29. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_digar_schema_converter.py +0 -0
  30. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_elastic.py +0 -0
  31. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_elastic_vector_and_search_operations.py +0 -0
  32. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_s3_exceptions.py +0 -0
  33. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_s3_file_operations.py +0 -0
  34. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_sierra_converters.py +0 -0
  35. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_task_reporter.py +0 -0
  36. {rara_tools-0.2.0 → rara_tools-0.3.0}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -13,7 +13,6 @@ License-File: LICENSE.md
13
13
  Requires-Dist: elasticsearch==8.*
14
14
  Requires-Dist: elasticsearch_dsl==8.*
15
15
  Requires-Dist: minio==7.*
16
- Requires-Dist: rara-norm-linker==1.*
17
16
  Requires-Dist: requests
18
17
  Requires-Dist: iso639-lang
19
18
  Requires-Dist: pymarc
@@ -0,0 +1 @@
1
+ 0.3.0
@@ -0,0 +1,6 @@
1
+ from pymarc import Indicators
2
+ import os
3
+
4
+ EMPTY_INDICATORS = Indicators(" ", " ")
5
+ VIAF_ALLOWED_SOURCES = ["LC", "DNB", "LNB", "NLL",
6
+ "ERRR", "J9U"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -13,7 +13,6 @@ License-File: LICENSE.md
13
13
  Requires-Dist: elasticsearch==8.*
14
14
  Requires-Dist: elasticsearch_dsl==8.*
15
15
  Requires-Dist: minio==7.*
16
- Requires-Dist: rara-norm-linker==1.*
17
16
  Requires-Dist: requests
18
17
  Requires-Dist: iso639-lang
19
18
  Requires-Dist: pymarc
@@ -1,7 +1,6 @@
1
1
  elasticsearch==8.*
2
2
  elasticsearch_dsl==8.*
3
3
  minio==7.*
4
- rara-norm-linker==1.*
5
4
  requests
6
5
  iso639-lang
7
6
  pymarc
@@ -1,7 +1,6 @@
1
1
  elasticsearch==8.*
2
2
  elasticsearch_dsl==8.*
3
3
  minio==7.*
4
- rara-norm-linker==1.*
5
4
  requests
6
5
  iso639-lang
7
6
  pymarc
@@ -1,10 +1,9 @@
1
1
  from rara_tools.normalizers import BibRecordNormalizer, AuthoritiesRecordNormalizer
2
- from tests.test_utils import (get_formatted_sierra_response,
2
+ from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
3
3
  check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)
4
4
 
5
- from pymarc import Record
6
5
 
7
- import pytest
6
+ from pymarc import Record
8
7
 
9
8
  import os
10
9
 
@@ -19,29 +18,31 @@ EMPTY_SIERRA_RECORDS = [
19
18
  ]
20
19
 
21
20
  REQUIRED_FIELDS = ["667", "925"] # always included after normalization
22
- REASON = "Skipped because TEST_LEVEL is set to 'ci'"
21
+ MOCK_LINKER_ONE_FOUND = get_linker_res_example(
22
+ "oneFound.json")
23
+ MOCK_LINKER_MULTIPLE_FOUND = get_linker_res_example(
24
+ "multipleFound.json")
25
+ MOCK_LINKER_NOT_FOUND = get_linker_res_example(
26
+ "notFound.json")
23
27
 
24
28
 
25
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
26
29
  def test_normalizers_OK():
27
- """ Test field editing logic & internals"""
30
+ """ Test field editing logic & internals """
28
31
 
29
- entities = [
30
- "Paul Keres", # will find multiple entities
31
- "Anton Hansen Tammsaare",
32
- "GIBBBERRISH",
33
- ]
32
+ linking_results = [MOCK_LINKER_ONE_FOUND,
33
+ MOCK_LINKER_MULTIPLE_FOUND]
34
34
 
35
35
  test_sierra_data = get_formatted_sierra_response("authorities.json")
36
36
 
37
37
  normalizer = AuthoritiesRecordNormalizer(
38
- entities=entities,
38
+ linking_results=linking_results,
39
39
  sierra_data=test_sierra_data,
40
40
  )
41
+
41
42
  assert len(normalizer.records_extra_data) == len(normalizer.data)
42
43
 
43
44
  normalizer = BibRecordNormalizer(
44
- entities=entities,
45
+ linking_results=linking_results,
45
46
  sierra_data=test_sierra_data,
46
47
  )
47
48
  assert len(normalizer.records_extra_data) == len(normalizer.data)
@@ -146,11 +147,11 @@ def validate_authorities_record_normalized(record: Record, has_viaf_data=False):
146
147
  # assert len(field_046.get_subfields("t")) > 0 # activity end
147
148
 
148
149
 
149
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
150
150
  def test_missing_fields_created_bibrecord_normalization():
151
+ linking_results = [MOCK_LINKER_ONE_FOUND]
151
152
 
152
153
  normalizer_entities_only = BibRecordNormalizer(
153
- entities=["Eduard Vilde", "Linda Vilde"], # find one match
154
+ linking_results=linking_results,
154
155
  )
155
156
 
156
157
  normalizer_sierra_data_only = BibRecordNormalizer(
@@ -172,11 +173,12 @@ def test_missing_fields_created_bibrecord_normalization():
172
173
  validate_bibrecord_normalized(record)
173
174
 
174
175
 
175
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
176
176
  def test_missing_fields_created_authorities_normalization():
177
177
 
178
+ linking_results = [MOCK_LINKER_ONE_FOUND]
179
+
178
180
  normalizer_entities_only = AuthoritiesRecordNormalizer(
179
- entities=["Eduard Vilde"], # find one match
181
+ linking_results=linking_results, # find one match
180
182
  )
181
183
 
182
184
  normalizer_sierra_data_only = AuthoritiesRecordNormalizer(
@@ -187,6 +189,7 @@ def test_missing_fields_created_authorities_normalization():
187
189
  check_record_tags_have_values(r, ["008", "040", # SIERRA related
188
190
  "024", "043", "046" # VIAF enriched
189
191
  ] + REQUIRED_FIELDS)
192
+
190
193
  validate_authorities_record_normalized(r, True)
191
194
 
192
195
  for r in normalizer_sierra_data_only:
@@ -195,7 +198,6 @@ def test_missing_fields_created_authorities_normalization():
195
198
  validate_authorities_record_normalized(r)
196
199
 
197
200
 
198
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
199
201
  def test_normalized_fields_sorted():
200
202
 
201
203
  unsorted_bibdata = [
@@ -237,7 +239,7 @@ def test_normalized_fields_sorted():
237
239
 
238
240
  for normalizer in normalizers:
239
241
  normalizer = normalizer(
240
- entities=[],
242
+ linking_results=[],
241
243
  sierra_data=unsorted_bibdata
242
244
  )
243
245
 
@@ -246,16 +248,18 @@ def test_normalized_fields_sorted():
246
248
  check_record_tags_sorted(r)
247
249
 
248
250
 
249
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
250
251
  def test_authority_normrecord_found_in_es_and_normalized():
251
252
  """ KATA elastic normkirjete seast leitakse 1 vaste & normaliseerija täiendab leitud normkirjet VIAF infoga.
252
253
  - valideeri normaliseerimise mapping, mis autori tabelis. Täiendatud väljad ja VIAFist info
253
254
  - Valideeri märge lisatud (TODO) """
254
255
  # Presume, author name identified and sent to linker
255
- name = "Jaan Kross"
256
+ linker_res = get_linker_res_example(
257
+ "oneFound.json") # single result
258
+ linking_results = [linker_res]
256
259
 
260
+ # 1 result found
257
261
  normalizer = AuthoritiesRecordNormalizer(
258
- entities=[name]
262
+ linking_results=linking_results
259
263
  )
260
264
 
261
265
  data = normalizer.data
@@ -267,44 +271,48 @@ def test_authority_normrecord_found_in_es_and_normalized():
267
271
  validate_authorities_record_normalized(r, has_viaf_data=True)
268
272
 
269
273
 
270
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
271
274
  def test_authority_normrecord_not_found_in_es_and_viaf():
272
275
  """KATA elastic normkirjete seast vastet ei leitud & linkija sooritab VIAFisse otsingu
273
276
  - Üks vaste leiti - luuakse uus normkirje
274
277
  - Ei leitud ühtegi vastet, või on leitud vasteid mitu - AI tuvastatud info põhjal uue kirje loomine(TODO)
275
278
  """
279
+ linker_res = get_linker_res_example(
280
+ "oneFound.json")
281
+ linking_results = [linker_res]
276
282
 
277
- # 1 result found
278
- normalizer = AuthoritiesRecordNormalizer(entities=["Karl Ristikivi"])
283
+ normalizer = AuthoritiesRecordNormalizer(
284
+ linking_results=linking_results)
279
285
 
280
286
  data = normalizer.data
281
287
 
282
288
  assert len(data) == 1 # should create new normalized record
283
289
 
284
290
  # Entities not found, es & VIAF
285
- normalizer = AuthoritiesRecordNormalizer(entities=["asdasd#@2"])
291
+ linking_results = [MOCK_LINKER_NOT_FOUND]
292
+ normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
286
293
  data = normalizer.data
287
- assert len(data) == 0 # should create new normalized record
294
+ # should create new normalized record in the future, none for now
295
+ assert len(data) == 0
288
296
 
289
- # multiple entities found, skipped
290
- normalizer = AuthoritiesRecordNormalizer(entities=["Paul Keres"])
297
+ linker_res = get_linker_res_example(
298
+ "multipleFound.json")
299
+ linking_results = [linker_res]
300
+ normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
291
301
  data = normalizer.data
292
- assert len(data) == 0 # should not create anything atm
302
+ # should create new normalized record in the future, none for now
303
+ assert len(data) == 0
293
304
 
294
305
 
295
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
296
306
  def test_matching_sierra_record_viaf_id_found():
297
307
  """normkirjelt leitakse VIAF ID, vajadusel normi asukoht, kus see ID sisaldub."""
298
308
  pass
299
309
 
300
310
 
301
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
302
311
  def test_matching_sierra_record_viaf_id_not_found():
303
312
  """kirjelt VIAF IDd ei leitud, soorita otsing VIAFi pihta, et leida _vastutav isik_?. Loo uus vastavalt otsingu tulemusele."""
304
313
  pass
305
314
 
306
315
 
307
- @pytest.mark.skipif(TEST_LEVEL == "ci", reason=REASON)
308
316
  def test_authorities_normalizer_checks():
309
317
  """
310
318
  - kontrolli kas tuvastatud nimi on SIERRAst leitud vaste 1XX, 4XX väljadel. Kui pole, siis lisa 4XX väljale.
@@ -1,11 +1,8 @@
1
- from tests.const import SIERRA_INPUT_DIR, NORMALIZED_DIR, VIAF_TEST_DATA_DIR
2
- from rara_tools.constants import VIAF_ALLOWED_SOURCES
1
+ from tests.const import SIERRA_INPUT_DIR, LINKER_DIR
3
2
 
4
3
  from rara_tools.converters import SierraResponseConverter
5
4
  from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
6
5
 
7
- from rara_linker.linkers.linker import Linker
8
-
9
6
  from pymarc import Record
10
7
  from typing import List
11
8
 
@@ -25,8 +22,9 @@ def check_record_tags_sorted(record: Record):
25
22
 
26
23
 
27
24
  def check_no_dupe_tag_values(record: Record):
28
- repetable_tags = ["024", "035", "400", "670"]
29
- record_tags = [field.tag for field in record.get_fields() if field.tag not in repetable_tags]
25
+ repetable_tags = ["024", "035", "400", "670"]
26
+ record_tags = [field.tag for field in record.get_fields()
27
+ if field.tag not in repetable_tags]
30
28
  assert len(record_tags) == len(set(record_tags))
31
29
 
32
30
 
@@ -56,7 +54,7 @@ def get_formatted_sierra_response(fname: str):
56
54
  def get_viaf_record(id: str, allowed_sources: list):
57
55
  """ Fetches VIAF record by ID and returns a VIAFRecord object """
58
56
 
59
- client = VIAFClient() # should use Linker instead? not ViafLinker directly
57
+ client = VIAFClient()
60
58
  response = client.get_records_by_viaf_id(id)
61
59
 
62
60
  viaf_record = VIAFRecord(
@@ -71,7 +69,8 @@ def search_viaf_record(search_term: str, allowed_sources: list):
71
69
 
72
70
  return VIAFRecord(response, allowed_sources=allowed_sources)
73
71
 
74
- def get_normalized_example(fname: str):
75
- with open(os.path.join(NORMALIZED_DIR, fname), "r") as f:
72
+
73
+ def get_linker_res_example(fname: str):
74
+ with open(os.path.join(LINKER_DIR, fname), "r") as f:
76
75
  data = f.read()
77
76
  return json.loads(data)
rara_tools-0.2.0/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.2.0
@@ -1,17 +0,0 @@
1
- from pymarc import Indicators
2
- import os
3
-
4
- EMPTY_INDICATORS = Indicators(" ", " ")
5
- VIAF_ALLOWED_SOURCES = ["LC", "DNB", "LNB", "NLL",
6
- "ERRR", "J9U"]
7
-
8
- ES_HOST = os.getenv("ELASTIC_TEST_URL", "http://localhost:9200")
9
-
10
- LINKER_CONFIG = {
11
- "add_viaf_info": True,
12
- "vectorizer_data_path": "./vectorizer_data",
13
- "per_config": {"es_host": ES_HOST},
14
- "org_config": {"es_host": ES_HOST},
15
- "loc_config": {"es_host": ES_HOST},
16
- "ems_config": {"es_host": ES_HOST},
17
- }
File without changes
File without changes
File without changes
File without changes
File without changes