rara-tools 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of rara-tools might be problematic; see the package registry page for more details.

Files changed (37):
  1. {rara_tools-0.1.0/rara_tools.egg-info → rara_tools-0.3.0}/PKG-INFO +3 -1
  2. rara_tools-0.3.0/VERSION +1 -0
  3. rara_tools-0.3.0/rara_tools/constants/__init__.py +1 -0
  4. rara_tools-0.3.0/rara_tools/constants/normalizers.py +6 -0
  5. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/converters.py +42 -33
  6. {rara_tools-0.1.0 → rara_tools-0.3.0/rara_tools.egg-info}/PKG-INFO +3 -1
  7. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/SOURCES.txt +6 -2
  8. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/requires.txt +2 -0
  9. {rara_tools-0.1.0 → rara_tools-0.3.0}/requirements.txt +2 -0
  10. rara_tools-0.3.0/tests/test_normalization.py +323 -0
  11. rara_tools-0.3.0/tests/test_sierra_converters.py +101 -0
  12. rara_tools-0.3.0/tests/test_utils.py +76 -0
  13. rara_tools-0.3.0/tests/test_viaf_client.py +19 -0
  14. rara_tools-0.1.0/VERSION +0 -1
  15. rara_tools-0.1.0/rara_tools/constants/__init__.py +0 -0
  16. rara_tools-0.1.0/tests/test_converters.py +0 -127
  17. {rara_tools-0.1.0 → rara_tools-0.3.0}/LICENSE.md +0 -0
  18. {rara_tools-0.1.0 → rara_tools-0.3.0}/README.md +0 -0
  19. {rara_tools-0.1.0 → rara_tools-0.3.0}/pyproject.toml +0 -0
  20. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/constants/digitizer.py +0 -0
  21. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/constants/general.py +0 -0
  22. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/decorators.py +0 -0
  23. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/digar_schema_converter.py +0 -0
  24. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/elastic.py +0 -0
  25. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/exceptions.py +0 -0
  26. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/s3.py +0 -0
  27. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/task_reporter.py +0 -0
  28. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools/utils.py +0 -0
  29. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/dependency_links.txt +0 -0
  30. {rara_tools-0.1.0 → rara_tools-0.3.0}/rara_tools.egg-info/top_level.txt +0 -0
  31. {rara_tools-0.1.0 → rara_tools-0.3.0}/setup.cfg +0 -0
  32. {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_digar_schema_converter.py +0 -0
  33. {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_elastic.py +0 -0
  34. {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_elastic_vector_and_search_operations.py +0 -0
  35. {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_s3_exceptions.py +0 -0
  36. {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_s3_file_operations.py +0 -0
  37. {rara_tools-0.1.0 → rara_tools-0.3.0}/tests/test_task_reporter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -15,6 +15,8 @@ Requires-Dist: elasticsearch_dsl==8.*
15
15
  Requires-Dist: minio==7.*
16
16
  Requires-Dist: requests
17
17
  Requires-Dist: iso639-lang
18
+ Requires-Dist: pymarc
19
+ Requires-Dist: glom
18
20
  Provides-Extra: testing
19
21
  Requires-Dist: pytest>=8.0; extra == "testing"
20
22
  Requires-Dist: pytest-order; extra == "testing"
@@ -0,0 +1 @@
1
+ 0.3.0
@@ -0,0 +1 @@
1
+ from .normalizers import *
@@ -0,0 +1,6 @@
1
+ from pymarc import Indicators
2
+ import os
3
+
4
+ EMPTY_INDICATORS = Indicators(" ", " ")
5
+ VIAF_ALLOWED_SOURCES = ["LC", "DNB", "LNB", "NLL",
6
+ "ERRR", "J9U"]
@@ -1,19 +1,22 @@
1
- from .exceptions import SierraResponseConverterException
1
+ from rara_tools.exceptions import SierraResponseConverterException
2
2
 
3
3
 
4
4
  class SierraResponseConverter:
5
5
  """Converts a JSON response from the Sierra API to MARC-in-JSON format."""
6
-
6
+
7
7
  def __init__(self, response: dict):
8
8
  if not isinstance(response, dict):
9
- raise SierraResponseConverterException("Please provide a valid JSON response.")
9
+ raise SierraResponseConverterException(
10
+ "Please provide a valid JSON response.")
10
11
  self.response = response
11
-
12
- def _map_control_fields(self, field: dict) -> dict:
13
- # for tags < 010, no subfields, instead one str value in "value"
12
+
13
+ @staticmethod
14
+ def _map_control_fields(field: dict) -> dict:
15
+ # for tags < 010, no subfields, instead one str value in "value"
14
16
  return {field["tag"]: field["value"]}
15
-
16
- def _map_data_fields(self, field: dict) -> dict:
17
+
18
+ @staticmethod
19
+ def _map_data_fields(field: dict) -> dict:
17
20
  """ Maps marc fields > 010.
18
21
 
19
22
  Args:
@@ -22,60 +25,66 @@ class SierraResponseConverter:
22
25
  Returns:
23
26
  dict: standardised marc-in-json format.
24
27
  """
25
-
28
+
26
29
  data = field["data"]
27
-
30
+
28
31
  # Order matters ind1, in2, subfields
29
32
  field_data = {
30
33
  "ind1": data.get("ind1", " "),
31
34
  "ind2": data.get("ind2", " "),
32
35
  "subfields": data.get("subfields", [])
33
36
  }
34
-
37
+
35
38
  return {field["tag"]: field_data}
36
-
37
- def _is_marc21structured(self, field: dict) -> bool:
39
+
40
+ @staticmethod
41
+ def _is_marc21structured(field: dict) -> bool:
38
42
  """Checks if the field is already structured according to MARC21 in JSON"""
39
43
  return any(key.isdigit() for key in field.keys())
40
-
41
-
44
+
42
45
  def _handle_field_type(self, field: dict) -> dict:
43
-
46
+
44
47
  if self._is_marc21structured(field):
45
48
  return field
46
-
49
+
47
50
  if field.get("data"):
48
51
  return self._map_data_fields(field)
49
-
52
+
50
53
  tag = field.get("tag")
51
-
54
+
52
55
  if not tag:
53
- raise SierraResponseConverterException("Field is missing MARC21 tag.")
54
-
56
+ raise SierraResponseConverterException(
57
+ "Field is missing MARC21 tag.")
58
+
55
59
  if tag < "010":
56
60
  return self._map_control_fields(field)
57
61
  else:
58
62
  return self._map_data_fields(field)
59
-
63
+
60
64
  def _convert_response(self) -> list:
61
65
  entries = self.response.get("entries")
62
66
  if not entries:
63
- raise SierraResponseConverterException("No entries found in the response.")
64
-
67
+ raise SierraResponseConverterException(
68
+ "No entries found in the response.")
69
+
65
70
  try:
66
- return {"fields": [
67
- {e["id"]: [
68
- self._handle_field_type(f) for f in e["marc"]["fields"]
71
+ return [
72
+ {
73
+ "sierraID": str(e["id"]),
74
+ "leader": e["marc"]["leader"],
75
+ "fields": [
76
+ self._handle_field_type(f) for f in e["marc"]["fields"]
69
77
  ]}
70
78
  for e in entries
71
- ]}
72
-
79
+ ]
80
+
73
81
  except KeyError as e:
74
- raise SierraResponseConverterException(f"Malformed response: missing key {e}")
75
-
76
-
82
+ raise SierraResponseConverterException(
83
+ f"Malformed response: missing key {e}")
84
+
77
85
  def convert(self) -> list:
78
86
  try:
79
87
  return self._convert_response()
80
88
  except Exception as e:
81
- raise SierraResponseConverterException(f"An unexpected error occurred: {e}")
89
+ raise SierraResponseConverterException(
90
+ f"An unexpected error occurred: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -15,6 +15,8 @@ Requires-Dist: elasticsearch_dsl==8.*
15
15
  Requires-Dist: minio==7.*
16
16
  Requires-Dist: requests
17
17
  Requires-Dist: iso639-lang
18
+ Requires-Dist: pymarc
19
+ Requires-Dist: glom
18
20
  Provides-Extra: testing
19
21
  Requires-Dist: pytest>=8.0; extra == "testing"
20
22
  Requires-Dist: pytest-order; extra == "testing"
@@ -19,10 +19,14 @@ rara_tools.egg-info/top_level.txt
19
19
  rara_tools/constants/__init__.py
20
20
  rara_tools/constants/digitizer.py
21
21
  rara_tools/constants/general.py
22
- tests/test_converters.py
22
+ rara_tools/constants/normalizers.py
23
23
  tests/test_digar_schema_converter.py
24
24
  tests/test_elastic.py
25
25
  tests/test_elastic_vector_and_search_operations.py
26
+ tests/test_normalization.py
26
27
  tests/test_s3_exceptions.py
27
28
  tests/test_s3_file_operations.py
28
- tests/test_task_reporter.py
29
+ tests/test_sierra_converters.py
30
+ tests/test_task_reporter.py
31
+ tests/test_utils.py
32
+ tests/test_viaf_client.py
@@ -3,6 +3,8 @@ elasticsearch_dsl==8.*
3
3
  minio==7.*
4
4
  requests
5
5
  iso639-lang
6
+ pymarc
7
+ glom
6
8
 
7
9
  [testing]
8
10
  pytest>=8.0
@@ -3,3 +3,5 @@ elasticsearch_dsl==8.*
3
3
  minio==7.*
4
4
  requests
5
5
  iso639-lang
6
+ pymarc
7
+ glom
@@ -0,0 +1,323 @@
1
+ from rara_tools.normalizers import BibRecordNormalizer, AuthoritiesRecordNormalizer
2
+ from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
3
+ check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)
4
+
5
+
6
+ from pymarc import Record
7
+
8
+ import os
9
+
10
+ TEST_LEVEL = os.getenv("TEST_LEVEL", "unit")
11
+
12
+ EMPTY_SIERRA_RECORDS = [
13
+ {
14
+ "sierraID": "1",
15
+ "leader": "00000nz a2200000n 4500",
16
+ "fields": []
17
+ },
18
+ ]
19
+
20
+ REQUIRED_FIELDS = ["667", "925"] # always included after normalization
21
+ MOCK_LINKER_ONE_FOUND = get_linker_res_example(
22
+ "oneFound.json")
23
+ MOCK_LINKER_MULTIPLE_FOUND = get_linker_res_example(
24
+ "multipleFound.json")
25
+ MOCK_LINKER_NOT_FOUND = get_linker_res_example(
26
+ "notFound.json")
27
+
28
+
29
+ def test_normalizers_OK():
30
+ """ Test field editing logic & internals """
31
+
32
+ linking_results = [MOCK_LINKER_ONE_FOUND,
33
+ MOCK_LINKER_MULTIPLE_FOUND]
34
+
35
+ test_sierra_data = get_formatted_sierra_response("authorities.json")
36
+
37
+ normalizer = AuthoritiesRecordNormalizer(
38
+ linking_results=linking_results,
39
+ sierra_data=test_sierra_data,
40
+ )
41
+
42
+ assert len(normalizer.records_extra_data) == len(normalizer.data)
43
+
44
+ normalizer = BibRecordNormalizer(
45
+ linking_results=linking_results,
46
+ sierra_data=test_sierra_data,
47
+ )
48
+ assert len(normalizer.records_extra_data) == len(normalizer.data)
49
+
50
+ data = [
51
+ {
52
+ "sierraID": "1",
53
+ "leader": "00000nz a2200000n 4500",
54
+ "fields": [
55
+ {
56
+ "667": {
57
+ "ind1": " ",
58
+ "ind2": " ",
59
+ "subfields": [
60
+ {
61
+ "a": "Val"
62
+ }
63
+ ]
64
+ }
65
+ },
66
+ ]
67
+ },
68
+ ]
69
+
70
+ # default behavior - added if not in record &
71
+ normalizer = AuthoritiesRecordNormalizer(
72
+ sierra_data=data,
73
+ ALLOW_EDIT_FIELDS=[],
74
+ REPEATABLE_FIELDS=[],
75
+ )
76
+ for r in normalizer:
77
+ assert r.get_fields("667")[0].get_subfields("a")[0] == "Val"
78
+
79
+ # not edited if exists
80
+ normalizer = AuthoritiesRecordNormalizer(
81
+ sierra_data=data,
82
+ ALLOW_EDIT_FIELDS=[],
83
+ REPEATABLE_FIELDS=[]
84
+ )
85
+ for r in normalizer:
86
+ assert r.get_fields("667")[0].get_subfields("a")[0] == "Val"
87
+
88
+ # allow repeatable, new field will be added
89
+ normalizer = AuthoritiesRecordNormalizer(
90
+ sierra_data=data,
91
+ ALLOW_EDIT_FIELDS=[],
92
+ REPEATABLE_FIELDS=["667"]
93
+ )
94
+ for r in normalizer:
95
+ fields_667 = r.get_fields("667")
96
+ assert len(fields_667) == 2
97
+ assert fields_667[0].get_subfields("a")[0] == "Val"
98
+ assert fields_667[1].get_subfields("a")[0] == "Muudetud AI poolt"
99
+
100
+ # allow editing, field will be edited
101
+ normalizer = AuthoritiesRecordNormalizer(
102
+ sierra_data=data,
103
+ ALLOW_EDIT_FIELDS=["667"],
104
+ REPEATABLE_FIELDS=[]
105
+ )
106
+ for r in normalizer:
107
+ fields_667 = r.get_fields("667")
108
+ assert len(fields_667) == 1
109
+ assert fields_667[0].get_subfields("a")[0] == "Muudetud AI poolt"
110
+
111
+
112
+ def validate_bibrecord_normalized(record: Record, has_viaf_data=False):
113
+ # source notes
114
+ assert record.get_fields("667")[0].get_subfields("a")[
115
+ 0] == "Muudetud AI poolt"
116
+
117
+
118
+ def validate_authorities_record_normalized(record: Record, has_viaf_data=False):
119
+
120
+ field_667 = record.get_fields("667")[0].get_subfields("a")[0]
121
+ assert field_667 == "Muudetud AI poolt" or field_667 == "Loodud AI poolt"
122
+
123
+ field_040_subfields = record.get_fields("040")[0]
124
+
125
+ # check that a, b & c subfields have values (can have default or unique)
126
+ assert len(field_040_subfields.get_subfields("a")) > 0
127
+ assert len(field_040_subfields.get_subfields("b")) > 0
128
+ assert len(field_040_subfields.get_subfields("c")) > 0
129
+
130
+ # check that 008 field has a value of length 40
131
+ field_008 = record.get_fields("008")[0].data
132
+ assert len(field_008) == 40
133
+
134
+ if has_viaf_data:
135
+ field_043 = record.get_fields("043")[0].get_subfields(
136
+ "c")[0] # check that 043 has subfield c with value "ee"
137
+ assert field_043 == "ee"
138
+
139
+ field_024 = record.get_fields("024")
140
+ for f in field_024:
141
+ assert len(f.get_subfields("0")) > 0 # VIAF url
142
+
143
+ field_046 = record.get_fields("046")[0]
144
+ assert len(field_046.get_subfields("f")) > 0 # birth date
145
+ assert len(field_046.get_subfields("g")) > 0 # death date
146
+ # assert len(field_046.get_subfields("s")) > 0 # activity start
147
+ # assert len(field_046.get_subfields("t")) > 0 # activity end
148
+
149
+
150
+ def test_missing_fields_created_bibrecord_normalization():
151
+ linking_results = [MOCK_LINKER_ONE_FOUND]
152
+
153
+ normalizer_entities_only = BibRecordNormalizer(
154
+ linking_results=linking_results,
155
+ )
156
+
157
+ normalizer_sierra_data_only = BibRecordNormalizer(
158
+ sierra_data=EMPTY_SIERRA_RECORDS,
159
+ )
160
+
161
+ for record in normalizer_entities_only:
162
+ check_record_tags_have_values(
163
+ record, ["008", "046", "245", # Sierra related, always with bibs
164
+ "035", "100", # VIAf enriched
165
+ ] + REQUIRED_FIELDS
166
+ )
167
+ validate_bibrecord_normalized(record, has_viaf_data=True)
168
+
169
+ for record in normalizer_sierra_data_only:
170
+ check_record_tags_have_values(
171
+ record, ["008", "046", "245", # Sierra related, always with bibs
172
+ ] + REQUIRED_FIELDS)
173
+ validate_bibrecord_normalized(record)
174
+
175
+
176
+ def test_missing_fields_created_authorities_normalization():
177
+
178
+ linking_results = [MOCK_LINKER_ONE_FOUND]
179
+
180
+ normalizer_entities_only = AuthoritiesRecordNormalizer(
181
+ linking_results=linking_results, # find one match
182
+ )
183
+
184
+ normalizer_sierra_data_only = AuthoritiesRecordNormalizer(
185
+ sierra_data=EMPTY_SIERRA_RECORDS,
186
+ )
187
+
188
+ for r in normalizer_entities_only:
189
+ check_record_tags_have_values(r, ["008", "040", # SIERRA related
190
+ "024", "043", "046" # VIAF enriched
191
+ ] + REQUIRED_FIELDS)
192
+
193
+ validate_authorities_record_normalized(r, True)
194
+
195
+ for r in normalizer_sierra_data_only:
196
+ check_record_tags_have_values(
197
+ r, ["040"] + REQUIRED_FIELDS)
198
+ validate_authorities_record_normalized(r)
199
+
200
+
201
+ def test_normalized_fields_sorted():
202
+
203
+ unsorted_bibdata = [
204
+ {
205
+ "sierraID": "1",
206
+ "leader": "00000nz a2200000n 4500",
207
+ "fields": [
208
+ {
209
+ "035": {
210
+ "ind1": " ",
211
+ "ind2": " ",
212
+ "subfields": [
213
+ {
214
+ "a": "(ErESTER)<1>"
215
+ }
216
+ ]
217
+ }
218
+ },
219
+ {
220
+ "008": "220805|||aznnnaabn || ||| nz n "
221
+ },
222
+ {
223
+ "046": {
224
+ "ind1": " ",
225
+ "ind2": " ",
226
+ "subfields": [
227
+ {
228
+ "k": "1912"
229
+ }
230
+
231
+ ]
232
+ }
233
+ },
234
+ ]
235
+ }
236
+ ]
237
+
238
+ normalizers = (BibRecordNormalizer, AuthoritiesRecordNormalizer)
239
+
240
+ for normalizer in normalizers:
241
+ normalizer = normalizer(
242
+ linking_results=[],
243
+ sierra_data=unsorted_bibdata
244
+ )
245
+
246
+ for r in normalizer:
247
+ check_no_dupe_tag_values(r)
248
+ check_record_tags_sorted(r)
249
+
250
+
251
+ def test_authority_normrecord_found_in_es_and_normalized():
252
+ """ KATA elastic normkirjete seast leitakse 1 vaste & normaliseerija täiendab leitud normkirjet VIAF infoga.
253
+ - valideeri normaliseerimise mapping, mis autori tabelis. Täiendatud väljad ja VIAFist info
254
+ - Valideeri märge lisatud (TODO) """
255
+ # Presume, author name identified and sent to linker
256
+ linker_res = get_linker_res_example(
257
+ "oneFound.json") # single result
258
+ linking_results = [linker_res]
259
+
260
+ # 1 result found
261
+ normalizer = AuthoritiesRecordNormalizer(
262
+ linking_results=linking_results
263
+ )
264
+
265
+ data = normalizer.data
266
+
267
+ assert len(data) == 1
268
+
269
+ for r in normalizer:
270
+ check_record_tags_have_values(r, ["040"] + REQUIRED_FIELDS)
271
+ validate_authorities_record_normalized(r, has_viaf_data=True)
272
+
273
+
274
+ def test_authority_normrecord_not_found_in_es_and_viaf():
275
+ """KATA elastic normkirjete seast vastet ei leitud & linkija sooritab VIAFisse otsingu
276
+ - Üks vaste leiti - luuakse uus normkirje
277
+ - Ei leitud ühtegi vastet, või on leitud vasteid mitu - AI tuvastatud info põhjal uue kirje loomine(TODO)
278
+ """
279
+ linker_res = get_linker_res_example(
280
+ "oneFound.json")
281
+ linking_results = [linker_res]
282
+
283
+ normalizer = AuthoritiesRecordNormalizer(
284
+ linking_results=linking_results)
285
+
286
+ data = normalizer.data
287
+
288
+ assert len(data) == 1 # should create new normalized record
289
+
290
+ # Entities not found, es & VIAF
291
+ linking_results = [MOCK_LINKER_NOT_FOUND]
292
+ normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
293
+ data = normalizer.data
294
+ # should create new normalized record in the future, none for now
295
+ assert len(data) == 0
296
+
297
+ linker_res = get_linker_res_example(
298
+ "multipleFound.json")
299
+ linking_results = [linker_res]
300
+ normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
301
+ data = normalizer.data
302
+ # should create new normalized record in the future, none for now
303
+ assert len(data) == 0
304
+
305
+
306
+ def test_matching_sierra_record_viaf_id_found():
307
+ """normkirjelt leitakse VIAF ID, vajadusel normi asukoht, kus see ID sisaldub."""
308
+ pass
309
+
310
+
311
+ def test_matching_sierra_record_viaf_id_not_found():
312
+ """kirjelt VIAF IDd ei leitud, soorita otsing VIAFi pihta, et leida _vastutav isik_?. Loo uus vastavalt otsingu tulemusele."""
313
+ pass
314
+
315
+
316
+ def test_authorities_normalizer_checks():
317
+ """
318
+ - kontrolli kas tuvastatud nimi on SIERRAst leitud vaste 1XX, 4XX väljadel. Kui pole, siis lisa 4XX väljale.
319
+ - kontrolli, kas VIAF andmete nimekujud on normkandes olemas. Kui pole, lisa need 4XX väljale.
320
+ - Kontrolli, kas VIAF kandes on sünni ja surma daatumid ja kas need klapivad normkandes olevaga. Kui pole, siis liiguta normkandest kogu 1XX väli 4XX väljale. Seejärel loo uute daatumitega 1XX väli.
321
+ - Kontrolli, et väljal 046 olevad daatumid klapiksid just 1xx väljale lisatuga. Kui andmeid muudeti, siis märgi, et baasis on normkanne muutunud
322
+ """
323
+ pass
@@ -0,0 +1,101 @@
1
+ import os
2
+
3
+ import pytest
4
+ from rara_tools.converters import SierraResponseConverter
5
+ from rara_tools.exceptions import SierraResponseConverterException
6
+
7
+ from tests.const import SIERRA_OUTPUT_DIR
8
+ from tests.test_utils import (read_json_file, get_formatted_sierra_response, compare_results)
9
+
10
+
11
+ example_res = {
12
+ "total": 100,
13
+ "start": 50000,
14
+ "entries": [
15
+ {
16
+ "id": 1126963,
17
+ "updatedDate": "2016-02-09T08:42:52Z",
18
+ "createdDate": "2014-05-17T17:22:00Z",
19
+ "deleted": False,
20
+ "suppressed": False,
21
+ "marc": {
22
+ "leader": "00000nz a2200145n 4500",
23
+ "fields": [
24
+ {
25
+ # "tag": "100",
26
+ "data": {
27
+ "ind1": "1",
28
+ "ind2": " ",
29
+ "subfields": [
30
+ {
31
+ "code": "a",
32
+ "data": "Viggor, Signe,"
33
+ },
34
+ {
35
+ "code": "d",
36
+ "data": "1975-"
37
+ }
38
+ ]
39
+ }
40
+ },
41
+ ]}}]}
42
+
43
+
44
+ def test_convert_bibs_response():
45
+
46
+ data = get_formatted_sierra_response("bibs.json")
47
+
48
+ expected = read_json_file(os.path.join(SIERRA_OUTPUT_DIR, "bibs.json"))
49
+
50
+ assert compare_results(expected, data)
51
+
52
+
53
+ def test_convert_keywords_response():
54
+
55
+ data = get_formatted_sierra_response("keywords.json")
56
+
57
+ expected = read_json_file(os.path.join(SIERRA_OUTPUT_DIR, "keywords.json"))
58
+
59
+ assert compare_results(expected, data)
60
+
61
+
62
+ def test_convert_authorities_response():
63
+
64
+ data = get_formatted_sierra_response("authorities.json")
65
+
66
+ expected = read_json_file(os.path.join(
67
+ SIERRA_OUTPUT_DIR, "authorities.json"))
68
+
69
+ assert compare_results(expected, data)
70
+
71
+
72
+ def test_converter_handles_marc_in_json_response():
73
+ """ Gracefully handle entries already in MARC-in-JSON format """
74
+ data = get_formatted_sierra_response("bibsmarc.json")
75
+
76
+ expected = read_json_file(os.path.join(SIERRA_OUTPUT_DIR, "bibsmarc.json"))
77
+
78
+ assert compare_results(expected, data)
79
+
80
+
81
+ def test_convert_with_wrong_format():
82
+ with pytest.raises(SierraResponseConverterException):
83
+ SierraResponseConverter("$")
84
+
85
+
86
+ def test_convert_missing_tag():
87
+ with pytest.raises(SierraResponseConverterException):
88
+ response = example_res.copy()
89
+ response["entries"][0]["marc"]["fields"][0].pop("tag", None)
90
+
91
+ converter = SierraResponseConverter(response)
92
+ converter.convert()
93
+
94
+
95
+ def test_no_entries_in_response():
96
+ with pytest.raises(SierraResponseConverterException):
97
+ response = example_res.copy()
98
+ response.pop("entries", [])
99
+
100
+ converter = SierraResponseConverter(response)
101
+ converter.convert()
@@ -0,0 +1,76 @@
1
+ from tests.const import SIERRA_INPUT_DIR, LINKER_DIR
2
+
3
+ from rara_tools.converters import SierraResponseConverter
4
+ from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
5
+
6
+ from pymarc import Record
7
+ from typing import List
8
+
9
+ import json
10
+ import os
11
+
12
+
13
+ def read_json_file(path: str):
14
+ with open(path, "r") as f:
15
+ data = f.read()
16
+ return json.loads(data)
17
+
18
+
19
+ def check_record_tags_sorted(record: Record):
20
+ record_tags = [field.tag for field in record.get_fields()]
21
+ assert record_tags == sorted(record_tags)
22
+
23
+
24
+ def check_no_dupe_tag_values(record: Record):
25
+ repetable_tags = ["024", "035", "400", "670"]
26
+ record_tags = [field.tag for field in record.get_fields()
27
+ if field.tag not in repetable_tags]
28
+ assert len(record_tags) == len(set(record_tags))
29
+
30
+
31
+ def check_record_tags_have_values(record: Record, tags: List[str]):
32
+ for tag in tags:
33
+ assert record[tag] is not None
34
+
35
+
36
+ def get_record_field_value(record: Record, tag: str):
37
+ """ handle control & variable fields """
38
+ return record.get_fields(tag)[0].value()
39
+
40
+
41
+ def compare_results(expected: dict, results: dict):
42
+ return json.dumps(expected) == json.dumps(results)
43
+
44
+
45
+ def get_formatted_sierra_response(fname: str):
46
+ """ Reads a mock Sierra response file and converts it to MARC in json."""
47
+
48
+ response = read_json_file(os.path.join(SIERRA_INPUT_DIR, fname))
49
+
50
+ converter = SierraResponseConverter(response)
51
+ return converter.convert()
52
+
53
+
54
+ def get_viaf_record(id: str, allowed_sources: list):
55
+ """ Fetches VIAF record by ID and returns a VIAFRecord object """
56
+
57
+ client = VIAFClient()
58
+ response = client.get_records_by_viaf_id(id)
59
+
60
+ viaf_record = VIAFRecord(
61
+ response, allowed_sources=allowed_sources)
62
+ return viaf_record
63
+
64
+
65
+ def search_viaf_record(search_term: str, allowed_sources: list):
66
+ """ Fetches VIAF record by name and returns a VIAFRecord object """
67
+ client = VIAFClient()
68
+ response = client.get_records_by_search_term(search_term)
69
+
70
+ return VIAFRecord(response, allowed_sources=allowed_sources)
71
+
72
+
73
+ def get_linker_res_example(fname: str):
74
+ with open(os.path.join(LINKER_DIR, fname), "r") as f:
75
+ data = f.read()
76
+ return json.loads(data)
@@ -0,0 +1,19 @@
1
+ from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
2
+
3
+
4
+ def test_fetch_clusters_by_id_list():
5
+ viaf_ids = ["7432247", "456"]
6
+ client = VIAFClient()
7
+
8
+ results = client.fetch_viaf_clusters(viaf_ids)
9
+ assert len(results) == 2
10
+ assert results["456"] == {}
11
+ assert len(results["7432247"]) > 0
12
+
13
+
14
+ def test_fetch_viaf_results_for_normalizer():
15
+ viaf_ids = ["7432247", "456"]
16
+ client = VIAFClient()
17
+
18
+ results = client.get_normalized_data(viaf_ids)
19
+ assert len(results) == 2
rara_tools-0.1.0/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.1.0
File without changes
@@ -1,127 +0,0 @@
1
- import json
2
- import os
3
-
4
- import pytest
5
- from rara_tools.converters import SierraResponseConverter
6
- from rara_tools.exceptions import SierraResponseConverterException
7
-
8
- import json
9
-
10
- root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
-
12
- SIERRA_TEST_DATA_DIR = os.path.join(root, "tests", "test_data", "sierra")
13
- INPUT_DIR = os.path.join(SIERRA_TEST_DATA_DIR, "input")
14
- OUTPUT_DIR = os.path.join(SIERRA_TEST_DATA_DIR, "output")
15
-
16
- def compare_results(expected, converted):
17
- return json.dumps(expected) == json.dumps(converted)
18
-
19
- def read_json_file(file_path):
20
- with open(file_path, "r") as f:
21
- data = f.read()
22
- return json.loads(data)
23
-
24
- example_res = {
25
- "total": 100,
26
- "start": 50000,
27
- "entries": [
28
- {
29
- "id": 1126963,
30
- "updatedDate": "2016-02-09T08:42:52Z",
31
- "createdDate": "2014-05-17T17:22:00Z",
32
- "deleted": False,
33
- "suppressed": False,
34
- "marc": {
35
- "leader": "00000nz a2200145n 4500",
36
- "fields": [
37
- {
38
- # "tag": "100",
39
- "data": {
40
- "ind1": "1",
41
- "ind2": " ",
42
- "subfields": [
43
- {
44
- "code": "a",
45
- "data": "Viggor, Signe,"
46
- },
47
- {
48
- "code": "d",
49
- "data": "1975-"
50
- }
51
- ]
52
- }
53
- },
54
- ]}}]}
55
-
56
-
57
-
58
-
59
- def test_convert_bibs_response():
60
- response = read_json_file(os.path.join(INPUT_DIR, "bibs.json"))
61
-
62
- converter = SierraResponseConverter(response)
63
- data = converter.convert()
64
-
65
- expected = read_json_file(os.path.join(OUTPUT_DIR, "bibs.json"))
66
-
67
- assert compare_results(expected, data)
68
-
69
-
70
- def test_convert_keywords_response():
71
- with open(os.path.join(INPUT_DIR, "keywords.json"), "r") as f:
72
- response = f.read()
73
- response = json.loads(response)
74
-
75
- converter = SierraResponseConverter(response)
76
- data = converter.convert()
77
-
78
-
79
- expected = read_json_file(os.path.join(OUTPUT_DIR, "keywords.json"))
80
-
81
- assert compare_results(expected, data)
82
-
83
-
84
- def test_convert_authorities_response():
85
- with open(os.path.join(INPUT_DIR, "authorities.json"), "r") as f:
86
- response = f.read()
87
- response = json.loads(response)
88
-
89
- converter = SierraResponseConverter(response)
90
- data = converter.convert()
91
-
92
- expected = read_json_file(os.path.join(OUTPUT_DIR, "authorities.json"))
93
-
94
- assert compare_results(expected, data)
95
-
96
- def test_converter_handles_marc_in_json_response():
97
- """ Gracefully handle entries already in MARC-in-JSON format """
98
- with open(os.path.join(INPUT_DIR, "bibsmarc.json"), "r") as f:
99
- response = f.read()
100
- response = json.loads(response)
101
-
102
- converter = SierraResponseConverter(response)
103
- data = converter.convert()
104
-
105
- expected = read_json_file(os.path.join(OUTPUT_DIR, "bibsmarc.json"))
106
-
107
- assert compare_results(expected, data)
108
-
109
- def test_convert_with_wrong_format():
110
- with pytest.raises(SierraResponseConverterException):
111
- SierraResponseConverter("$")
112
-
113
- def test_convert_missing_tag():
114
- with pytest.raises(SierraResponseConverterException):
115
- response = example_res.copy()
116
- response["entries"][0]["marc"]["fields"][0].pop("tag", None)
117
-
118
- converter = SierraResponseConverter(response)
119
- converter.convert()
120
-
121
- def test_no_entries_in_response():
122
- with pytest.raises(SierraResponseConverterException):
123
- response = example_res.copy()
124
- response.pop("entries", [])
125
-
126
- converter = SierraResponseConverter(response)
127
- converter.convert()
File without changes
File without changes
File without changes
File without changes
File without changes