rara-tools 0.7.15__tar.gz → 0.7.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

Files changed (69)
  1. {rara_tools-0.7.15/rara_tools.egg-info → rara_tools-0.7.17}/PKG-INFO +1 -1
  2. rara_tools-0.7.17/VERSION +1 -0
  3. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/normalizers/base.py +51 -32
  4. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/normalizers/bibs.py +2 -15
  5. rara_tools-0.7.17/rara_tools/parsers/tools/validators.py +54 -0
  6. {rara_tools-0.7.15 → rara_tools-0.7.17/rara_tools.egg-info}/PKG-INFO +1 -1
  7. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools.egg-info/SOURCES.txt +2 -0
  8. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_normalization.py +110 -3
  9. rara_tools-0.7.17/tests/test_validators.py +55 -0
  10. rara_tools-0.7.15/VERSION +0 -1
  11. {rara_tools-0.7.15 → rara_tools-0.7.17}/LICENSE.md +0 -0
  12. {rara_tools-0.7.15 → rara_tools-0.7.17}/README.md +0 -0
  13. {rara_tools-0.7.15 → rara_tools-0.7.17}/pyproject.toml +0 -0
  14. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/__init__.py +0 -0
  15. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/digitizer.py +0 -0
  16. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/general.py +0 -0
  17. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/language_evaluator.py +0 -0
  18. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/linker.py +0 -0
  19. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/meta_extractor.py +0 -0
  20. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/normalizers.py +0 -0
  21. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/parsers.py +0 -0
  22. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/constants/subject_indexer.py +0 -0
  23. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/converters.py +0 -0
  24. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/core_formatters/core_formatter.py +0 -0
  25. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/core_formatters/formatted_keyword.py +0 -0
  26. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/core_formatters/formatted_meta.py +0 -0
  27. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/core_formatters/formatted_object.py +0 -0
  28. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/decorators.py +0 -0
  29. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/digar_schema_converter.py +0 -0
  30. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/elastic.py +0 -0
  31. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/exceptions.py +0 -0
  32. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/normalizers/__init__.py +0 -0
  33. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/normalizers/authorities.py +0 -0
  34. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/normalizers/reader.py +0 -0
  35. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/normalizers/viaf.py +0 -0
  36. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
  37. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
  38. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
  39. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
  40. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
  41. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
  42. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_records/base_record.py +0 -0
  43. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_records/ems_record.py +0 -0
  44. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_records/organization_record.py +0 -0
  45. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_records/person_record.py +0 -0
  46. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/marc_records/title_record.py +0 -0
  47. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
  48. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/tools/marc_converter.py +0 -0
  49. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
  50. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/s3.py +0 -0
  51. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/task_reporter.py +0 -0
  52. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools/utils.py +0 -0
  53. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools.egg-info/dependency_links.txt +0 -0
  54. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools.egg-info/requires.txt +0 -0
  55. {rara_tools-0.7.15 → rara_tools-0.7.17}/rara_tools.egg-info/top_level.txt +0 -0
  56. {rara_tools-0.7.15 → rara_tools-0.7.17}/requirements.txt +0 -0
  57. {rara_tools-0.7.15 → rara_tools-0.7.17}/setup.cfg +0 -0
  58. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_digar_schema_converter.py +0 -0
  59. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_elastic.py +0 -0
  60. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_elastic_vector_and_search_operations.py +0 -0
  61. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_entity_normalizers.py +0 -0
  62. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_formatters.py +0 -0
  63. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_marc_parsers.py +0 -0
  64. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_s3_exceptions.py +0 -0
  65. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_s3_file_operations.py +0 -0
  66. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_sierra_converters.py +0 -0
  67. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_task_reporter.py +0 -0
  68. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_utils.py +0 -0
  69. {rara_tools-0.7.15 → rara_tools-0.7.17}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.15
3
+ Version: 0.7.17
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.7.17
@@ -3,6 +3,8 @@ from pymarc import (Field, Subfield, JSONReader, Record)
3
3
  from typing import List, Optional, Iterator
4
4
  from rara_tools.normalizers.reader import SafeJSONReader
5
5
 
6
+ from rara_tools.parsers.tools.validators import filter_names
7
+
6
8
  from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
7
9
  from rara_tools.constants.normalizers import (
8
10
  DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
@@ -51,10 +53,23 @@ class RecordNormalizer:
51
53
  If one linked entity found, we create an updated record from the linked entity data.
52
54
  """
53
55
  linked_records = []
56
+
57
+ def handle_create_new_record(entity, idx):
58
+ logger.info(f"No linked entities found for {entity}, Creating new record.")
59
+ linked_records.append({
60
+ "leader": self.DEFAULT_LEADER,
61
+ "fields": []
62
+ })
63
+ self.records_extra_data.append({
64
+ "entity": entity,
65
+ "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
66
+ "edited": False,
67
+ })
54
68
 
55
69
  for idx, linked in enumerate(linking_results or []):
56
70
 
57
71
  if not isinstance(linked, dict):
72
+ logger.debug(f"Skipping invalid linked result: {linked}")
58
73
  continue
59
74
 
60
75
  entity = linked.get("original_entity")
@@ -62,46 +77,32 @@ class RecordNormalizer:
62
77
 
63
78
  if not isinstance(linked_info, list) or not linked_info:
64
79
  # No linked entities found, create new record
65
- logger.info(
66
- f"No linked entities found for {entity}, Creating new record.")
67
- linked_records.append({
68
- "leader": self.DEFAULT_LEADER,
69
- "fields": []
70
- })
71
- self.records_extra_data.append({
72
- "entity": entity,
73
- "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
74
- "edited": False
75
- })
80
+ handle_create_new_record(entity, idx)
76
81
  continue
77
82
 
78
83
  elif len(linked_info) > 1:
79
84
  # Multiple linked entities found, create new record
80
- logger.info(
81
- f"Multiple linked entities found for {entity}. Creating new record.")
82
- linked_records.append({
83
- "leader": self.DEFAULT_LEADER,
84
- "fields": []
85
- })
86
- self.records_extra_data.append({
87
- "entity": entity,
88
- "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
89
- "edited": False
90
- })
85
+ handle_create_new_record(entity, idx)
91
86
  continue
92
87
 
93
88
  elif len(linked_info) == 1:
89
+ # one record match found, we update existing record
90
+
94
91
  linked_item = linked_info[0]
95
92
  if not isinstance(linked_item, dict):
96
93
  continue
97
94
 
95
+ # handle case where we have linked an entity without a record
96
+ if not linked_item.get("json", None):
97
+ handle_create_new_record(entity, idx)
98
+ continue
99
+
98
100
  linked_records.append(linked_item.get("json", {}))
99
101
 
100
102
  self.records_extra_data.append({
101
103
  "entity": entity,
102
104
  "viaf": linked_item.get("viaf", {}),
103
105
  "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
104
- "type": "linked",
105
106
  "edited": True
106
107
  })
107
108
  continue
@@ -109,7 +110,6 @@ class RecordNormalizer:
109
110
  self.records_extra_data.extend(
110
111
  {
111
112
  "sierraID": obj.get("sierraID"),
112
- "type": "sierra",
113
113
  "edited": True
114
114
  }
115
115
  for obj in (sierra_data or [])
@@ -313,25 +313,34 @@ class RecordNormalizer:
313
313
  if viaf_record:
314
314
  self._include_name_variations(record, viaf_record)
315
315
 
316
- def _include_name_variations(self, record: Record, viaf_record: VIAFRecord) -> None:
316
+ def _include_name_variations(self, record: Record, viaf_record: VIAFRecord, filter_variations=True) -> None:
317
317
  """ Include name variations from VIAF record as 400|t fields """
318
318
 
319
319
  if not viaf_record or not viaf_record.name_variations:
320
320
  return
321
321
 
322
322
  existing_name_variations = record.get_fields("400")
323
- existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("t")]
323
+ existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("a")]
324
+
325
+ if filter_variations:
326
+ allowed_variations = filter_names(viaf_record.name_variations)
327
+ logger.debug(
328
+ f"filtered out {len(viaf_record.name_variations) - len(allowed_variations)} name variations for '{viaf_record.name}'"
329
+ )
330
+
331
+ else:
332
+ allowed_variations = viaf_record.name_variations
324
333
 
325
334
  fields = []
326
335
 
327
- for variation in viaf_record.name_variations:
336
+ for variation in allowed_variations:
328
337
  if variation not in existing_variations:
329
338
  fields.append(
330
339
  Field(
331
340
  tag="400",
332
341
  indicators=EMPTY_INDICATORS,
333
342
  subfields=[
334
- Subfield("t", variation)
343
+ Subfield("a", variation)
335
344
  ]
336
345
  )
337
346
  )
@@ -465,6 +474,8 @@ class RecordNormalizer:
465
474
  verify=verify,
466
475
  threshold=threshold
467
476
  )
477
+ if viaf_record:
478
+ logger.debug(f"VIAF {search_term}, linked to ID: {viaf_record.viaf_id}")
468
479
 
469
480
  except Exception as e:
470
481
  logger.error(
@@ -473,7 +484,8 @@ class RecordNormalizer:
473
484
  return viaf_record
474
485
 
475
486
  def _normalize_record(self, record: Record, sierraID: str,
476
- viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
487
+ viaf_record: VIAFRecord, is_editing_existing_record: bool,
488
+ original_entity: str) -> Record:
477
489
  return record
478
490
 
479
491
  def get_record(self, index: int) -> Record:
@@ -500,19 +512,26 @@ class RecordNormalizer:
500
512
  return next(iter(self))
501
513
 
502
514
  def __iter__(self) -> Iterator:
503
- viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
515
+ # viaf_id_path = "viaf.original.queryResult.viafID"
516
+ viaf_id_path = "viaf.parsed.viaf_id"
517
+
504
518
  sierra_id_path = "sierraID"
505
-
519
+
506
520
  for record, extra_data in zip(self.records, self.records_extra_data):
507
521
 
508
522
  sierra_id = glom(extra_data, sierra_id_path, default="")
509
523
  viaf_id = glom(extra_data, viaf_id_path, default=None)
510
- classified_fields = extra_data.get("classified_fields", [])
511
524
 
525
+ classified_fields = extra_data.get("classified_fields", [])
512
526
  entity = extra_data.get("entity")
513
527
  is_editing_existing_record = extra_data.get("edited") == True
514
528
 
515
529
  viaf_record = self._get_viaf_record(record, viaf_id, entity)
530
+ if viaf_record:
531
+ logger.debug(
532
+ f"linked VIAF record with ID {viaf_record.viaf_id} for entity '{entity}'"
533
+ )
534
+
516
535
  record = self._normalize_common(record, is_editing_existing_record, classified_fields)
517
536
 
518
537
  normalized_record = self._normalize_record(
@@ -73,26 +73,13 @@ class BibRecordNormalizer(RecordNormalizer):
73
73
 
74
74
 
75
75
  def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord, original_entity: str) -> None:
76
-
77
76
  if not viaf_record:
78
77
  # viaf record not found, include original entity as 100|t
79
78
  self._add_author(record, viaf_record=None, original_entity=original_entity)
80
79
  return record
81
-
82
- viaf_id = viaf_record.viaf_id
83
- fields = [
84
- Field(
85
- tag="035",
86
- indicators=EMPTY_INDICATORS,
87
- subfields=[
88
- Subfield("a", viaf_id)
89
- ]
90
- )
91
- ]
92
-
93
- self._add_fields_to_record(record, fields)
80
+
94
81
  self._add_author(record, viaf_record, original_entity=original_entity)
95
-
82
+
96
83
  def _normalize_record(self, record: Record, sierraID: str,
97
84
  viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
98
85
 
@@ -0,0 +1,54 @@
1
+ import regex as re
2
+ from typing import List
3
+
4
+ def has_valid_chars(entity: str, allow_cyrillic: bool = True) -> bool:
5
+ """ Checks if entity contains any valid characters in latin
6
+ or in cyrillic, if the latter is enabled
7
+
8
+ Parameters
9
+ ------------
10
+ entity: str
11
+ String to validate.
12
+ allow_cyrillic: bool
13
+ Allow strings in cyrillic?
14
+
15
+ Returns
16
+ ------------
17
+ bool
18
+ Boolean value indicating, if the string
19
+ contains any valid characters.
20
+
21
+ """
22
+ # Check for latin characters
23
+ is_valid = bool(re.search(r"[a-züõöäA-ZÜÕÖÄ]", entity))
24
+
25
+ if allow_cyrillic and not is_valid:
26
+ # If cyrillic characters are allowed,
27
+ # check for them as well
28
+ is_valid = bool(re.search(r"[а-яА-Я]", entity))
29
+
30
+ return is_valid
31
+
32
+
33
+ def filter_names(names: List[str], allow_cyrillic: bool = True) -> List[str]:
34
+ """ Filters out names not in allowed encodings (latin / cyrillic).
35
+
36
+ Parameters
37
+ ------------
38
+ names: List[str]
39
+ Names to filters.
40
+ allow_cyrillic: bool
41
+ Allow strings in cyrillic?
42
+
43
+ Returns
44
+ ------------
45
+ List[str]
46
+ List of filtered names.
47
+
48
+ """
49
+ filtered_names = [
50
+ name for name in names
51
+ if has_valid_chars(entity=name, allow_cyrillic=allow_cyrillic)
52
+ ]
53
+ return filtered_names
54
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.15
3
+ Version: 0.7.17
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -49,6 +49,7 @@ rara_tools/parsers/marc_records/title_record.py
49
49
  rara_tools/parsers/tools/entity_normalizers.py
50
50
  rara_tools/parsers/tools/marc_converter.py
51
51
  rara_tools/parsers/tools/russian_transliterator.py
52
+ rara_tools/parsers/tools/validators.py
52
53
  tests/test_digar_schema_converter.py
53
54
  tests/test_elastic.py
54
55
  tests/test_elastic_vector_and_search_operations.py
@@ -61,4 +62,5 @@ tests/test_s3_file_operations.py
61
62
  tests/test_sierra_converters.py
62
63
  tests/test_task_reporter.py
63
64
  tests/test_utils.py
65
+ tests/test_validators.py
64
66
  tests/test_viaf_client.py
@@ -5,6 +5,7 @@ from rara_tools.constants import YYMMDD_FORMAT
5
5
  from rara_tools.normalizers import (BibRecordNormalizer, AuthoritiesRecordNormalizer)
6
6
  from tests.test_utils import (get_linker_res_example, get_formatted_sierra_response,
7
7
  check_record_tags_sorted, check_no_dupe_tag_values, check_record_tags_have_values)
8
+ from rara_tools.normalizers.viaf import VIAFRecord
8
9
 
9
10
  from rara_tools.constants.linker import EntityType
10
11
 
@@ -212,7 +213,7 @@ def test_missing_fields_created_bibrecord_normalization():
212
213
  for record in normalizer_entities_only:
213
214
  check_record_tags_have_values(
214
215
  record, ["008", # Sierra related, always with bibs
215
- "035", "100", # VIAf enriched
216
+ "100", # VIAf enriched
216
217
  ] + REQUIRED_FIELDS
217
218
  )
218
219
  validate_bibrecord_normalized(record, has_viaf_data=True)
@@ -752,7 +753,7 @@ def test_classified_fields_added_to_linked_record():
752
753
  }
753
754
  ]
754
755
  }
755
- }
756
+ }
756
757
  ]
757
758
  ]
758
759
  # Case 1 - no 670 exists, should be added to linked record
@@ -765,7 +766,7 @@ def test_classified_fields_added_to_linked_record():
765
766
  assert len(fields_670) == 1
766
767
  assert fields_670[0].get_subfields("a")[0] == "Päikesekiri, 2021"
767
768
 
768
- # Case 1 - existing record with 670 should not update (same behavior for both normalizers)
769
+ # Case 2 - existing record with 670 should not update (same behavior for both normalizers)
769
770
  linker_res = get_linker_res_example(
770
771
  "oneFound.json")
771
772
  linking_results = [linker_res]
@@ -779,7 +780,54 @@ def test_classified_fields_added_to_linked_record():
779
780
  fields_670 = record.get_fields("670")
780
781
  assert len(fields_670) == 1
781
782
  assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
783
+
784
+ def get_046_field(year: str) -> dict:
785
+ return {
786
+ "046": {
787
+ "ind1": " ",
788
+ "ind2": " ",
789
+ "subfields": [
790
+ {"k": year }
791
+ ]
792
+ }
793
+ }
782
794
 
795
+ # Case 3 - 046 $k - publication date Passed for bib
796
+ classified_fields = [
797
+ [get_046_field("2021")],
798
+ [get_046_field("1999")],
799
+ [get_046_field("2022")]
800
+ ]
801
+
802
+
803
+ mock_046_exists = MOCK_LINKER_ONE_FOUND.copy()
804
+ mock_046_exists["linked_info"][0]["json"]["fields"].append(get_046_field("2000"))
805
+
806
+ # for new record should get included
807
+ linking_results = [MOCK_LINKER_NOT_FOUND, # new record
808
+ MOCK_LINKER_ONE_FOUND, # new record
809
+ MOCK_LINKER_NOT_FOUND] # editing existing record
810
+
811
+ normalizer = BibRecordNormalizer(linking_results=linking_results, classified_fields=classified_fields)
812
+
813
+ # for i, record in enumerate(normalizer):
814
+ # first two should have 046 from classified data
815
+ record1 = normalizer.get_record(0)
816
+ fields_046 = record1.get_fields("046")
817
+ assert len(fields_046) == 1
818
+ assert fields_046[0].get_subfields("k")[0] == "2021"
819
+
820
+ record2 = normalizer.get_record(1)
821
+ fields_046 = record2.get_fields("046")
822
+ assert len(fields_046) == 1
823
+ # should be unchanged, aka 2000
824
+ assert fields_046[0].get_subfields("k")[0] == "2000"
825
+
826
+ record3 = normalizer.get_record(2)
827
+ fields_046 = record3.get_fields("046")
828
+ assert len(fields_046) == 1
829
+ assert fields_046[0].get_subfields("k")[0] == "2022"
830
+
783
831
  def test_classified_data_with_multiple_records():
784
832
  """ Test classified data with multiple records - should match by sierraID """
785
833
 
@@ -842,3 +890,62 @@ def test_classified_data_with_multiple_records():
842
890
  assert len(record.get_fields("670")) == 1
843
891
  fields_670 = record.get_fields("670")[0]
844
892
  assert fields_670.get_subfields("a")[0] == "Teine kirjeldus, 2022"
893
+
894
+
895
+ def test_viaf_name_variations():
896
+ """ Test adding alternative name forms from VIAF to 4XX fields. Should skip some variants """
897
+
898
+ normalizer = AuthoritiesRecordNormalizer()
899
+ record = Record()
900
+
901
+ viaf_record: VIAFRecord = normalizer._get_viaf_record(
902
+ record,
903
+ entity="Jaan Kaplinski"
904
+ )
905
+
906
+ assert viaf_record is not None
907
+ assert len(viaf_record.name_variations) > 0
908
+
909
+ normalizer._add_author(record, viaf_record)
910
+
911
+ fields_4xx = record.get_fields("400") + record.get_fields("410") + record.get_fields("430")
912
+
913
+ unfiltered_name_variations = viaf_record.name_variations
914
+
915
+ assert len(fields_4xx) > 0
916
+ assert len(fields_4xx) < len(unfiltered_name_variations)
917
+
918
+ def test_existing_record_linked_to_viaf_record():
919
+ """ Test existing record linked to VIAF record - should enrich with VIAF data """
920
+
921
+ base_path = "tests/test_data/marc_records/json/"
922
+ with open(os.path.join(base_path, "imbi.json"), "r", encoding="utf-8") as f, \
923
+ open(os.path.join(base_path, "ernits.json"), "r", encoding="utf-8") as f2, \
924
+ open(os.path.join(base_path, "rowling.json"), "r", encoding="utf-8") as f3:
925
+ imbi = json.load(f)
926
+ ernits = json.load(f2)
927
+ rowling = json.load(f3)
928
+
929
+ linking_results = [
930
+ imbi,
931
+ ernits,
932
+ rowling
933
+ ]
934
+
935
+ normalizer = AuthoritiesRecordNormalizer(
936
+ linking_results=linking_results,
937
+ )
938
+
939
+ def get_viaf_url(record: Record):
940
+ field_024 = record.get_fields("024")
941
+ if len(field_024) == 0:
942
+ return None
943
+ return field_024[0].get_subfields("0")[0]
944
+
945
+ viaf_base_url = "http://viaf.org/viaf"
946
+ assert get_viaf_url(normalizer.get_record(0)) == f"{viaf_base_url}/167120147/"
947
+ assert get_viaf_url(normalizer.get_record(1)) == f"{viaf_base_url}/22458146/"
948
+ assert get_viaf_url(normalizer.get_record(1)) == f"{viaf_base_url}/22458146/"
949
+ assert get_viaf_url(normalizer.get_record(2)) == f"{viaf_base_url}/116796842/"
950
+
951
+
@@ -0,0 +1,55 @@
1
+ from rara_tools.parsers.tools.validators import filter_names
2
+ import pytest
3
+
4
+ are_equal = lambda x, y: not bool(set(x).difference(set(y)))
5
+
6
+ names_to_validate = [
7
+ "ליסט, פראנץ",
8
+ "Liszt, Franz",
9
+ "Lißt, Franz",
10
+ "ליסט, פרנץ",
11
+ "Liszt, Ferencz",
12
+ "Лист, Франц",
13
+ "Listz",
14
+ "Lißzt, Franz",
15
+ "Lists, Francis",
16
+ "List, Ferenc",
17
+ "List, Frants리스",
18
+ "List, Ferents",
19
+ "李斯特,弗朗西斯庫斯",
20
+ "ᓕᔅᑦ, ᕗᕌᓐᓯᔅᑲᔅ",
21
+ "리스트, 프란치스코"
22
+ ]
23
+
24
+ valid_names_1 = [
25
+ "Liszt, Franz",
26
+ "Lißt, Franz",
27
+ "Liszt, Ferencz",
28
+ "Лист, Франц",
29
+ "Listz",
30
+ "Lißzt, Franz",
31
+ "Lists, Francis",
32
+ "List, Ferenc",
33
+ "List, Frants리스",
34
+ "List, Ferents"
35
+ ]
36
+
37
+ valid_names_2 = [
38
+ "Liszt, Franz",
39
+ "Lißt, Franz",
40
+ "Liszt, Ferencz",
41
+ "Listz",
42
+ "Lißzt, Franz",
43
+ "Lists, Francis",
44
+ "List, Ferenc",
45
+ "List, Frants리스",
46
+ "List, Ferents"
47
+ ]
48
+
49
+ def test_filtering_latin_cyrillic():
50
+ filtered_names = filter_names(names_to_validate, allow_cyrillic=True)
51
+ assert are_equal(filtered_names, valid_names_1)
52
+
53
+ def test_filtering_latin():
54
+ filtered_names = filter_names(names_to_validate, allow_cyrillic=False)
55
+ assert are_equal(filtered_names, valid_names_2)
rara_tools-0.7.15/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.7.15
File without changes
File without changes
File without changes
File without changes