rara-tools 0.7.14__tar.gz → 0.7.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

Files changed (67)
  1. {rara_tools-0.7.14/rara_tools.egg-info → rara_tools-0.7.15}/PKG-INFO +1 -1
  2. rara_tools-0.7.15/VERSION +1 -0
  3. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/authorities.py +1 -1
  4. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/base.py +20 -12
  5. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/bibs.py +1 -1
  6. {rara_tools-0.7.14 → rara_tools-0.7.15/rara_tools.egg-info}/PKG-INFO +1 -1
  7. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_normalization.py +66 -1
  8. rara_tools-0.7.14/VERSION +0 -1
  9. {rara_tools-0.7.14 → rara_tools-0.7.15}/LICENSE.md +0 -0
  10. {rara_tools-0.7.14 → rara_tools-0.7.15}/README.md +0 -0
  11. {rara_tools-0.7.14 → rara_tools-0.7.15}/pyproject.toml +0 -0
  12. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/__init__.py +0 -0
  13. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/digitizer.py +0 -0
  14. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/general.py +0 -0
  15. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/language_evaluator.py +0 -0
  16. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/linker.py +0 -0
  17. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/meta_extractor.py +0 -0
  18. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/normalizers.py +0 -0
  19. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/parsers.py +0 -0
  20. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/constants/subject_indexer.py +0 -0
  21. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/converters.py +0 -0
  22. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/core_formatter.py +0 -0
  23. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/formatted_keyword.py +0 -0
  24. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/formatted_meta.py +0 -0
  25. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/core_formatters/formatted_object.py +0 -0
  26. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/decorators.py +0 -0
  27. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/digar_schema_converter.py +0 -0
  28. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/elastic.py +0 -0
  29. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/exceptions.py +0 -0
  30. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/__init__.py +0 -0
  31. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/reader.py +0 -0
  32. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/viaf.py +0 -0
  33. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
  34. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
  35. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
  36. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
  37. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
  38. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
  39. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/base_record.py +0 -0
  40. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/ems_record.py +0 -0
  41. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/organization_record.py +0 -0
  42. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/person_record.py +0 -0
  43. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/marc_records/title_record.py +0 -0
  44. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
  45. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/tools/marc_converter.py +0 -0
  46. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
  47. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/s3.py +0 -0
  48. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/task_reporter.py +0 -0
  49. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/utils.py +0 -0
  50. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/SOURCES.txt +0 -0
  51. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/dependency_links.txt +0 -0
  52. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/requires.txt +0 -0
  53. {rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools.egg-info/top_level.txt +0 -0
  54. {rara_tools-0.7.14 → rara_tools-0.7.15}/requirements.txt +0 -0
  55. {rara_tools-0.7.14 → rara_tools-0.7.15}/setup.cfg +0 -0
  56. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_digar_schema_converter.py +0 -0
  57. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_elastic.py +0 -0
  58. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_elastic_vector_and_search_operations.py +0 -0
  59. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_entity_normalizers.py +0 -0
  60. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_formatters.py +0 -0
  61. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_marc_parsers.py +0 -0
  62. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_s3_exceptions.py +0 -0
  63. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_s3_file_operations.py +0 -0
  64. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_sierra_converters.py +0 -0
  65. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_task_reporter.py +0 -0
  66. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_utils.py +0 -0
  67. {rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.14
3
+ Version: 0.7.15
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.7.15
@@ -11,7 +11,7 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
11
11
  """ Normalize authorities records """
12
12
 
13
13
  def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
14
- classified_fields: List[str] = [],
14
+ classified_fields: List[List[dict]] = [],
15
15
  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
16
16
  REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "667"]):
17
17
 
@@ -34,7 +34,7 @@ class RecordNormalizer:
34
34
  entities: List of Full names (str). If included, will use NormLinker to match with normalized records on KATA elastic.
35
35
  """
36
36
 
37
- def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[str] = [],
37
+ def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[List[dict]] = [],
38
38
  ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
39
39
 
40
40
  # Include, if will replace existing field
@@ -44,16 +44,16 @@ class RecordNormalizer:
44
44
  # leader applied to new records
45
45
  self.DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
46
46
 
47
- def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[str] = []) -> JSONReader:
47
+ def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[List[dict]] = []) -> JSONReader:
48
48
  """Setup initial MARC records and data.
49
49
 
50
50
  If no linked entities or more than one linked entity found, we create a new record.
51
51
  If one linked entity found, we create an updated record from the linked entity data.
52
52
  """
53
-
54
53
  linked_records = []
55
-
56
- for linked in linking_results or []:
54
+
55
+ for idx, linked in enumerate(linking_results or []):
56
+
57
57
  if not isinstance(linked, dict):
58
58
  continue
59
59
 
@@ -70,12 +70,12 @@ class RecordNormalizer:
70
70
  })
71
71
  self.records_extra_data.append({
72
72
  "entity": entity,
73
- "classified_fields": classified_fields,
73
+ "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
74
74
  "edited": False
75
75
  })
76
76
  continue
77
77
 
78
- if len(linked_info) > 1:
78
+ elif len(linked_info) > 1:
79
79
  # Multiple linked entities found, create new record
80
80
  logger.info(
81
81
  f"Multiple linked entities found for {entity}. Creating new record.")
@@ -85,7 +85,7 @@ class RecordNormalizer:
85
85
  })
86
86
  self.records_extra_data.append({
87
87
  "entity": entity,
88
- "classified_fields": classified_fields,
88
+ "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
89
89
  "edited": False
90
90
  })
91
91
  continue
@@ -100,7 +100,7 @@ class RecordNormalizer:
100
100
  self.records_extra_data.append({
101
101
  "entity": entity,
102
102
  "viaf": linked_item.get("viaf", {}),
103
- "classified_fields": classified_fields,
103
+ "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
104
104
  "type": "linked",
105
105
  "edited": True
106
106
  })
@@ -359,10 +359,12 @@ class RecordNormalizer:
359
359
  def _include_classified_fields(self, record: Record, classified_fields: list[dict]) -> None:
360
360
  """Include classified fields from core, if any.
361
361
  e.g. classified_fields=[{'670': {'ind1': ' ', 'ind2': '0', 'subfields': [{'a': 'Päikesekiri, 2021'}]}}]
362
+
363
+ For each record, we need a list of dicts, to handle repeatable fields.
362
364
  """
363
365
  if not classified_fields:
364
366
  return
365
-
367
+
366
368
  fields = [
367
369
  Field(
368
370
  tag=str(tag),
@@ -373,7 +375,6 @@ class RecordNormalizer:
373
375
  for tag, v in field_dict.items()
374
376
  ]
375
377
 
376
- logger.info(f"Adding classified fields: {[f.tag for f in fields]}")
377
378
  self._add_fields_to_record(record, fields)
378
379
 
379
380
  def _normalize_common(self, record: Record, is_editing_existing_record: bool, classified_fields: List[dict]) -> None:
@@ -474,6 +475,13 @@ class RecordNormalizer:
474
475
  def _normalize_record(self, record: Record, sierraID: str,
475
476
  viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
476
477
  return record
478
+
479
+ def get_record(self, index: int) -> Record:
480
+ """Get normalized record by index."""
481
+ for idx, record in enumerate(self):
482
+ if idx == index:
483
+ return record
484
+ raise IndexError("Record index out of range.")
477
485
 
478
486
  @property
479
487
  def data(self) -> List[dict]:
@@ -490,7 +498,7 @@ class RecordNormalizer:
490
498
  @property
491
499
  def first(self) -> Record:
492
500
  return next(iter(self))
493
-
501
+
494
502
  def __iter__(self) -> Iterator:
495
503
  viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
496
504
  sierra_id_path = "sierraID"
@@ -12,7 +12,7 @@ class BibRecordNormalizer(RecordNormalizer):
12
12
  """ Normalize bib records. """
13
13
 
14
14
  def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
15
- classified_fields: List[str] = [],
15
+ classified_fields: List[List[dict]] = [],
16
16
  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
17
17
  REPEATABLE_FIELDS: List[str] = ["667"]):
18
18
  super().__init__(linking_results, sierra_data, classified_fields)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.14
3
+ Version: 0.7.15
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -741,6 +741,7 @@ def test_classified_fields_added_to_linked_record():
741
741
  """ Test that classified fields Can be passed to normalizer & added to linked record """
742
742
 
743
743
  classified_fields = [
744
+ [
744
745
  {
745
746
  "670": {
746
747
  "ind1": " ",
@@ -753,6 +754,7 @@ def test_classified_fields_added_to_linked_record():
753
754
  }
754
755
  }
755
756
  ]
757
+ ]
756
758
  # Case 1 - no 670 exists, should be added to linked record
757
759
  for normalizer in (AuthoritiesRecordNormalizer, BibRecordNormalizer):
758
760
  linking_results = [MOCK_LINKER_NOT_FOUND]
@@ -776,4 +778,67 @@ def test_classified_fields_added_to_linked_record():
776
778
  record = normalizer.first
777
779
  fields_670 = record.get_fields("670")
778
780
  assert len(fields_670) == 1
779
- assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
781
+ assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
782
+
783
+ def test_classified_data_with_multiple_records():
784
+ """ Test classified data with multiple records - should match by sierraID """
785
+
786
+ classified_fields = [
787
+ [{
788
+ "670": {
789
+ "ind1": " ",
790
+ "ind2": "0",
791
+ "subfields": [
792
+ {
793
+ "a": "Päikesekiri, 2021"
794
+ }
795
+ ]
796
+ },
797
+ "111": {
798
+ "ind1": "2",
799
+ "ind2": " ",
800
+ "subfields": [
801
+ {
802
+ "a": "Eesti Kirjandusmuuseum"
803
+ }
804
+ ]
805
+ }
806
+ }],
807
+ [],
808
+ [{
809
+ "670": {
810
+ "ind1": " ",
811
+ "ind2": "0",
812
+ "subfields": [
813
+ {
814
+ "a": "Teine kirjeldus, 2022"
815
+ }
816
+ ]
817
+ }
818
+ }],
819
+ ]
820
+
821
+ # Case 1 - no 670 exists, should be added to linked record
822
+ for normalizer in (AuthoritiesRecordNormalizer, BibRecordNormalizer):
823
+ linking_results = [MOCK_LINKER_NOT_FOUND, MOCK_LINKER_ONE_FOUND, MOCK_LINKER_NOT_FOUND]
824
+ normalizer = normalizer(linking_results=linking_results, classified_fields=classified_fields)
825
+
826
+ # Check first record - should have 670 & 111 from classified data
827
+ record = normalizer.first
828
+ assert len(record.get_fields("670")) == 1
829
+ fields_670 = record.get_fields("670")[0]
830
+ fields_111 = record.get_fields("111")[0]
831
+ assert fields_670.get_subfields("a")[0] == "Päikesekiri, 2021"
832
+ assert fields_111.get_subfields("a")[0] == "Eesti Kirjandusmuuseum"
833
+
834
+ # Check second record - should not have 670 from classified data
835
+ record = normalizer.get_record(1)
836
+ assert len(record.get_fields("670")) == 1
837
+ fields_670 = record.get_fields("670")[0]
838
+ assert fields_670.get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
839
+
840
+ # Check third record - should have 670 from classified data
841
+ record = normalizer.get_record(2)
842
+ assert len(record.get_fields("670")) == 1
843
+ fields_670 = record.get_fields("670")[0]
844
+ assert fields_670.get_subfields("a")[0] == "Teine kirjeldus, 2022"
rara_tools-0.7.14/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.7.14
File without changes
File without changes
File without changes
File without changes