PyPI - rara-tools - Versions diffs - 0.7.14__tar.gz → 0.7.15__tar.gz - Mend

rara-tools 0.7.14.tar.gz → 0.7.15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rara-tools might be problematic. Click here for more details.

Files changed (67) hide show

{rara_tools-0.7.14/rara_tools.egg-info → rara_tools-0.7.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.7.14
+Version: 0.7.15
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10

rara_tools-0.7.15/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.7.15

{rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/authorities.py RENAMED Viewed

@@ -11,7 +11,7 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
     """ Normalize authorities records """
     def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
-                 classified_fields: List[str] = [],
+                 classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
                  REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "667"]):

{rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/base.py RENAMED Viewed

@@ -34,7 +34,7 @@ class RecordNormalizer:
         entities: List of Full names (str). If included, will use NormLinker to match with normalized records on KATA elastic.
     """
-    def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[str] = [],
+    def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
         # Include, if will replace existing field
@@ -44,16 +44,16 @@ class RecordNormalizer:
         # leader applied to new records
         self.DEFAULT_LEADER = "01682nz  a2200349n  4500" # must be 24 digits
-    def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[str] = []) -> JSONReader:
+    def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[List[dict]] = []) -> JSONReader:
         """Setup initial MARC records and data.
         If no linked entities or more than one linked entity found, we create a new record.
         If one linked entity found, we create an updated record from the linked entity data.
         """
         linked_records = []
-        for linked in linking_results or []:
+        for idx, linked in enumerate(linking_results or []):
             if not isinstance(linked, dict):
                 continue
@@ -70,12 +70,12 @@ class RecordNormalizer:
                 })
                 self.records_extra_data.append({
                     "entity": entity,
-                    "classified_fields": classified_fields,
+                    "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
                     "edited": False
                 })
                 continue
-            if len(linked_info) > 1:
+            elif len(linked_info) > 1:
                 # Multiple linked entities found, create new record
                 logger.info(
                     f"Multiple linked entities found for {entity}. Creating new record.")
@@ -85,7 +85,7 @@ class RecordNormalizer:
                 })
                 self.records_extra_data.append({
                     "entity": entity,
-                    "classified_fields": classified_fields,
+                    "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
                     "edited": False
                 })
                 continue
@@ -100,7 +100,7 @@ class RecordNormalizer:
                 self.records_extra_data.append({
                     "entity": entity,
                     "viaf": linked_item.get("viaf", {}),
-                    "classified_fields": classified_fields,
+                    "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
                     "type": "linked",
                     "edited": True
                 })
@@ -359,10 +359,12 @@ class RecordNormalizer:
     def _include_classified_fields(self, record: Record, classified_fields: list[dict]) -> None:
         """Include classified fields from core, if any.
         e.g. classified_fields=[{'670': {'ind1': ' ', 'ind2': '0', 'subfields': [{'a': 'Päikesekiri, 2021'}]}}]
+        For each record, we need a list of dicts, to handle repeatable fields.
         """
         if not classified_fields:
             return
         fields = [
             Field(
                 tag=str(tag),
@@ -373,7 +375,6 @@ class RecordNormalizer:
             for tag, v in field_dict.items()
         ]
-        logger.info(f"Adding classified fields: {[f.tag for f in fields]}")
         self._add_fields_to_record(record, fields)
     def _normalize_common(self, record: Record, is_editing_existing_record: bool, classified_fields: List[dict]) -> None:
@@ -474,6 +475,13 @@ class RecordNormalizer:
     def _normalize_record(self, record: Record, sierraID: str,
                           viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
         return record
+    def get_record(self, index: int) -> Record:
+        """Get normalized record by index."""
+        for idx, record in enumerate(self):
+            if idx == index:
+                return record
+        raise IndexError("Record index out of range.")
     @property
     def data(self) -> List[dict]:
@@ -490,7 +498,7 @@ class RecordNormalizer:
     @property
     def first(self) -> Record:
         return next(iter(self))
     def __iter__(self) -> Iterator:
         viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
         sierra_id_path = "sierraID"

{rara_tools-0.7.14 → rara_tools-0.7.15}/rara_tools/normalizers/bibs.py RENAMED Viewed

@@ -12,7 +12,7 @@ class BibRecordNormalizer(RecordNormalizer):
     """ Normalize bib records. """
     def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
-                 classified_fields: List[str] = [],
+                 classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
                  REPEATABLE_FIELDS: List[str] = ["667"]):
         super().__init__(linking_results, sierra_data, classified_fields)

{rara_tools-0.7.14 → rara_tools-0.7.15/rara_tools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.7.14
+Version: 0.7.15
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10

{rara_tools-0.7.14 → rara_tools-0.7.15}/tests/test_normalization.py RENAMED Viewed

@@ -741,6 +741,7 @@ def test_classified_fields_added_to_linked_record():
     """ Test that classified fields Can be passed to normalizer & added to linked record """
     classified_fields = [
+    [
         {
             "670": {
                 "ind1": " ",
@@ -753,6 +754,7 @@ def test_classified_fields_added_to_linked_record():
             }
         }
     ]
+    ]
     # Case 1 - no 670 exists, should be added to linked record
     for normalizer in (AuthoritiesRecordNormalizer, BibRecordNormalizer):
         linking_results = [MOCK_LINKER_NOT_FOUND]
@@ -776,4 +778,67 @@ def test_classified_fields_added_to_linked_record():
         record = normalizer.first
         fields_670 = record.get_fields("670")
         assert len(fields_670) == 1
-        assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
+        assert fields_670[0].get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
+def test_classified_data_with_multiple_records():
+    """ Test classified data with multiple records - should match by sierraID """
+    classified_fields = [
+        [{
+            "670": {
+                "ind1": " ",
+                "ind2": "0",
+                "subfields": [
+                    {
+                        "a": "Päikesekiri, 2021"
+                    }
+                ]
+            },
+            "111": {
+                "ind1": "2",
+                "ind2": " ",
+                "subfields": [
+                    {
+                        "a": "Eesti Kirjandusmuuseum"
+                    }
+                ]
+            }
+        }],
+       [],
+       [{
+            "670": {
+                "ind1": " ",
+                "ind2": "0",
+                "subfields": [
+                    {
+                        "a": "Teine kirjeldus, 2022"
+                    }
+                ]
+            }
+        }],
+    ]
+    # Case 1 - no 670 exists, should be added to linked record
+    for normalizer in (AuthoritiesRecordNormalizer, BibRecordNormalizer):
+        linking_results = [MOCK_LINKER_NOT_FOUND, MOCK_LINKER_ONE_FOUND, MOCK_LINKER_NOT_FOUND]
+        normalizer = normalizer(linking_results=linking_results, classified_fields=classified_fields)
+        # Check first record - should have 670 & 111 from classified data
+        record = normalizer.first
+        assert len(record.get_fields("670")) == 1
+        fields_670 = record.get_fields("670")[0]
+        fields_111 = record.get_fields("111")[0]
+        assert fields_670.get_subfields("a")[0] == "Päikesekiri, 2021"
+        assert fields_111.get_subfields("a")[0] == "Eesti Kirjandusmuuseum"
+        # Check second record - should not have 670 from classified data
+        record = normalizer.get_record(1)
+        assert len(record.get_fields("670")) == 1
+        fields_670 = record.get_fields("670")[0]
+        assert fields_670.get_subfields("a")[0] == "Eesti kirjarahva leksikon, 1995."
+        # Check third record - should have 670 from classified data
+        record = normalizer.get_record(2)
+        assert len(record.get_fields("670")) == 1
+        fields_670 = record.get_fields("670")[0]
+        assert fields_670.get_subfields("a")[0] == "Teine kirjeldus, 2022"