PyPI - rara-tools - Versions diffs - 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

rara-tools 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rara-tools might be problematic. Click here for more details.

Files changed (8)

rara_tools/normalizers/authorities.py CHANGED Viewed

@@ -11,7 +11,7 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
     """ Normalize authorities records """
     def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
-                 classified_fields: List[str] = [],
+                 classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
                  REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "667"]):

rara_tools/normalizers/base.py CHANGED Viewed

@@ -34,7 +34,7 @@ class RecordNormalizer:
         entities: List of Full names (str). If included, will use NormLinker to match with normalized records on KATA elastic.
     """
-    def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[str] = [],
+    def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
         # Include, if will replace existing field
@@ -44,17 +44,30 @@ class RecordNormalizer:
         # leader applied to new records
         self.DEFAULT_LEADER = "01682nz  a2200349n  4500" # must be 24 digits
-    def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[str] = []) -> JSONReader:
+    def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[List[dict]] = []) -> JSONReader:
         """Setup initial MARC records and data.
         If no linked entities or more than one linked entity found, we create a new record.
         If one linked entity found, we create an updated record from the linked entity data.
         """
         linked_records = []
-        for linked in linking_results or []:
+        def handle_create_new_record(entity, idx):
+            logger.info(f"No linked entities found for {entity}, Creating new record.")
+            linked_records.append({
+                "leader": self.DEFAULT_LEADER,
+                "fields": []
+            })
+            self.records_extra_data.append({
+                "entity": entity,
+                "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
+                "edited": False,
+            })
+        for idx, linked in enumerate(linking_results or []):
             if not isinstance(linked, dict):
+                logger.debug(f"Skipping invalid linked result: {linked}")
                 continue
             entity = linked.get("original_entity")
@@ -62,46 +75,32 @@ class RecordNormalizer:
             if not isinstance(linked_info, list) or not linked_info:
                 # No linked entities found, create new record
-                logger.info(
-                    f"No linked entities found for {entity}, Creating new record.")
-                linked_records.append({
-                    "leader": self.DEFAULT_LEADER,
-                    "fields": []
-                })
-                self.records_extra_data.append({
-                    "entity": entity,
-                    "classified_fields": classified_fields,
-                    "edited": False
-                })
+                handle_create_new_record(entity, idx)
                 continue
-            if len(linked_info) > 1:
+            elif len(linked_info) > 1:
                 # Multiple linked entities found, create new record
-                logger.info(
-                    f"Multiple linked entities found for {entity}. Creating new record.")
-                linked_records.append({
-                    "leader": self.DEFAULT_LEADER,
-                    "fields": []
-                })
-                self.records_extra_data.append({
-                    "entity": entity,
-                    "classified_fields": classified_fields,
-                    "edited": False
-                })
+                handle_create_new_record(entity, idx)
                 continue
             elif len(linked_info) == 1:
+                # one record match found, we update existing record
                 linked_item = linked_info[0]
                 if not isinstance(linked_item, dict):
                     continue
+                # handle case where we have linked an entity without a record
+                if not linked_item.get("json", None):
+                    handle_create_new_record(entity, idx)
+                    continue
                 linked_records.append(linked_item.get("json", {}))
                 self.records_extra_data.append({
                     "entity": entity,
                     "viaf": linked_item.get("viaf", {}),
-                    "classified_fields": classified_fields,
-                    "type": "linked",
+                    "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
                     "edited": True
                 })
                 continue
@@ -109,7 +108,6 @@ class RecordNormalizer:
         self.records_extra_data.extend(
             {
                 "sierraID": obj.get("sierraID"),
-                "type": "sierra",
                 "edited": True
             }
             for obj in (sierra_data or [])
@@ -359,10 +357,12 @@ class RecordNormalizer:
     def _include_classified_fields(self, record: Record, classified_fields: list[dict]) -> None:
         """Include classified fields from core, if any.
         e.g. classified_fields=[{'670': {'ind1': ' ', 'ind2': '0', 'subfields': [{'a': 'Päikesekiri, 2021'}]}}]
+        For each record, we need a list of dicts, to handle repeatable fields.
         """
         if not classified_fields:
             return
         fields = [
             Field(
                 tag=str(tag),
@@ -373,7 +373,6 @@ class RecordNormalizer:
             for tag, v in field_dict.items()
         ]
-        logger.info(f"Adding classified fields: {[f.tag for f in fields]}")
         self._add_fields_to_record(record, fields)
     def _normalize_common(self, record: Record, is_editing_existing_record: bool, classified_fields: List[dict]) -> None:
@@ -464,6 +463,8 @@ class RecordNormalizer:
                         verify=verify,
                         threshold=threshold
                     )
+                    if viaf_record:
+                        logger.debug(f"VIAF {search_term}, linked to ID: {viaf_record.viaf_id}")
         except Exception as e:
             logger.error(
@@ -472,8 +473,16 @@ class RecordNormalizer:
         return viaf_record
     def _normalize_record(self, record: Record, sierraID: str,
-                          viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
+                          viaf_record: VIAFRecord, is_editing_existing_record: bool,
+                          original_entity: str) -> Record:
         return record
+    def get_record(self, index: int) -> Record:
+        """Get normalized record by index."""
+        for idx, record in enumerate(self):
+            if idx == index:
+                return record
+        raise IndexError("Record index out of range.")
     @property
     def data(self) -> List[dict]:
@@ -490,21 +499,28 @@ class RecordNormalizer:
     @property
     def first(self) -> Record:
         return next(iter(self))
     def __iter__(self) -> Iterator:
-        viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
+        # viaf_id_path = "viaf.original.queryResult.viafID"
+        viaf_id_path = "viaf.parsed.viaf_id"
         sierra_id_path = "sierraID"
         for record, extra_data in zip(self.records, self.records_extra_data):
             sierra_id = glom(extra_data, sierra_id_path, default="")
             viaf_id = glom(extra_data, viaf_id_path, default=None)
-            classified_fields = extra_data.get("classified_fields", [])
+            classified_fields = extra_data.get("classified_fields", [])
             entity = extra_data.get("entity")
             is_editing_existing_record = extra_data.get("edited") == True
             viaf_record = self._get_viaf_record(record, viaf_id, entity)
+            if viaf_record:
+                logger.debug(
+                    f"linked VIAF record with ID {viaf_record.viaf_id} for entity '{entity}'"
+                )
             record = self._normalize_common(record, is_editing_existing_record, classified_fields)
             normalized_record = self._normalize_record(

rara_tools/normalizers/bibs.py CHANGED Viewed

@@ -12,7 +12,7 @@ class BibRecordNormalizer(RecordNormalizer):
     """ Normalize bib records. """
     def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
-                 classified_fields: List[str] = [],
+                 classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
                  REPEATABLE_FIELDS: List[str] = ["667"]):
         super().__init__(linking_results, sierra_data, classified_fields)

{rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.7.14
+Version: 0.7.16
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10

{rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/RECORD RENAMED Viewed

@@ -20,9 +20,9 @@ rara_tools/core_formatters/formatted_keyword.py,sha256=hhi6wh4ErFionjBqYsEeKGbf1
 rara_tools/core_formatters/formatted_meta.py,sha256=WEnMs8K0YeTLGjXn_mxQTpshxcz5_9YjvqcbRFa3M1g,5605
 rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
 rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
-rara_tools/normalizers/authorities.py,sha256=U3IjQW3XaxAiIJ30Jlq6ON8fdXgrtgNwrvX8oy_iKL0,5304
-rara_tools/normalizers/base.py,sha256=SkbzmAOX5C6PbEFC9i_mpzK1u6LMAh7PavG52ULeO2Y,19744
-rara_tools/normalizers/bibs.py,sha256=DMjJj1mCVVWRpL2NuR6b7W1RWqkx50xix34X_tyxJig,3931
+rara_tools/normalizers/authorities.py,sha256=iW3cYOqqVJKy4CcnG9_T6dN-1bBT1e-0jtLYvco-MyQ,5311
+rara_tools/normalizers/base.py,sha256=DhMicY5p_N2SC_E3lbWUvSM77AOy_pBjQpbLSvYWDxM,20488
+rara_tools/normalizers/bibs.py,sha256=s8NGoieCjiftASUb--1YvYZ0VzW6uBt2ZidhLi_wP9A,3938
 rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
 rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
 rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
 rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
 rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
 rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
-rara_tools-0.7.14.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
-rara_tools-0.7.14.dist-info/METADATA,sha256=GIybax1V5ZqALB6SH-oN-e0Cu9-0dbt7rRph7lsEY-Q,4080
-rara_tools-0.7.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rara_tools-0.7.14.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
-rara_tools-0.7.14.dist-info/RECORD,,
+rara_tools-0.7.16.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
+rara_tools-0.7.16.dist-info/METADATA,sha256=yBmOUFVy7V6RyZdN_qBGkY7M8zH7H7h7v_QLQeZ3bAM,4080
+rara_tools-0.7.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rara_tools-0.7.16.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
+rara_tools-0.7.16.dist-info/RECORD,,

{rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/WHEEL RENAMED Viewed

File without changes

{rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{rara_tools-0.7.14.dist-info → rara_tools-0.7.16.dist-info}/top_level.txt RENAMED Viewed

File without changes

rara-tools 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

Potentially problematic release.

rara-tools 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl