PyPI - rara-tools - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

rara-tools 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rara-tools might be problematic. Click here for more details.

Files changed (9) hide show

rara_tools/constants/general.py CHANGED Viewed

@@ -9,3 +9,7 @@ class Status:
 class Queue:
     CORE = "core"
+class Task:
+    """ Task name constants. """
+    # NOTE(review): presumably the name of the task that reports a
+    # component version to the "core" queue - confirm against callers.
+    SEND_VERSION = "send_version_to_core"

rara_tools/constants/normalizers.py CHANGED Viewed

@@ -1,6 +1,44 @@
 from pymarc import Indicators
 import os
+class EntityType:
+    """ String constants naming entity types.
+    PER, ORG, LOC and TITLE are used as keys of VIAF_ENTITY_MAP;
+    KEYWORD and UNK have no VIAF field mapped to them.
+    """
+    PER = "PER"
+    ORG = "ORG"
+    KEYWORD = "EMS_KEYWORD"
+    LOC = "LOC"
+    TITLE = "TITLE"
+    UNK = "UNKNOWN"
 EMPTY_INDICATORS = Indicators(" ", " ")
 VIAF_ALLOWED_SOURCES = ["LC", "DNB", "LNB", "NLL",
                         "ERRR", "J9U"]
+DEFAULT_VIAF_FIELD = "local.names"
+ALLOWED_VIAF_FIELDS = [
+    "cql.any",                          # All fields
+    "local.names",                      # All headings
+    "local.personalNames",              # Personal names
+    "local.corporateNames",             # Corporate names
+    "local.geographicNames",            # Geographic names
+    "local.uniformTitleWorks",          # Works
+    "local.uniformTitleExpressions",    # Expressions
+    "local.mainHeadingEl",              # Preferred headings
+    "Xlocal.names",                     # Exact headings
+    "local.title"                       # Bibliographic titles
+]
+# Maps rara-linker's entity types to the corresponding VIAF search fields.
+# Every value has to be listed in ALLOWED_VIAF_FIELDS, otherwise the VIAF
+# client rejects it and silently falls back to DEFAULT_VIAF_FIELD.
+VIAF_ENTITY_MAP = {
+    EntityType.PER: "local.personalNames",
+    EntityType.ORG: "local.corporateNames",
+    EntityType.LOC: "local.geographicNames",
+    EntityType.TITLE: "local.uniformTitleWorks"
+}
+ALLOWED_VIAF_WIKILINK_LANGS = ["en", "et"]
+VIAF_SIMILARITY_THRESHOLD = 0.92
+VERIFY_VIAF_RECORD = True
+MAX_VIAF_RECORDS_TO_VERIFY = 10

rara_tools/normalizers/base.py CHANGED Viewed

@@ -4,7 +4,10 @@ from typing import List, Optional, Iterator
 from rara_tools.constants import EMPTY_INDICATORS
 from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
+from rara_tools.constants.normalizers import (
+    DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
+    VIAF_SIMILARITY_THRESHOLD, VERIFY_VIAF_RECORD, MAX_VIAF_RECORDS_TO_VERIFY
+)
 from glom import glom
 import logging
 import json
@@ -187,7 +190,7 @@ class RecordNormalizer:
             "Collective": "111"
         }
-        author_type = viaf_record.author_type
+        author_type = viaf_record.name_type
         tag = type_map.get(author_type, "100")
         fields = [
@@ -195,9 +198,9 @@ class RecordNormalizer:
                 tag=tag,
                 indicators=EMPTY_INDICATORS,
                 subfields=[
-                    Subfield("a", viaf_record.author),
-                    Subfield("b", viaf_record.author_type),
-                    Subfield("c", viaf_record.author_type)
+                    Subfield("a", viaf_record.name),
+                    Subfield("b", viaf_record.name_type), # Is this correct??
+                    Subfield("c", viaf_record.name_type)  # Is this correct??
                 ]
             )
         ]
@@ -231,32 +234,45 @@ class RecordNormalizer:
         if entity:
             return entity
         else:
-            return record.author
+            return record.name
+    def _get_viaf_record(self, record: Record, viaf_id: Optional[int] = None,
+            entity: Optional[str] = None, viaf_field: str = DEFAULT_VIAF_FIELD,
+            threshold: float = VIAF_SIMILARITY_THRESHOLD, verify: bool = VERIFY_VIAF_RECORD,
+            max_records: int = MAX_VIAF_RECORDS_TO_VERIFY
+    ) -> Optional[VIAFRecord]:
+        viaf_record = None
-    def _get_viaf_record(self, record: Record, viaf_id: Optional[int] = None, entity: Optional[str] = None) -> Optional[VIAFRecord]:
         try:
             viaf_client = VIAFClient()
             if viaf_id:
-                viaf_info = viaf_client.get_records_by_viaf_id(viaf_id).json()
-                return VIAFRecord(viaf_info)
-            search_term = self._get_viaf_search_term(record, entity)
-            results = viaf_client.get_records_by_search_term(
-                search_term).json()
-            num_records = glom(
-                results, "queryResult.numberOfRecords.value", default=0)
-            if num_records == 1:
-                return VIAFRecord(results)
-            logger.warning(
-                f"Multiple VIAF records found for {search_term}: {num_records}. Skipping.")
+                viaf_records = viaf_client.get_normalized_data_by_ids([viaf_id])
+                if viaf_records:
+                    viaf_record = viaf_records[0]
+            else:
+                search_term = self._get_viaf_search_term(record, entity)
+                if not verify:
+                    logger.warning(
+                        f"Record verification is turned off. If multiple records are " \
+                        f"detected for search term '{search_term}', the first " \
+                        f"result is automatically returned. This might lead to " \
+                        f"some inaccuracies!"
+                    )
+                viaf_record = viaf_client.get_normalized_data_by_search_term(
+                    search_term=search_term,
+                    field=viaf_field,
+                    max_records=max_records,
+                    verify=verify,
+                    threshold=threshold
+                )
         except Exception as e:
-            logger.error(f"Error fetching VIAF record: {e}")
+            logger.error(
+                f"Error fetching VIAF record with ID={viaf_id} / entity='{entity}': {e}"
+            )
+        return viaf_record
     def _normalize_record(self, record: Record, sierraID: str,
                           viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:

rara_tools/normalizers/viaf.py CHANGED Viewed

@@ -1,22 +1,44 @@
 import requests
 import json
-from typing import List
+import regex as re
+from typing import List, Dict
 from collections import defaultdict
+from jellyfish import jaro_winkler_similarity as jw
+from requests.models import Response
+from rara_tools.parsers.tools.entity_normalizers import PersonalName, Normalizer
+from rara_tools.constants.normalizers import (
+    DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
+    VIAF_SIMILARITY_THRESHOLD
+)
 import logging
 logger = logging.getLogger(__name__)
 class VIAFRecord:
+    """ Takes in a VIAF query response JSON and wraps
+    information extraction from it.
+    """
     def __init__(self,
-                 record: dict,
-                 allowed_sources: List[str] = [
-                     "LC", "DNB", "LNB", "NLL", "ERRR", "J9U"]
-                 ):
+            record: dict,
+            allowed_sources: List[str] = [
+                "LC", "DNB", "LNB", "NLL", "ERRR", "J9U"
+            ]
+    ):
+        """ Initializes VIAFRecord class.
+        Parameters
+        -----------
+        record: dict
+            VIAF query response JSON.
+        allowed_sources: List[str]
+            Only extracts information from these sources. Other
+            sources are ignored.
+        """
         self.__record: dict = record
         self.__record_data: dict = {}
         self.__allowed_sources: List[str] = allowed_sources
         self.__viaf_id: int = None
+        self.__viaf_url: str = ""
         self.__name_variations: List[str] = []
         self.__birth_date: str = None
         self.__death_date: str = None
@@ -24,23 +46,156 @@ class VIAFRecord:
         self.__all_fields: dict = {}
         self.__nationality: str = ""
         self.__has_isni: bool = False
-        self.__author: str = ""
-        self.__author_type: str = None
+        self.__name: str = ""
+        self.__name_type: str = ""
         self.__has_isni: str = ""
         self.__activity_start: str = None
         self.__activity_end: str = None
+        self.__works: List[str] = []
+        self.__wikilinks: dict = {}
+        self.__all_wikilinks: List[str] = []
+        # Final value of the has_isni cache: None means "not computed yet",
+        # which is what the has_isni property tests for.
+        self.__has_isni: bool | None = None
+        self.__marc_400: List[dict] = []
+        self.__marc_500: List[dict] = []
+        self.__marc_main: List[dict] = []
+        self.__subfield_indicator: str = ""
+        self.__value_fields: List[str] = [
+            "text", "value", "title", "datafield"
+        ]
+        self.__title_types: List[str] = ["UniformTitleWork"]
+    def __get_data(self, field_name: str, subfield_name: str = "data",
+            allowed_sources: List[str] | None = None
+    ) -> List[str]:
+        """ Collects values from `record_data[field_name][subfield_name]`.
+        Parameters
+        -----------
+        field_name: str
+            Top-level key in the record data.
+        subfield_name: str
+            Key under `field_name` holding the list of entries.
+        allowed_sources: List[str] | None
+            Sources to accept. When empty or None, the sources
+            passed to the constructor are used.
+        Returns
+        -----------
+        List[str]
+            For every entry whose `sources.s` shares at least one
+            source with the allowed ones: the first non-empty value
+            found under the keys listed in `self.__value_fields`.
+            Empty list, if extraction fails (the error is logged).
+        """
+        if not allowed_sources:
+            allowed_sources = self.__allowed_sources
+        data = []
+        try:
+            allowed = set(allowed_sources)
+            entries = self.record_data.get(
+                field_name, {}
+                ).get(subfield_name, [])
+            for entry in entries:
+                sources = entry.get("sources", {}).get("s", [])
+                if allowed.intersection(sources):
+                    for field in self.__value_fields:
+                        value = entry.get(field, "")
+                        if value:
+                            data.append(value)
+                            # Only the first populated value field counts.
+                            break
+        except Exception as e:
+            logger.error(
+                f"Failed extracting data from field '{field_name}' with subfield " \
+                f"'{subfield_name}'. '{field_name}' dict has the following " \
+                f"structure: {self.record_data.get(field_name)}. " \
+                f"Exception reason: {e}."
+            )
+        return data
+    def _get_wikilink_lang(self, wikilink: str) -> str:
+        """ Returns the language prefix of a Wikipedia link, i.e. the
+        word between "https://" and the first dot. Returns an empty
+        string, if the link does not have that shape.
+        """
+        found = re.search(r"(?<=https\W{3})\w+(?=[.])", wikilink)
+        return found.group() if found else ""
+    def _get_marc_field(self, marc_dict: dict, subfield: str = "a") -> str:
+        """ Returns the value of the first subfield with code `subfield`
+        from a MARC21 dict. Returns an empty string, if the dict is not
+        of dtype "MARC21" or no subfield has that code.
+        """
+        if marc_dict.get("dtype", "") != "MARC21":
+            return ""
+        matching_values = (
+            candidate.get("value", "")
+            for candidate in marc_dict.get("subfield", [])
+            if candidate.get("code", "") == subfield
+        )
+        return next(matching_values, "")
+    def _get_marc_tag(self, marc_dict: dict) -> str:
+        """ Returns the "tag" of a MARC21 dict, or an empty string when
+        the dict is not of dtype "MARC21" or has no tag.
+        """
+        is_marc21 = marc_dict.get("dtype", "") == "MARC21"
+        return marc_dict.get("tag", "") if is_marc21 else ""
+    def _get_names(self, marc_dicts: List[dict]) -> List[str]:
+        """ Extracts unique names from MARC dicts, most frequent first.
+        The name is read from the subfield given by
+        `self.subfield_indicator` and cleaned with `_strip_punctuation`.
+        Entries without a usable name are skipped.
+        """
+        names_d = defaultdict(int)
+        for marc_dict in marc_dicts:
+            name = self._get_marc_field(marc_dict, self.subfield_indicator)
+            # A missing subfield yields ""; counting it would produce an
+            # empty "name" and break the punctuation stripping below.
+            if name.strip():
+                names_d[name] += 1
+        # sorted() is stable, so equally frequent names keep input order.
+        name_list = sorted(
+            names_d.items(),
+            key=lambda x: x[1],
+            reverse=True
+        )
+        names = []
+        for raw_name, _count in name_list:
+            _name = self._strip_punctuation(raw_name)
+            if _name and _name not in names:
+                names.append(_name)
+        return names
+    def _get_name(self, marc_dicts: List[dict]) -> str:
+        """ Returns the first name produced by `_get_names` for the
+        given MARC dicts, or an empty string when there is none.
+        """
+        candidates = self._get_names(marc_dicts)
+        return candidates[0] if candidates else ""
+    def _strip_punctuation(self, entity: str) -> str:
+        """ Strips surrounding commas and, unless the last token looks
+        like an initial, surrounding full stops from an entity.
+        Empty or whitespace-only input is returned without raising.
+        """
+        entity = entity.strip(",")
+        # Strip "." only if the last token is not an initial,
+        # e.g: "Meri, Lennart." -> Strip
+        # "Meri, L." -> Do not strip.
+        ent_tokens = entity.split()
+        # No tokens means there is no last token to inspect.
+        if ent_tokens and len(ent_tokens[-1]) > 2:
+            entity = entity.strip(".")
+        return entity
+    def _strip_parenthesis(self, entity: str) -> str:
+        """ Strip information in parenthesis from VIAF records
+        in order to compare the records more easily.
+        Every "(...)" group is removed, whatever its length, and the
+        result is stripped of surrounding whitespace.
+        """
+        # "[^)]*" - the group may hold any number of characters.
+        _entity = re.sub(r"[(][^)]*[)]", "", entity)
+        return _entity.strip()
+    @property
+    def subfield_indicator(self) -> str:
+        """ MARC subfield code holding the name: "t" when `name_type`
+        is one of the title types, "a" otherwise. Computed once.
+        """
+        if self.__subfield_indicator:
+            return self.__subfield_indicator
+        is_title = self.name_type in self.__title_types
+        self.__subfield_indicator = "t" if is_title else "a"
+        return self.__subfield_indicator
     @property
-    def author(self) -> str:
-        if not self.__author:
-            self.__author = self.record_data.get(
-                "mainHeading", {}).get("text", "")
+    def name(self) -> str:
+        # author -> name
+        if not self.__name:
+            if self.marc_main:
+                self.__name = self._get_name(self.marc_main)
+            else:
+                names = self.__get_data("mainHeadings", "data")
+                if names:
+                    self.__name = names[0]
+        return self.__name
     @property
-    def author_type(self) -> str:
-        """type of name (personal, corporate, title, etc)"""
-        if not self.__author_type:
-            self.__author_type = self.record_data.get("nameType")
+    def name_type(self) -> str:
+        # author_type -> name_type
+        """ Type of name (personal, corporate, title, etc)
+        """
+        if not self.__name_type:
+            self.__name_type = self.record_data.get("nameType")
+        return self.__name_type
     @property
     def viaf_id(self) -> int:
@@ -49,18 +204,17 @@ class VIAFRecord:
         return self.__viaf_id
     @property
-    def has_isni(self) -> bool:
-        return bool(self.record_data.get("isni", ""))
+    def viaf_url(self) -> str:
+        if not self.__viaf_url:
+            self.__viaf_url = self.record_data.get(
+                "Document", {}).get("about", "")
+        return self.__viaf_url
-    def __get_data(self, field_name: str) -> List[str]:
-        entries = self.record_data.get(field_name, {}).get("data", [])
-        data = []
-        for entry in entries:
-            sources = entry.get("sources", {}).get("s", [])
-            if set(self.__allowed_sources).intersection(set(sources)):
-                data.append(entry.get("text", ""))
-        return data
+    @property
+    def has_isni(self) -> bool:
+        """ True, if the record data has a non-empty "isni" value.
+        Computed on first access and cached afterwards.
+        """
+        # None marks "not computed yet"; False is a valid cached result,
+        # so an identity check is required here.
+        if self.__has_isni is None:
+            self.__has_isni = bool(self.record_data.get("isni", ""))
+        return self.__has_isni
     @property
     def record_data(self) -> dict:
@@ -75,7 +229,18 @@ class VIAFRecord:
     @property
     def name_variations(self) -> List[str]:
         if not self.__name_variations:
-            self.__name_variations = self.__get_data("mainHeadings")
+            if self.marc_400:
+                var_1 = self._get_names(self.marc_400)
+                var_2 = self._get_names(self.marc_main)
+                _vars = var_1 + var_2
+            else:
+                _vars = self.__get_data("mainHeadings")
+            vars_3 = [Normalizer.clean_entity(v) for v in _vars]
+            vars = _vars + vars_3
+            #print(vars)
+            self.__name_variations = list(set(vars))
         return self.__name_variations
     @property
@@ -117,14 +282,75 @@ class VIAFRecord:
                 nationalities_dict[n.lower()] += 1
             if nationalities:
                 self.__nationality = sorted(
-                    nationalities_dict.items(), key=lambda x: x[1], reverse=True)[0][0]
+                    nationalities_dict.items(),
+                    key=lambda x: x[1],
+                    reverse=True
+                )[0][0]
         return self.__nationality
+    @property
+    def works(self) -> List[str]:
+        """ Unique values found under `titles.work` of the record data.
+        Collected on first access while the cached list is empty.
+        """
+        if self.__works:
+            return self.__works
+        titles = self.__get_data(
+            field_name="titles",
+            subfield_name="work"
+        )
+        self.__works = list(set(titles))
+        return self.__works
+    @property
+    def all_wikilinks(self) -> List[str]:
+        """ Values found under `xLinks.xLink` of the record data,
+        restricted to entries from the "WKP" source.
+        """
+        if self.__all_wikilinks:
+            return self.__all_wikilinks
+        self.__all_wikilinks = self.__get_data(
+            allowed_sources=["WKP"],
+            field_name="xLinks",
+            subfield_name="xLink"
+        )
+        return self.__all_wikilinks
+    @property
+    def wikilinks(self) -> dict:
+        """ Maps language code to wikilink for the links in
+        `all_wikilinks` whose language is listed in
+        ALLOWED_VIAF_WIKILINK_LANGS. A later link of the same
+        language replaces an earlier one.
+        """
+        if self.__wikilinks:
+            return self.__wikilinks
+        for link in self.all_wikilinks:
+            lang = self._get_wikilink_lang(link)
+            if not lang or lang not in ALLOWED_VIAF_WIKILINK_LANGS:
+                continue
+            self.__wikilinks[lang] = link
+        return self.__wikilinks
+    @property
+    def marc_400(self) -> List[dict]:
+        """ Values found under `x400s.x400` of the record data.
+        Collected on first access while the cached list is empty.
+        """
+        if self.__marc_400:
+            return self.__marc_400
+        self.__marc_400 = self.__get_data(
+            subfield_name="x400",
+            field_name="x400s"
+        )
+        return self.__marc_400
+    @property
+    def marc_500(self) -> List[dict]:
+        """ Values found under `x500s.x500` of the record data.
+        Collected on first access while the cached list is empty.
+        """
+        if self.__marc_500:
+            return self.__marc_500
+        self.__marc_500 = self.__get_data(
+            subfield_name="x500",
+            field_name="x500s"
+        )
+        return self.__marc_500
+    @property
+    def marc_main(self) -> List[dict]:
+        """ Values found under `mainHeadings.mainHeadingEl` of the
+        record data. Collected on first access while the cached
+        list is empty.
+        """
+        if self.__marc_main:
+            return self.__marc_main
+        self.__marc_main = self.__get_data(
+            subfield_name="mainHeadingEl",
+            field_name="mainHeadings"
+        )
+        return self.__marc_main
     @property
     def all_fields(self) -> dict:
         if not self.__all_fields:
             self.__all_fields = {
                 "viaf_id": self.viaf_id,
+                "viaf_url": self.viaf_url,
+                "name": self.name,
+                "name_type": self.name_type,
                 "name_variations": self.name_variations,
                 "birth_date": self.birth_date,
                 "death_date": self.death_date,
@@ -133,31 +359,170 @@ class VIAFRecord:
                 "activity_start": self.activity_start,
                 "activity_end": self.activity_end,
                 "has_isni": self.has_isni,
-                "author": self.author
+                "works": self.works,
+                "wikilinks": self.wikilinks,
+                "marc_400": self.marc_400,
+                "marc_500": self.marc_500,
+                "marc_main": self.marc_main
             }
         return self.__all_fields
 class VIAFClient:
     def __init__(self, viaf_api_url: str = "https://viaf.org/api"):
-        self.root_url = viaf_api_url.strip("/")
-        self.record_url = f"{self.root_url}/cluster-record"
-        self.search_url = f"{self.root_url}/search"
-        self.headers = {
+        self.root_url: str = viaf_api_url.strip("/")
+        self.record_url: str = f"{self.root_url}/cluster-record"
+        self.search_url: str = f"{self.root_url}/search"
+        self.headers: dict = {
             "Accept": "application/json",
             "Content-Type": "application/json"
         }
-    def _send_request(self, url: str, data: dict) -> dict:
+    def check_search_term_query(self) -> bool:
+        """ Smoke test for the VIAF search term query: searches for
+        "Lennart Meri" without verification and expects the returned
+        record to be named "Meri, Lennart". Logs an error and returns
+        False otherwise.
+        """
+        record = self.get_normalized_data_by_search_term(
+            search_term="Lennart Meri",
+            max_records=1,
+            verify=False
+        )
+        success = bool(record) and record.name == "Meri, Lennart"
+        if not success:
+            logger.error(f"VIAF search term query has changed or not working!")
+        return success
+    def check_id_query(self) -> bool:
+        """ Function for checking, if VIAF ID query
+        works as expected: fetches the record with ID 84153775
+        and expects its name to be "Meri, Lennart".
+        Returns False (and logs an error) otherwise.
+        """
+        test_id = "84153775"
+        records = self.get_normalized_data_by_ids([test_id])
+        success = True
+        if records:
+            record = records[0]
+            if record.name != "Meri, Lennart":
+                success = False
+        else:
+            success = False
+        if not success:
+            logger.error(f"VIAF ID query has changed or not working!")
+        return success
+    @staticmethod
+    def verify(entity: str, viaf_record: VIAFRecord,
+            threshold: float = VIAF_SIMILARITY_THRESHOLD
+    ) -> dict:
+        """ Checks whether an entity is similar enough to any of the
+        name forms in VIAFRecord.name_variations. Comparison is
+        case-insensitive and stops at the first name form that
+        reaches the threshold.
+        Parameters
+        ------------
+        entity: str
+            Entity queried from VIAF.
+        viaf_record: VIAFRecord
+            A VIAFRecord object.
+        threshold: float
+            Min similarity score for a verified result.
+            Should be a float between 0 and 1.
+        Returns
+        ------------
+        dict
+            Dict with keys:
+                verified: bool
+                    If some name variation reached the threshold.
+                most_similar_record: str
+                    The best scoring string seen in
+                    VIAFRecord.name_variations.
+                score: float
+                    Similarity score of that string.
+        """
+        # might not always be personal name, but shouldn't break anything
+        if len(entity.split()) > 1:
+            parsed = PersonalName(entity)
+            candidates = [parsed.last_comma_first, parsed.first_last]
+        else:
+            candidates = [entity]
+        best_score = 0
+        best_match = ""
+        is_verified = False
+        for var in viaf_record.name_variations:
+            for name_form in candidates:
+                score = jw(name_form.lower(), var.lower())
+                if score > best_score:
+                    best_score, best_match = score, var
+                if score >= threshold:
+                    logger.info(
+                        f"Verification successful! '{name_form}' sufficiently " \
+                        f"similar to '{var}'! Score = {score}."
+                    )
+                    is_verified = True
+                    break
+            if is_verified:
+                break
+        return {
+            "verified": is_verified,
+            "most_similar_record": best_match,
+            "score": best_score
+        }
+    @staticmethod
+    def get_verified_record(search_term: str, viaf_records: List[VIAFRecord],
+        threshold: float = VIAF_SIMILARITY_THRESHOLD
+    ) -> VIAFRecord:
+        """ Returns the first of the given VIAFRecords that
+        `VIAFClient.verify` accepts for `search_term`. When none is
+        accepted, logs the closest match seen and returns None.
+        """
+        verified_record = None
+        max_score = 0
+        most_similar_record = ""
+        for candidate in viaf_records:
+            outcome = VIAFClient.verify(search_term, candidate, threshold)
+            candidate_score = outcome.get("score")
+            if candidate_score > max_score:
+                most_similar_record = outcome.get("most_similar_record")
+                max_score = candidate_score
+            if outcome.get("verified"):
+                verified_record = candidate
+                break
+        if not verified_record:
+            logger.error(
+                f"Verification failed. No matched record surpassed the set similarity " \
+                f"threshold ({threshold}). Closest match for search term '{search_term}' was " \
+                f"'{most_similar_record}' with similarity score {max_score} "
+            )
+        return verified_record
+    def _send_request(self, url: str, data: dict) -> Response:
         return requests.post(url, data=json.dumps(data), headers=self.headers)
     def get_records_by_search_term(self,
-                                   search_term: str,
-                                   index: str = "viaf",
-                                   field: str = "local.names",
-                                   page_index: int = 0,
-                                   page_size: int = 50
-                                   ) -> dict:
+            search_term: str,
+            index: str = "VIAF",
+            field: str = DEFAULT_VIAF_FIELD,
+            page_index: int = 0,
+            page_size: int = 50
+     ) -> Response:
+        """ Query VIAF records by search term.
+        """
+        if field and field not in ALLOWED_VIAF_FIELDS:
+            logger.error(
+                f"Field '{field}' is not allowed. Defaulting to '{DEFAULT_VIAF_FIELD}'. " \
+                f"Allowed VIAF fields are: {ALLOWED_VIAF_FIELDS}. "
+            )
+            field = DEFAULT_VIAF_FIELD
         data = {
             "reqValues": {
                 "field": field,
@@ -173,18 +538,58 @@ class VIAFClient:
         response = self._send_request(url=self.search_url, data=data)
         return response
-    def get_records_by_viaf_id(self, record_id: str) -> dict:
+    def get_records_by_viaf_id(self, record_id: str) -> Response:
+        """ Query VIAF records by ID.
+        """
         data = {
             "reqValues": {
                 "recordId": str(record_id)
             }
         }
         response = self._send_request(url=self.record_url, data=data)
         return response
-    def fetch_viaf_clusters(self, viaf_ids):
+    def extract_viaf_ids(self, search_query_response: Response) -> List[str]:
+        """ Collects the `recordData.VIAFCluster.viafID` value of every
+        record under `queryResult.records.record` of the response JSON.
+        Failures are logged: an unparsable response yields an empty
+        list, a record without an ID is skipped.
+        """
+        try:
+            payload = search_query_response.json()
+            records = payload["queryResult"]["records"]["record"]
+        except Exception as e:
+            logger.error(
+                f"Parsing records from search query " \
+                f"response failed with error: {e}."
+            )
+            records = []
+        collected_ids = []
+        for record in records:
+            try:
+                collected_ids.append(
+                    record["recordData"]["VIAFCluster"]["viafID"]
+                )
+            except Exception as e:
+                logger.error(
+                    f"Extracing VIAF ID from record '{record}' " \
+                    f"failed with error: {e}"
+                )
+        return collected_ids
+    def get_viaf_ids_by_search_terms(self,
+            search_term: str, field: str = DEFAULT_VIAF_FIELD,
+            viaf_index: str = "VIAF", page_size: int = 50
+    ) -> List[str]:
+        """ Runs a search term query and returns the VIAF IDs
+        extracted from its response.
+        """
+        query_kwargs = {
+            "search_term": search_term,
+            "field": field,
+            "index": viaf_index,
+            "page_size": page_size
+        }
+        return self.extract_viaf_ids(
+            self.get_records_by_search_term(**query_kwargs)
+        )
+    def fetch_viaf_clusters(self, viaf_ids: List[str]) -> Dict[str, dict]:
         results = {}
         for viaf_id in viaf_ids:
@@ -198,7 +603,51 @@ class VIAFClient:
         return results
-    def get_normalized_data(self, record_ids: List[str]) -> List[VIAFRecord]:
+    def get_normalized_data_by_ids(self, record_ids: List[str]) -> List[VIAFRecord]:
         """ Fetch data required for normalization from VIAF. """
         response = self.fetch_viaf_clusters(record_ids)
-        return [VIAFRecord(response[record_id]) for record_id in record_ids]
+        viaf_records = [
+            VIAFRecord(response[record_id])
+            for record_id in record_ids
+        ]
+        return viaf_records
+    def get_normalized_data_by_search_term(self,
+        search_term: str, field: str = DEFAULT_VIAF_FIELD, max_records: int = 10,
+        verify: bool = True, threshold: float = VIAF_SIMILARITY_THRESHOLD,
+        viaf_index: str = "VIAF"
+    ) -> VIAFRecord | None:
+        """ Fetch data required for normalization from VIAF.
+        Parameters
+        -----------
+        search_term: str
+            Term to search from VIAF.
+        field: str
+            VIAF field to search from.
+        max_records: int
+            Used as the search page size and as the max number
+            of records to verify.
+        verify: bool
+            If True, returns the first record passing
+            `VIAFClient.get_verified_record`; otherwise the first
+            search hit is returned as is.
+        threshold: float
+            Min similarity threshold used for verification.
+        viaf_index: str
+            VIAF index passed to the search query.
+        Returns
+        -----------
+        VIAFRecord | None
+            The selected record or None, if the search gave no
+            results or no record passed the verification.
+        """
+        # Bound up front: with verify=False and no search hits neither
+        # branch below assigns it.
+        verified_record = None
+        viaf_ids = self.get_viaf_ids_by_search_terms(
+            search_term=search_term,
+            field=field,
+            page_size=max_records,
+            viaf_index=viaf_index
+        )
+        if verify:
+            records = self.get_normalized_data_by_ids(viaf_ids[:max_records])
+            verified_record = VIAFClient.get_verified_record(
+                search_term=search_term,
+                viaf_records=records,
+                threshold=threshold
+            )
+        elif viaf_ids:
+            records = self.get_normalized_data_by_ids(viaf_ids[:1])
+            verified_record = records[0] if records else None
+        return verified_record
+if __name__ == "__main__":
+    # Manual check: look up the title "Kevade" among VIAF works and
+    # print the extracted fields of the verified record, if any.
+    from pprint import pprint
+    vc = VIAFClient()
+    entity = "Kevade"
+    record = vc.get_normalized_data_by_search_term(
+        entity,
+        field="local.uniformTitleWorks",
+        max_records=5,
+        verify=True
+    )
+    if not record:
+        print(f"Couldn't detect a verified record for entity '{entity}' :(.")
+    else:
+        pprint(record.all_fields)

{rara_tools-0.5.1.dist-info → rara_tools-0.5.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.5.1
+Version: 0.5.3
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -18,6 +18,7 @@ Requires-Dist: nltk
 Requires-Dist: jsonlines
 Requires-Dist: requests
 Requires-Dist: iso639-lang
+Requires-Dist: jellyfish
 Requires-Dist: pymarc
 Requires-Dist: regex
 Requires-Dist: glom

{rara_tools-0.5.1.dist-info → rara_tools-0.5.3.dist-info}/RECORD RENAMED Viewed

@@ -8,18 +8,18 @@ rara_tools/task_reporter.py,sha256=WCcZts9dAUokPc4vbrG3-lNAFLnWaMgE3b3iaUB7mr8,3
 rara_tools/utils.py,sha256=9vSbmuWYU5ydr4lXBKlUKa0xzDccFsaJv4T-XwgUfuY,2578
 rara_tools/constants/__init__.py,sha256=r78laM9vyRDAvzDhPvzDlhaX6qPwUUBBtwf1WosrW3o,27
 rara_tools/constants/digitizer.py,sha256=MND0dUQySBAOVWzuUBxQGZWv_Ckdz2jCp25F2_oHGi8,496
-rara_tools/constants/general.py,sha256=aVUQTMss89atAkTDZKJXNdnsBHPX-RSrlBOtt-wdPGU,195
+rara_tools/constants/general.py,sha256=i-OrySdsf05HzKWEI5CvWs3ZNsBZpZ5fhWVlU3m2QeY,251
 rara_tools/constants/language_evaluator.py,sha256=XtGAgspO2wGV4C2WhPN8zaxHkZ3d5FLgZ1PCvgZY9u0,37
 rara_tools/constants/linker.py,sha256=XUI-fD1LfvpdMDeLmMU3siAsc0pleQ92m6Cdk3_OGmo,169
 rara_tools/constants/meta_extractor.py,sha256=mhuRX4_I2JTnJO_d8tldClmuPx-RwmWWNLavZAJBgVU,33
-rara_tools/constants/normalizers.py,sha256=GmWY89kYfX7_YJ8sdy1vb8ABJc_ABdw_zVVOxd9UZgY,171
+rara_tools/constants/normalizers.py,sha256=Xs3anDwJHpHeniwx3xoIZyqdEXtO3eb7ouGLLr0CpHw,1344
 rara_tools/constants/parsers.py,sha256=L6nh1Itget9_9DMsliDkh6T25z78eMFPWVkbaU08DwU,5561
 rara_tools/constants/subject_indexer.py,sha256=RBbUuhJM8M3GQ1p2GwDAeW5go7zkI5yiuMoL-3V2-NQ,34
 rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
 rara_tools/normalizers/authorities.py,sha256=IDtcm0yNZNhv1f-WcdqWFSRzZk_CoKuBFsk6hEPddWM,4513
-rara_tools/normalizers/base.py,sha256=taOboGURQF_ACPVWHX_wMsaDEo8gYdAkiOw0yT0zzR8,10910
+rara_tools/normalizers/base.py,sha256=gsKG8NEOah_lwzY9kgCf68943xYoIIo6pPWZQuFHEuk,11818
 rara_tools/normalizers/bibs.py,sha256=4DTS6k37z8qR5B3n7aiCXsT5Z49rLTvQ60lKKr5dyLs,2352
-rara_tools/normalizers/viaf.py,sha256=9uTyEadSaoFedUbUfY_iWPJtgrt04jP71i_6MLPM08I,6919
+rara_tools/normalizers/viaf.py,sha256=XWpf_GONBGg8nsjGHoF4Vgk4S1xY3TcTIIwsTCNEyAQ,22298
 rara_tools/parsers/marc_parsers/base_parser.py,sha256=wzCccZaiN4p2iUms3PAOfXihNgEeg1cGRzRx26ytJeA,1661
 rara_tools/parsers/marc_parsers/ems_parser.py,sha256=LFuhZcVwmHMcJknX9p4ZkO8RdjPdQZ4APGbw8KV6BIs,2024
 rara_tools/parsers/marc_parsers/location_parser.py,sha256=dSU9dQoGV5z0ajhLI1bn3AAghkOr79qKIrX7sO0_4lA,1873
@@ -34,8 +34,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=NyrubWvouZEb46vaoy9NHLCzn
 rara_tools/parsers/tools/entity_normalizers.py,sha256=afOMqJoL4aeq0cfsohIuxkxzvqNdZ_ba7U32eyogbzk,8722
 rara_tools/parsers/tools/marc_converter.py,sha256=PUbggzJ_wHfke_bHTF2LOZyzX1t0wRM8qIFL36Dl3AI,414
 rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
-rara_tools-0.5.1.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
-rara_tools-0.5.1.dist-info/METADATA,sha256=vyta87EhsR-MEbYL-0jSedPQ6rL8gTR0O2p-uSXQA-g,4054
-rara_tools-0.5.1.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
-rara_tools-0.5.1.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
-rara_tools-0.5.1.dist-info/RECORD,,
+rara_tools-0.5.3.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
+rara_tools-0.5.3.dist-info/METADATA,sha256=JoiwsfHX-dE2ZK3lo2gqaQaMFjr1bWL02Hmxa4y0J8E,4079
+rara_tools-0.5.3.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+rara_tools-0.5.3.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
+rara_tools-0.5.3.dist-info/RECORD,,

{rara_tools-0.5.1.dist-info → rara_tools-0.5.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{rara_tools-0.5.1.dist-info → rara_tools-0.5.3.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{rara_tools-0.5.1.dist-info → rara_tools-0.5.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

rara-tools 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

Potentially problematic release.

rara-tools 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl