destiny_sdk 0.6.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/.gitignore +2 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/PKG-INFO +1 -1
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/pyproject.toml +1 -1
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/client.py +34 -2
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/enhancements.py +58 -19
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/identifiers.py +75 -5
- destiny_sdk-0.7.1/src/destiny_sdk/parsers/eppi_parser.py +284 -0
- destiny_sdk-0.7.1/src/destiny_sdk/parsers/exceptions.py +17 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/search.py +2 -1
- destiny_sdk-0.7.1/tests/unit/parsers/test_eppi_parser.py +228 -0
- destiny_sdk-0.7.1/tests/unit/test_data/eppi_import.jsonl +4 -0
- destiny_sdk-0.7.1/tests/unit/test_data/eppi_import_with_annotations.jsonl +4 -0
- destiny_sdk-0.7.1/tests/unit/test_data/eppi_import_with_raw.jsonl +4 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_data/eppi_report.json +6 -1
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_enhancements.py +48 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_identifiers.py +27 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_references.py +2 -1
- destiny_sdk-0.6.0/src/destiny_sdk/parsers/eppi_parser.py +0 -172
- destiny_sdk-0.6.0/tests/unit/parsers/test_eppi_parser.py +0 -47
- destiny_sdk-0.6.0/tests/unit/test_data/eppi_import.jsonl +0 -4
- destiny_sdk-0.6.0/tests/unit/test_data/eppi_import_with_annotations.jsonl +0 -4
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/LICENSE +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/README.md +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/__init__.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/auth.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/core.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/imports.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/labs/__init__.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/labs/references.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/parsers/__init__.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/py.typed +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/references.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/robots.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/src/destiny_sdk/visibility.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/__init__.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/conftest.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/labs/test_references.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_auth.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_client.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_data/destiny_references.jsonl +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/tests/unit/test_robots.py +0 -0
- {destiny_sdk-0.6.0 → destiny_sdk-0.7.1}/uv.lock +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: destiny_sdk
-Version: 0.6.0
+Version: 0.7.1
 Summary: A software development kit (sdk) to support interaction with the DESTINY repository
 Author-email: Adam Hamilton <adam@futureevidence.org>, Andrew Harvey <andrew@futureevidence.org>, Daniel Breves <daniel@futureevidence.org>, Jack Walmisley <jack@futureevidence.org>, Tim Repke <tim.repke@pik-potsdam.de>
 License-Expression: Apache-2.0
src/destiny_sdk/client.py
@@ -114,7 +114,11 @@ class Client:
         return RobotEnhancementBatchRead.model_validate(response.json())
 
     def poll_robot_enhancement_batch(
-        self,
+        self,
+        robot_id: UUID4,
+        limit: int = 10,
+        lease: str | None = None,
+        timeout: int = 60,
     ) -> RobotEnhancementBatch | None:
         """
         Poll for a robot enhancement batch.
@@ -125,13 +129,20 @@
         :type robot_id: UUID4
         :param limit: The maximum number of pending enhancements to return
         :type limit: int
+        :param lease: The duration to lease the pending enhancements for,
+            in ISO 8601 duration format eg PT10M. If not provided the repository will
+            use a default lease duration.
+        :type lease: str | None
         :return: The RobotEnhancementBatch object from the response, or None if no
             batches available
         :rtype: destiny_sdk.robots.RobotEnhancementBatch | None
         """
+        params = {"robot_id": str(robot_id), "limit": limit}
+        if lease:
+            params["lease"] = lease
         response = self.session.post(
             "/robot-enhancement-batches/",
-            params=
+            params=params,
             timeout=timeout,
         )
         # HTTP 204 No Content indicates no batches available
@@ -140,3 +151,24 @@
 
         response.raise_for_status()
         return RobotEnhancementBatch.model_validate(response.json())
+
+    def renew_robot_enhancement_batch_lease(
+        self, robot_enhancement_batch_id: UUID4, lease_duration: str | None = None
+    ) -> None:
+        """
+        Renew the lease for a robot enhancement batch.
+
+        Signs the request with the client's secret key.
+
+        :param robot_enhancement_batch_id: The ID of the robot enhancement batch
+        :type robot_enhancement_batch_id: UUID4
+        :param lease_duration: The duration to lease the pending enhancements for,
+            in ISO 8601 duration format eg PT10M. If not provided the repository will
+            use a default lease duration.
+        :type lease_duration: str | None
+        """
+        response = self.session.post(
+            f"/robot-enhancement-batches/{robot_enhancement_batch_id}/renew-lease/",
+            params={"lease": lease_duration} if lease_duration else None,
+        )
+        response.raise_for_status()
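Taken together, these client changes let a robot request an explicit lease while polling and extend it mid-run. A minimal sketch of that flow, assuming `client` is an already-configured `destiny_sdk.client.Client` and that the returned batch exposes its UUID as `id` (both assumptions; neither is shown in this diff):

```python
from uuid import UUID

# Placeholder robot ID; in practice this comes from robot registration.
ROBOT_ID = UUID("00000000-0000-0000-0000-000000000000")

batch = client.poll_robot_enhancement_batch(
    robot_id=ROBOT_ID,
    limit=5,
    lease="PT10M",  # hold the pending enhancements for 10 minutes (ISO 8601 duration)
)

if batch is not None:
    # ... long-running enhancement work ...
    # Extend the lease before it expires; omitting lease_duration falls back
    # to the repository's default lease.
    client.renew_robot_enhancement_batch_lease(
        robot_enhancement_batch_id=batch.id,  # `id` field assumed
        lease_duration="PT10M",
    )
```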
src/destiny_sdk/enhancements.py
@@ -2,9 +2,9 @@
 
 import datetime
 from enum import StrEnum, auto
-from typing import Annotated, Literal
+from typing import Annotated, Any, Literal, Self
 
-from pydantic import UUID4, BaseModel, Field, HttpUrl
+from pydantic import UUID4, BaseModel, Field, HttpUrl, model_validator
 
 from destiny_sdk.core import _JsonlFileInputMixIn
 from destiny_sdk.visibility import Visibility
@@ -25,6 +25,8 @@ class EnhancementType(StrEnum):
     """A free-form enhancement for tagging with labels."""
     LOCATION = auto()
     """Locations where the reference can be found."""
+    RAW = auto()
+    """A free-form enhancement for arbitrary/unstructured data."""
     FULL_TEXT = auto()
     """The full text of the reference. (To be implemented)"""
 
@@ -145,22 +147,33 @@ class AnnotationType(StrEnum):
     """
 
 
-class ScoreAnnotation(BaseModel):
-    """
-    An annotation which represents the score for a label.
+class BaseAnnotation(BaseModel):
+    """Base class for annotations, defining the minimal required fields."""
 
-    This is similar to a BooleanAnnotation, but lacks a boolean determination
-    as to the application of the label.
-    """
-
-    annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
     scheme: str = Field(
         description="An identifier for the scheme of annotation",
         examples=["openalex:topic", "pubmed:mesh"],
+        pattern=r"^[^/]+$",  # No slashes allowed
     )
     label: str = Field(
         description="A high level label for this annotation like the name of the topic",
     )
+
+    @property
+    def qualified_label(self) -> str:
+        """The qualified label for this annotation."""
+        return f"{self.scheme}/{self.label}"
+
+
+class ScoreAnnotation(BaseAnnotation):
+    """
+    An annotation which represents the score for a label.
+
+    This is similar to a BooleanAnnotation, but lacks a boolean determination
+    as to the application of the label.
+    """
+
+    annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
     score: float = Field(description="""Score for this annotation""")
     data: dict = Field(
         default_factory=dict,
@@ -171,7 +184,7 @@ class ScoreAnnotation(BaseModel):
     )
 
 
-class BooleanAnnotation(BaseModel):
+class BooleanAnnotation(BaseAnnotation):
     """
     An annotation is a way of tagging the content with a label of some kind.
 
@@ -180,13 +193,6 @@ class BooleanAnnotation(BaseModel):
     """
 
     annotation_type: Literal[AnnotationType.BOOLEAN] = AnnotationType.BOOLEAN
-    scheme: str = Field(
-        description="An identifier for the scheme of the annotation",
-        examples=["openalex:topic", "pubmed:mesh"],
-    )
-    label: str = Field(
-        description="A high level label for this annotation like the name of the topic",
-    )
     value: bool = Field(description="""Boolean flag for this annotation""")
     score: float | None = Field(
         None, description="A confidence score for this annotation"
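With the shared `BaseAnnotation`, a scheme/label pair is validated and qualified the same way across annotation types. A minimal sketch using only the fields shown above (the scheme and label values are illustrative):

```python
from destiny_sdk.enhancements import AnnotationEnhancement, BooleanAnnotation

annotation = BooleanAnnotation(
    scheme="openalex:topic",      # slashes are rejected by the new scheme pattern
    label="climate-adaptation",   # illustrative label
    value=True,
    score=0.92,
)
print(annotation.qualified_label)  # -> "openalex:topic/climate-adaptation"

# Annotations are still carried inside an AnnotationEnhancement.
enhancement = AnnotationEnhancement(annotations=[annotation])
```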
@@ -295,12 +301,45 @@ class LocationEnhancement(BaseModel):
     )
 
 
+class RawEnhancement(BaseModel):
+    """
+    An enhancement for storing raw/arbitrary/unstructured data.
+
+    Data in these enhancements is intended for future conversion into structured form.
+
+    This enhancement accepts any fields passed in to `data`. These enhancements cannot
+    be created by robots.
+    """
+
+    enhancement_type: Literal[EnhancementType.RAW] = EnhancementType.RAW
+    source_export_date: datetime.datetime = Field(
+        description="Date the enhancement data was retrieved."
+    )
+    description: str = Field(
+        description="Description of the data to aid in future refinement."
+    )
+    metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional metadata to aid in future structuring of raw data",
+    )
+    data: Any = Field(description="Unstructured data for later processing.")
+
+    @model_validator(mode="after")
+    def forbid_no_data(self) -> Self:
+        """Prevent a raw enhancement from being created with no data."""
+        if not self.data:
+            msg = "data must be populated on a raw enhancement."
+            raise ValueError(msg)
+        return self
+
+
 #: Union type for all enhancement content types.
 EnhancementContent = Annotated[
     BibliographicMetadataEnhancement
     | AbstractContentEnhancement
     | AnnotationEnhancement
-    | LocationEnhancement,
+    | LocationEnhancement
+    | RawEnhancement,
     Field(discriminator="enhancement_type"),
 ]
 
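`RawEnhancement` is validated after construction, so an empty `data` payload is rejected outright. A minimal sketch with illustrative dates and payloads:

```python
import datetime

from pydantic import ValidationError

from destiny_sdk.enhancements import RawEnhancement

raw = RawEnhancement(
    source_export_date=datetime.datetime(2024, 1, 15, tzinfo=datetime.UTC),
    description="Unmapped fields from an EPPI export",  # illustrative
    metadata={"codeset_ids": [12345]},                   # illustrative
    data={"ItemId": "987654", "ShortTitle": "Example 2024"},
)

try:
    RawEnhancement(
        source_export_date=datetime.datetime(2024, 1, 15, tzinfo=datetime.UTC),
        description="empty payload",
        data={},
    )
except ValidationError as exc:
    print(exc)  # rejected: "data must be populated on a raw enhancement."
```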
src/destiny_sdk/identifiers.py
@@ -17,8 +17,14 @@ class ExternalIdentifierType(StrEnum):
 
     DOI = auto()
     """A DOI (Digital Object Identifier) which is a unique identifier for a document."""
+    ERIC = auto()
+    """An ERIC (Education Resources Information Center) ID which is a unique
+    identifier for a document in ERIC.
+    """
     PM_ID = auto()
     """A PubMed ID which is a unique identifier for a document in PubMed."""
+    PRO_QUEST = auto()
+    """A ProQuest ID which is a unique identifier for a document in ProQuest."""
     OPEN_ALEX = auto()
     """An OpenAlex ID which is a unique identifier for a document in OpenAlex."""
     OTHER = auto()
@@ -41,8 +47,64 @@ class DOIIdentifier(BaseModel):
     def remove_doi_url(cls, value: str) -> str:
         """Remove the URL part of the DOI if it exists."""
         return (
-            value.removeprefix("http://
-            .removeprefix("https://
+            value.removeprefix("http://")
+            .removeprefix("https://")
+            .removeprefix("doi.org/")
+            .removeprefix("dx.doi.org/")
+            .removeprefix("doi:")
+            .strip()
+        )
+
+
+class ProQuestIdentifier(BaseModel):
+    """An external identifier representing a ProQuest ID."""
+
+    identifier: str = Field(
+        description="The ProQuest id of the reference", pattern=r"[0-9]+$"
+    )
+    identifier_type: Literal[ExternalIdentifierType.PRO_QUEST] = Field(
+        ExternalIdentifierType.PRO_QUEST, description="The type of identifier used."
+    )
+
+    @field_validator("identifier", mode="before")
+    @classmethod
+    def remove_proquest_url(cls, value: str) -> str:
+        """Remove the URL part of the ProQuest id if it exists."""
+        return (
+            value.removeprefix("http://")
+            .removeprefix("https://")
+            .removeprefix("search.proquest.com/")
+            .removeprefix("www.proquest.com/")
+            .removeprefix("docview/")
+            .strip()
+        )
+
+
+class ERICIdentifier(BaseModel):
+    """
+    An external identifier representing an ERIC Number.
+
+    An ERIC Number is defined as a unique identifying number preceded by
+    EJ (for a journal article) or ED (for a non-journal document).
+    """
+
+    identifier: str = Field(
+        description="The ERIC Number of the reference.", pattern=r"E[D|J][0-9]+$"
+    )
+    identifier_type: Literal[ExternalIdentifierType.ERIC] = Field(
+        ExternalIdentifierType.ERIC, description="The type of identifier used."
+    )
+
+    @field_validator("identifier", mode="before")
+    @classmethod
+    def remove_eric_url(cls, value: str) -> str:
+        """Remove the URL part of the ERIC ID if it exists."""
+        return (
+            value.removeprefix("http://")
+            .removeprefix("https://")
+            .removeprefix("eric.ed.gov/?id=")
+            .removeprefix("files.eric.ed.gov/fulltext/")
+            .removesuffix(".pdf")
             .strip()
         )
 
@@ -71,8 +133,11 @@ class OpenAlexIdentifier(BaseModel):
     def remove_open_alex_url(cls, value: str) -> str:
         """Remove the OpenAlex URL if it exists."""
         return (
-            value.removeprefix("http://
-            .removeprefix("https://
+            value.removeprefix("http://")
+            .removeprefix("https://")
+            .removeprefix("openalex.org/")
+            .removeprefix("explore.openalex.org/")
+            .removeprefix("works/")
             .strip()
         )
 
@@ -91,7 +156,12 @@ class OtherIdentifier(BaseModel):
 
 #: Union type for all external identifiers.
 ExternalIdentifier = Annotated[
-    DOIIdentifier
+    DOIIdentifier
+    | ERICIdentifier
+    | PubMedIdentifier
+    | ProQuestIdentifier
+    | OpenAlexIdentifier
+    | OtherIdentifier,
     Field(discriminator="identifier_type"),
 ]
 
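Each new identifier strips a handful of known URL prefixes before its pattern check, so full links can be passed straight through. A small sketch with illustrative URLs (the DOI value assumes it satisfies the DOI pattern, which is not shown in this diff):

```python
from destiny_sdk.identifiers import DOIIdentifier, ERICIdentifier, ProQuestIdentifier

doi = DOIIdentifier(identifier="https://doi.org/10.1234/abcd.5678")       # illustrative DOI
eric = ERICIdentifier(identifier="https://eric.ed.gov/?id=EJ1234567")     # illustrative ERIC number
proquest = ProQuestIdentifier(identifier="https://www.proquest.com/docview/2345678901")

print(doi.identifier)       # "10.1234/abcd.5678"
print(eric.identifier)      # "EJ1234567"
print(proquest.identifier)  # "2345678901"
```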
src/destiny_sdk/parsers/eppi_parser.py (new file)
@@ -0,0 +1,284 @@
+"""Parser for an EPPI JSON export file."""
+
+from datetime import datetime
+from typing import Any
+
+from pydantic import ValidationError
+
+from destiny_sdk.enhancements import (
+    AbstractContentEnhancement,
+    AbstractProcessType,
+    AnnotationEnhancement,
+    AnnotationType,
+    AuthorPosition,
+    Authorship,
+    BibliographicMetadataEnhancement,
+    BooleanAnnotation,
+    EnhancementContent,
+    EnhancementFileInput,
+    RawEnhancement,
+)
+from destiny_sdk.identifiers import (
+    DOIIdentifier,
+    ERICIdentifier,
+    ExternalIdentifier,
+    OpenAlexIdentifier,
+    ProQuestIdentifier,
+)
+from destiny_sdk.parsers.exceptions import ExternalIdentifierNotFoundError
+from destiny_sdk.references import ReferenceFileInput
+from destiny_sdk.visibility import Visibility
+
+
+class EPPIParser:
+    """
+    Parser for an EPPI JSON export file.
+
+    See example here: https://eppi.ioe.ac.uk/cms/Portals/35/Maps/Examples/example_orignal.json
+    """
+
+    version = "2.0"
+
+    def __init__(
+        self,
+        *,
+        tags: list[str] | None = None,
+        include_raw_data: bool = False,
+        source_export_date: datetime | None = None,
+        data_description: str | None = None,
+        raw_enhancement_excludes: list[str] | None = None,
+    ) -> None:
+        """
+        Initialize the EPPIParser with optional tags.
+
+        Args:
+            tags (list[str] | None): Optional list of tags to annotate references.
+
+        """
+        self.tags = tags or []
+        self.parser_source = f"destiny_sdk.eppi_parser@{self.version}"
+        self.include_raw_data = include_raw_data
+        self.source_export_date = source_export_date
+        self.data_description = data_description
+        self.raw_enhancement_excludes = (
+            raw_enhancement_excludes if raw_enhancement_excludes else []
+        )
+
+        if self.include_raw_data and not all(
+            (
+                self.source_export_date,
+                self.data_description,
+            )
+        ):
+            msg = (
+                "Cannot include raw data enhancements without "
+                "source_export_date, data_description, and raw_enhancement_metadata"
+            )
+            raise RuntimeError(msg)
+
+    def _parse_identifiers(
+        self, ref_to_import: dict[str, Any]
+    ) -> list[ExternalIdentifier]:
+        identifiers = []
+        if doi := ref_to_import.get("DOI"):
+            doi_identifier = self._parse_doi(doi=doi)
+            if doi_identifier:
+                identifiers.append(doi_identifier)
+
+        if url := ref_to_import.get("URL"):
+            identifier = self._parse_url_to_identifier(url=url)
+            if identifier:
+                identifiers.append(identifier)
+
+        if not identifiers:
+            msg = (
+                "No known external identifiers found for Reference data "
+                f"with DOI: '{doi if doi else None}' "
+                f"and URL: '{url if url else None}'."
+            )
+            raise ExternalIdentifierNotFoundError(detail=msg)
+
+        return identifiers
+
+    def _parse_doi(self, doi: str) -> DOIIdentifier | None:
+        """Attempt to parse a DOI from a string."""
+        try:
+            doi = doi.strip()
+            return DOIIdentifier(identifier=doi)
+        except ValidationError:
+            return None
+
+    def _parse_url_to_identifier(self, url: str) -> ExternalIdentifier | None:
+        """Attempt to parse an external identifier from a url string."""
+        url = url.strip()
+        identifier_cls = None
+        if "eric" in url:
+            identifier_cls = ERICIdentifier
+        elif "proquest" in url:
+            identifier_cls = ProQuestIdentifier
+        elif "openalex" in url:
+            identifier_cls = OpenAlexIdentifier
+        else:
+            return None
+
+        try:
+            return identifier_cls(identifier=url)
+        except ValidationError:
+            return None
+
+    def _parse_abstract_enhancement(
+        self, ref_to_import: dict[str, Any]
+    ) -> EnhancementContent | None:
+        if abstract := ref_to_import.get("Abstract"):
+            return AbstractContentEnhancement(
+                process=AbstractProcessType.OTHER,
+                abstract=abstract,
+            )
+        return None
+
+    def _parse_bibliographic_enhancement(
+        self, ref_to_import: dict[str, Any]
+    ) -> EnhancementContent | None:
+        title = ref_to_import.get("Title")
+        publication_year = (
+            int(year)
+            if (year := ref_to_import.get("Year")) and year.isdigit()
+            else None
+        )
+        publisher = ref_to_import.get("Publisher")
+        authors_string = ref_to_import.get("Authors")
+
+        authorships = []
+        if authors_string:
+            authors = [
+                author.strip() for author in authors_string.split(";") if author.strip()
+            ]
+            for i, author_name in enumerate(authors):
+                position = AuthorPosition.MIDDLE
+                if i == 0:
+                    position = AuthorPosition.FIRST
+                if i == len(authors) - 1 and i > 0:
+                    position = AuthorPosition.LAST
+
+                authorships.append(
+                    Authorship(
+                        display_name=author_name,
+                        position=position,
+                    )
+                )
+
+        if not title and not publication_year and not publisher and not authorships:
+            return None
+
+        return BibliographicMetadataEnhancement(
+            title=title,
+            publication_year=publication_year,
+            publisher=publisher,
+            authorship=authorships if authorships else None,
+        )
+
+    def _parse_raw_enhancement(
+        self, ref_to_import: dict[str, Any], raw_enhancement_metadata: dict[str, Any]
+    ) -> EnhancementContent | None:
+        """Add Reference data as a raw enhancement."""
+        raw_enhancement_data = ref_to_import.copy()
+
+        # Remove any keys that should be excluded
+        for exclude in self.raw_enhancement_excludes:
+            raw_enhancement_data.pop(exclude, None)
+
+        return RawEnhancement(
+            source_export_date=self.source_export_date,
+            description=self.data_description,
+            metadata=raw_enhancement_metadata,
+            data=raw_enhancement_data,
+        )
+
+    def _create_annotation_enhancement(self) -> EnhancementContent | None:
+        if not self.tags:
+            return None
+        annotations = [
+            BooleanAnnotation(
+                annotation_type=AnnotationType.BOOLEAN,
+                scheme=self.parser_source,
+                label=tag,
+                value=True,
+            )
+            for tag in self.tags
+        ]
+        return AnnotationEnhancement(
+            annotations=annotations,
+        )
+
+    def parse_data(
+        self,
+        data: dict,
+        source: str | None = None,
+        robot_version: str | None = None,
+    ) -> tuple[list[ReferenceFileInput], list[dict]]:
+        """
+        Parse an EPPI JSON export dict and return a list of ReferenceFileInput objects.
+
+        Args:
+            data (dict): Parsed EPPI JSON export data.
+            source (str | None): Optional source string for deduplication/provenance.
+            robot_version (str | None): Optional robot version string for provenance.
+                Defaults to parser version.
+
+        Returns:
+            list[ReferenceFileInput]: List of parsed references from the data.
+
+        """
+        parser_source = source if source is not None else self.parser_source
+
+        if self.include_raw_data:
+            codesets = [codeset.get("SetId") for codeset in data.get("CodeSets", [])]
+            raw_enhancement_metadata = {"codeset_ids": codesets}
+
+        references = []
+        failed_refs = []
+        for ref_to_import in data.get("References", []):
+            try:
+                enhancement_contents = [
+                    content
+                    for content in [
+                        self._parse_abstract_enhancement(ref_to_import),
+                        self._parse_bibliographic_enhancement(ref_to_import),
+                        self._create_annotation_enhancement(),
+                    ]
+                    if content
+                ]
+
+                if self.include_raw_data:
+                    raw_enhancement = self._parse_raw_enhancement(
+                        ref_to_import=ref_to_import,
+                        raw_enhancement_metadata=raw_enhancement_metadata,
+                    )
+
+                    if raw_enhancement:
+                        enhancement_contents.append(raw_enhancement)
+
+                enhancements = [
+                    EnhancementFileInput(
+                        source=parser_source,
+                        visibility=Visibility.PUBLIC,
+                        content=content,
+                        robot_version=robot_version,
+                    )
+                    for content in enhancement_contents
+                ]
+
+                references.append(
+                    ReferenceFileInput(
+                        visibility=Visibility.PUBLIC,
+                        identifiers=self._parse_identifiers(
+                            ref_to_import=ref_to_import
+                        ),
+                        enhancements=enhancements,
+                    )
+                )
+
+            except ExternalIdentifierNotFoundError:
+                failed_refs.append(ref_to_import)
+
+        return references, failed_refs
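Driving the rewritten parser over an EPPI export looks roughly like this; the file path, tag, and description are illustrative, and `include_raw_data=True` requires both `source_export_date` and `data_description` per the constructor check above:

```python
import datetime
import json

from destiny_sdk.parsers.eppi_parser import EPPIParser

# Illustrative path to a local EPPI JSON export.
with open("eppi_export.json", encoding="utf-8") as f:
    data = json.load(f)

parser = EPPIParser(
    tags=["my-review"],  # each tag becomes a BooleanAnnotation on every reference
    include_raw_data=True,
    source_export_date=datetime.datetime(2025, 1, 15, tzinfo=datetime.UTC),
    data_description="EPPI export for my-review",
)

references, failed_refs = parser.parse_data(data)
print(f"Parsed {len(references)} references; {len(failed_refs)} had no usable identifier.")
```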
src/destiny_sdk/parsers/exceptions.py (new file)
@@ -0,0 +1,17 @@
+"""Custom exceptions for destiny sdk parsers."""
+
+
+class ExternalIdentifierNotFoundError(Exception):
+    """Raised when a reference has no identifiable external identifiers."""
+
+    def __init__(self, detail: str | None = None, *args: object) -> None:
+        """
+        Initialize the ExternalIdentifierNotFoundError.
+
+        Args:
+            detail: Optional detail message describing why no identifier was found.
+            *args: Additional arguments for the exception.
+
+        """
+        self.detail = detail or "No detail provided."
+        super().__init__(detail, *args)
src/destiny_sdk/search.py
@@ -30,6 +30,7 @@ class AnnotationFilter(BaseModel):
 
     scheme: str = Field(
         description="The annotation scheme to filter by.",
+        pattern=r"^[^/]+$",
     )
     label: str | None = Field(
         None,
@@ -42,7 +43,7 @@ class AnnotationFilter(BaseModel):
         le=1.0,
     )
 
-    def
+    def __repr__(self) -> str:
        """Serialize the annotation filter to a string."""
         annotation = self.scheme
         if self.label: