destiny_sdk 0.5.1__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
destiny_sdk/__init__.py CHANGED
@@ -8,6 +8,7 @@ from . import (
8
8
  imports,
9
9
  references,
10
10
  robots,
11
+ search,
11
12
  visibility,
12
13
  )
13
14
 
@@ -19,5 +20,6 @@ __all__ = [
19
20
  "imports",
20
21
  "references",
21
22
  "robots",
23
+ "search",
22
24
  "visibility",
23
25
  ]
destiny_sdk/client.py CHANGED
@@ -114,7 +114,11 @@ class Client:
114
114
  return RobotEnhancementBatchRead.model_validate(response.json())
115
115
 
116
116
  def poll_robot_enhancement_batch(
117
- self, robot_id: UUID4, limit: int = 10, timeout: int = 60
117
+ self,
118
+ robot_id: UUID4,
119
+ limit: int = 10,
120
+ lease: str | None = None,
121
+ timeout: int = 60,
118
122
  ) -> RobotEnhancementBatch | None:
119
123
  """
120
124
  Poll for a robot enhancement batch.
@@ -125,13 +129,20 @@ class Client:
125
129
  :type robot_id: UUID4
126
130
  :param limit: The maximum number of pending enhancements to return
127
131
  :type limit: int
132
+ :param lease: The duration to lease the pending enhancements for,
133
+ in ISO 8601 duration format eg PT10M. If not provided the repository will
134
+ use a default lease duration.
135
+ :type lease: str | None
128
136
  :return: The RobotEnhancementBatch object from the response, or None if no
129
137
  batches available
130
138
  :rtype: destiny_sdk.robots.RobotEnhancementBatch | None
131
139
  """
140
+ params = {"robot_id": str(robot_id), "limit": limit}
141
+ if lease:
142
+ params["lease"] = lease
132
143
  response = self.session.post(
133
144
  "/robot-enhancement-batches/",
134
- params={"robot_id": str(robot_id), "limit": limit},
145
+ params=params,
135
146
  timeout=timeout,
136
147
  )
137
148
  # HTTP 204 No Content indicates no batches available
@@ -140,3 +151,24 @@ class Client:
140
151
 
141
152
  response.raise_for_status()
142
153
  return RobotEnhancementBatch.model_validate(response.json())
154
+
155
+ def renew_robot_enhancement_batch_lease(
156
+ self, robot_enhancement_batch_id: UUID4, lease_duration: str | None = None
157
+ ) -> None:
158
+ """
159
+ Renew the lease for a robot enhancement batch.
160
+
161
+ Signs the request with the client's secret key.
162
+
163
+ :param robot_enhancement_batch_id: The ID of the robot enhancement batch
164
+ :type robot_enhancement_batch_id: UUID4
165
+ :param lease_duration: The duration to lease the pending enhancements for,
166
+ in ISO 8601 duration format eg PT10M. If not provided the repository will
167
+ use a default lease duration.
168
+ :type lease_duration: str | None
169
+ """
170
+ response = self.session.post(
171
+ f"/robot-enhancement-batches/{robot_enhancement_batch_id}/renew-lease/",
172
+ params={"lease": lease_duration} if lease_duration else None,
173
+ )
174
+ response.raise_for_status()
destiny_sdk/core.py CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  from typing import Self
4
4
 
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, Field
6
+
7
+ from destiny_sdk.search import SearchResultPage, SearchResultTotal
6
8
 
7
9
  # These are non-standard newline characters that are not escaped by model_dump_json().
8
10
  # We want jsonl files to have empirical new lines so they can be streamed line by line.
@@ -47,3 +49,14 @@ class _JsonlFileInputMixIn(BaseModel):
47
49
  :rtype: Self
48
50
  """
49
51
  return cls.model_validate_json(jsonl)
52
+
53
+
54
+ class SearchResultMixIn(BaseModel):
55
+ """A mixin class for models that represent search results."""
56
+
57
+ total: SearchResultTotal = Field(
58
+ description="The total number of results matching the search criteria.",
59
+ )
60
+ page: SearchResultPage = Field(
61
+ description="Information about the page of results.",
62
+ )
@@ -2,9 +2,9 @@
2
2
 
3
3
  import datetime
4
4
  from enum import StrEnum, auto
5
- from typing import Annotated, Literal
5
+ from typing import Annotated, Any, Literal, Self
6
6
 
7
- from pydantic import UUID4, BaseModel, Field, HttpUrl
7
+ from pydantic import UUID4, BaseModel, Field, HttpUrl, model_validator
8
8
 
9
9
  from destiny_sdk.core import _JsonlFileInputMixIn
10
10
  from destiny_sdk.visibility import Visibility
@@ -25,6 +25,8 @@ class EnhancementType(StrEnum):
25
25
  """A free-form enhancement for tagging with labels."""
26
26
  LOCATION = auto()
27
27
  """Locations where the reference can be found."""
28
+ RAW = auto()
29
+ """A free-form enhancement for arbitrary/unstructured data."""
28
30
  FULL_TEXT = auto()
29
31
  """The full text of the reference. (To be implemented)"""
30
32
 
@@ -145,22 +147,33 @@ class AnnotationType(StrEnum):
145
147
  """
146
148
 
147
149
 
148
- class ScoreAnnotation(BaseModel):
149
- """
150
- An annotation which represents the score for a label.
150
+ class BaseAnnotation(BaseModel):
151
+ """Base class for annotations, defining the minimal required fields."""
151
152
 
152
- This is similar to a BooleanAnnotation, but lacks a boolean determination
153
- as to the application of the label.
154
- """
155
-
156
- annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
157
153
  scheme: str = Field(
158
154
  description="An identifier for the scheme of annotation",
159
155
  examples=["openalex:topic", "pubmed:mesh"],
156
+ pattern=r"^[^/]+$", # No slashes allowed
160
157
  )
161
158
  label: str = Field(
162
159
  description="A high level label for this annotation like the name of the topic",
163
160
  )
161
+
162
+ @property
163
+ def qualified_label(self) -> str:
164
+ """The qualified label for this annotation."""
165
+ return f"{self.scheme}/{self.label}"
166
+
167
+
168
+ class ScoreAnnotation(BaseAnnotation):
169
+ """
170
+ An annotation which represents the score for a label.
171
+
172
+ This is similar to a BooleanAnnotation, but lacks a boolean determination
173
+ as to the application of the label.
174
+ """
175
+
176
+ annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
164
177
  score: float = Field(description="""Score for this annotation""")
165
178
  data: dict = Field(
166
179
  default_factory=dict,
@@ -171,7 +184,7 @@ class ScoreAnnotation(BaseModel):
171
184
  )
172
185
 
173
186
 
174
- class BooleanAnnotation(BaseModel):
187
+ class BooleanAnnotation(BaseAnnotation):
175
188
  """
176
189
  An annotation is a way of tagging the content with a label of some kind.
177
190
 
@@ -180,13 +193,6 @@ class BooleanAnnotation(BaseModel):
180
193
  """
181
194
 
182
195
  annotation_type: Literal[AnnotationType.BOOLEAN] = AnnotationType.BOOLEAN
183
- scheme: str = Field(
184
- description="An identifier for the scheme of the annotation",
185
- examples=["openalex:topic", "pubmed:mesh"],
186
- )
187
- label: str = Field(
188
- description="A high level label for this annotation like the name of the topic",
189
- )
190
196
  value: bool = Field(description="""Boolean flag for this annotation""")
191
197
  score: float | None = Field(
192
198
  None, description="A confidence score for this annotation"
@@ -295,12 +301,45 @@ class LocationEnhancement(BaseModel):
295
301
  )
296
302
 
297
303
 
304
+ class RawEnhancement(BaseModel):
305
+ """
306
+ An enhancement for storing raw/arbitrary/unstructured data.
307
+
308
+ Data in these enhancements is intended for future conversion into structured form.
309
+
310
+ This enhancement accepts any fields passed in to `data`. These enhancements cannot
311
+ be created by robots.
312
+ """
313
+
314
+ enhancement_type: Literal[EnhancementType.RAW] = EnhancementType.RAW
315
+ source_export_date: datetime.datetime = Field(
316
+ description="Date the enhancement data was retrieved."
317
+ )
318
+ description: str = Field(
319
+ description="Description of the data to aid in future refinement."
320
+ )
321
+ metadata: dict[str, Any] = Field(
322
+ default_factory=dict,
323
+ description="Additional metadata to aid in future structuring of raw data",
324
+ )
325
+ data: Any = Field(description="Unstructured data for later processing.")
326
+
327
+ @model_validator(mode="after")
328
+ def forbid_no_data(self) -> Self:
329
+ """Prevent a raw enhancement from being created with no data."""
330
+ if not self.data:
331
+ msg = "data must be populated on a raw enhancement."
332
+ raise ValueError(msg)
333
+ return self
334
+
335
+
298
336
  #: Union type for all enhancement content types.
299
337
  EnhancementContent = Annotated[
300
338
  BibliographicMetadataEnhancement
301
339
  | AbstractContentEnhancement
302
340
  | AnnotationEnhancement
303
- | LocationEnhancement,
341
+ | LocationEnhancement
342
+ | RawEnhancement,
304
343
  Field(discriminator="enhancement_type"),
305
344
  ]
306
345
 
@@ -17,8 +17,14 @@ class ExternalIdentifierType(StrEnum):
17
17
 
18
18
  DOI = auto()
19
19
  """A DOI (Digital Object Identifier) which is a unique identifier for a document."""
20
+ ERIC = auto()
21
+ """An ERIC (Education Resources Information Center) ID which is a unique
22
+ identifier for a document in ERIC.
23
+ """
20
24
  PM_ID = auto()
21
25
  """A PubMed ID which is a unique identifier for a document in PubMed."""
26
+ PRO_QUEST = auto()
27
+ """A ProQuest ID which is a unique identifier for a document in ProQuest."""
22
28
  OPEN_ALEX = auto()
23
29
  """An OpenAlex ID which is a unique identifier for a document in OpenAlex."""
24
30
  OTHER = auto()
@@ -41,8 +47,64 @@ class DOIIdentifier(BaseModel):
41
47
  def remove_doi_url(cls, value: str) -> str:
42
48
  """Remove the URL part of the DOI if it exists."""
43
49
  return (
44
- value.removeprefix("http://doi.org/")
45
- .removeprefix("https://doi.org/")
50
+ value.removeprefix("http://")
51
+ .removeprefix("https://")
52
+ .removeprefix("doi.org/")
53
+ .removeprefix("dx.doi.org/")
54
+ .removeprefix("doi:")
55
+ .strip()
56
+ )
57
+
58
+
59
+ class ProQuestIdentifier(BaseModel):
60
+ """An external identifier representing a ProQuest ID."""
61
+
62
+ identifier: str = Field(
63
+ description="The ProQuest id of the reference", pattern=r"[0-9]+$"
64
+ )
65
+ identifier_type: Literal[ExternalIdentifierType.PRO_QUEST] = Field(
66
+ ExternalIdentifierType.PRO_QUEST, description="The type of identifier used."
67
+ )
68
+
69
+ @field_validator("identifier", mode="before")
70
+ @classmethod
71
+ def remove_proquest_url(cls, value: str) -> str:
72
+ """Remove the URL part of the ProQuest id if it exists."""
73
+ return (
74
+ value.removeprefix("http://")
75
+ .removeprefix("https://")
76
+ .removeprefix("search.proquest.com/")
77
+ .removeprefix("www.proquest.com/")
78
+ .removeprefix("docview/")
79
+ .strip()
80
+ )
81
+
82
+
83
+ class ERICIdentifier(BaseModel):
84
+ """
85
+ An external identifier representing an ERIC Number.
86
+
87
+ An ERIC Number is defined as a unique identifying number preceded by
88
+ EJ (for a journal article) or ED (for a non-journal document).
89
+ """
90
+
91
+ identifier: str = Field(
92
+ description="The ERIC Number of the reference.", pattern=r"E[D|J][0-9]+$"
93
+ )
94
+ identifier_type: Literal[ExternalIdentifierType.ERIC] = Field(
95
+ ExternalIdentifierType.ERIC, description="The type of identifier used."
96
+ )
97
+
98
+ @field_validator("identifier", mode="before")
99
+ @classmethod
100
+ def remove_eric_url(cls, value: str) -> str:
101
+ """Remove the URL part of the ERIC ID if it exists."""
102
+ return (
103
+ value.removeprefix("http://")
104
+ .removeprefix("https://")
105
+ .removeprefix("eric.ed.gov/?id=")
106
+ .removeprefix("files.eric.ed.gov/fulltext/")
107
+ .removesuffix(".pdf")
46
108
  .strip()
47
109
  )
48
110
 
@@ -71,8 +133,11 @@ class OpenAlexIdentifier(BaseModel):
71
133
  def remove_open_alex_url(cls, value: str) -> str:
72
134
  """Remove the OpenAlex URL if it exists."""
73
135
  return (
74
- value.removeprefix("http://openalex.org/")
75
- .removeprefix("https://openalex.org/")
136
+ value.removeprefix("http://")
137
+ .removeprefix("https://")
138
+ .removeprefix("openalex.org/")
139
+ .removeprefix("explore.openalex.org/")
140
+ .removeprefix("works/")
76
141
  .strip()
77
142
  )
78
143
 
@@ -91,7 +156,12 @@ class OtherIdentifier(BaseModel):
91
156
 
92
157
  #: Union type for all external identifiers.
93
158
  ExternalIdentifier = Annotated[
94
- DOIIdentifier | PubMedIdentifier | OpenAlexIdentifier | OtherIdentifier,
159
+ DOIIdentifier
160
+ | ERICIdentifier
161
+ | PubMedIdentifier
162
+ | ProQuestIdentifier
163
+ | OpenAlexIdentifier
164
+ | OtherIdentifier,
95
165
  Field(discriminator="identifier_type"),
96
166
  ]
97
167
 
@@ -0,0 +1,10 @@
1
+ """
2
+ Experimental DESTINY SDK.
3
+
4
+ The DESTINY SDK-labs provides experimental features
5
+ for interacting with the DESTINY repository.
6
+ """
7
+
8
+ from . import references
9
+
10
+ __all__ = ["references"]
@@ -0,0 +1,154 @@
1
+ """
2
+ Extended Reference SDK.
3
+
4
+ Extended Reference class for the Destiny SDK
5
+ with added experimental convenience methods and properties.
6
+ """
7
+
8
+ from collections.abc import Generator
9
+ from typing import cast
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+ from destiny_sdk.enhancements import (
14
+ Annotation,
15
+ AnnotationType,
16
+ BibliographicMetadataEnhancement,
17
+ EnhancementType,
18
+ )
19
+ from destiny_sdk.identifiers import ExternalIdentifierType
20
+ from destiny_sdk.references import Reference
21
+
22
+
23
+ class LabsReference(BaseModel):
24
+ """Experimental presenter class for Reference with added convenience methods."""
25
+
26
+ reference: Reference = Field(
27
+ ...,
28
+ description="The core Reference object",
29
+ )
30
+
31
+ def _get_id(self, identifier_type: ExternalIdentifierType) -> str | int | None:
32
+ """Fetch an identifier matching the given identifier_type."""
33
+ for identifier in self.reference.identifiers or []:
34
+ if identifier.identifier_type == identifier_type:
35
+ return identifier.identifier
36
+ return None
37
+
38
+ @property
39
+ def openalex_id(self) -> str | None:
40
+ """Return an OpenAlex ID for the reference."""
41
+ return cast(
42
+ str | None, self._get_id(identifier_type=ExternalIdentifierType.OPEN_ALEX)
43
+ )
44
+
45
+ @property
46
+ def doi(self) -> str | None:
47
+ """Return a DOI for the reference."""
48
+ return cast(
49
+ str | None, self._get_id(identifier_type=ExternalIdentifierType.DOI)
50
+ )
51
+
52
+ @property
53
+ def pubmed_id(self) -> int | None:
54
+ """Return a pubmed ID for the reference."""
55
+ return cast(
56
+ int | None, self._get_id(identifier_type=ExternalIdentifierType.PM_ID)
57
+ )
58
+
59
+ @property
60
+ def abstract(self) -> str | None:
61
+ """Return an abstract for the reference."""
62
+ for enhancement in self.reference.enhancements or []:
63
+ if enhancement.content.enhancement_type == EnhancementType.ABSTRACT:
64
+ return enhancement.content.abstract
65
+ return None
66
+
67
+ @property
68
+ def publication_year(self) -> int | None:
69
+ """Return a publication year for the reference."""
70
+ for meta in self.it_bibliographics():
71
+ if meta.publication_year is not None:
72
+ return meta.publication_year
73
+ return None
74
+
75
+ @property
76
+ def title(self) -> str | None:
77
+ """The title of the reference. If multiple are present, return first one."""
78
+ for meta in self.it_bibliographics():
79
+ if meta.title is not None:
80
+ return meta.title
81
+ return None
82
+
83
+ def it_bibliographics(
84
+ self,
85
+ ) -> Generator[BibliographicMetadataEnhancement, None, None]:
86
+ """Iterate bibliographic enhancements."""
87
+ for enhancement in self.reference.enhancements or []:
88
+ if enhancement.content.enhancement_type == EnhancementType.BIBLIOGRAPHIC:
89
+ yield enhancement.content
90
+
91
+ def it_annotations(
92
+ self,
93
+ source: str | None = None,
94
+ annotation_type: AnnotationType | None = None,
95
+ scheme: str | None = None,
96
+ label: str | None = None,
97
+ ) -> Generator[Annotation, None, None]:
98
+ """
99
+ Iterate annotation enhancements for the given filters.
100
+
101
+ :param source: Optional filter for Enhancement.source
102
+ :param annotation_type: Optional filter for
103
+ AnnotationEnhancement.annotation_type
104
+ :param scheme: Optional filter for Annotation.scheme
105
+ :param label: Optional filter for Annotation.label
106
+ """
107
+ for enhancement in self.reference.enhancements or []:
108
+ if enhancement.content.enhancement_type == EnhancementType.ANNOTATION:
109
+ if source is not None and enhancement.source != source:
110
+ continue
111
+ for annotation in enhancement.content.annotations:
112
+ if (
113
+ annotation_type is not None
114
+ and annotation.annotation_type != annotation_type
115
+ ):
116
+ continue
117
+ if scheme is not None and annotation.scheme != scheme:
118
+ continue
119
+ if label is not None and annotation.label != label:
120
+ continue
121
+ yield annotation
122
+
123
+ def has_bool_annotation(
124
+ self,
125
+ source: str | None = None,
126
+ scheme: str | None = None,
127
+ label: str | None = None,
128
+ expected_value: bool = True, # noqa: FBT001, FBT002
129
+ ) -> bool | None:
130
+ """
131
+ Check if a matching boolean annotation exists with the expected value.
132
+
133
+ :param source: Optional filter for Enhancement.source
134
+ :param scheme: Optional filter for Annotation.scheme
135
+ :param label: Optional filter for Annotation.label
136
+ :param expected_value: Specify expected boolean annotation value
137
+ :return: True if any matching annotation has the expected value, False if
138
+ matches exist but none has it, or None if nothing is found.
139
+ """
140
+ if scheme is None and label is None:
141
+ msg = "Please use at least one of the optional scheme or label filters."
142
+ raise AssertionError(msg)
143
+
144
+ found_annotation = False
145
+ for annotation in self.it_annotations(
146
+ source=source,
147
+ annotation_type=AnnotationType.BOOLEAN,
148
+ scheme=scheme,
149
+ label=label,
150
+ ):
151
+ if annotation.value == expected_value:
152
+ return True
153
+ found_annotation = True
154
+ return False if found_annotation else None
@@ -1,7 +1,10 @@
1
1
  """Parser for an EPPI JSON export file."""
2
2
 
3
+ from datetime import datetime
3
4
  from typing import Any
4
5
 
6
+ from pydantic import ValidationError
7
+
5
8
  from destiny_sdk.enhancements import (
6
9
  AbstractContentEnhancement,
7
10
  AbstractProcessType,
@@ -13,12 +16,16 @@ from destiny_sdk.enhancements import (
13
16
  BooleanAnnotation,
14
17
  EnhancementContent,
15
18
  EnhancementFileInput,
19
+ RawEnhancement,
16
20
  )
17
21
  from destiny_sdk.identifiers import (
18
22
  DOIIdentifier,
23
+ ERICIdentifier,
19
24
  ExternalIdentifier,
20
- ExternalIdentifierType,
25
+ OpenAlexIdentifier,
26
+ ProQuestIdentifier,
21
27
  )
28
+ from destiny_sdk.parsers.exceptions import ExternalIdentifierNotFoundError
22
29
  from destiny_sdk.references import ReferenceFileInput
23
30
  from destiny_sdk.visibility import Visibility
24
31
 
@@ -30,9 +37,17 @@ class EPPIParser:
30
37
  See example here: https://eppi.ioe.ac.uk/cms/Portals/35/Maps/Examples/example_orignal.json
31
38
  """
32
39
 
33
- version = "1.0"
40
+ version = "2.0"
34
41
 
35
- def __init__(self, tags: list[str] | None = None) -> None:
42
+ def __init__(
43
+ self,
44
+ *,
45
+ tags: list[str] | None = None,
46
+ include_raw_data: bool = False,
47
+ source_export_date: datetime | None = None,
48
+ data_description: str | None = None,
49
+ raw_enhancement_excludes: list[str] | None = None,
50
+ ) -> None:
36
51
  """
37
52
  Initialize the EPPIParser with optional tags.
38
53
 
@@ -42,20 +57,75 @@ class EPPIParser:
42
57
  """
43
58
  self.tags = tags or []
44
59
  self.parser_source = f"destiny_sdk.eppi_parser@{self.version}"
60
+ self.include_raw_data = include_raw_data
61
+ self.source_export_date = source_export_date
62
+ self.data_description = data_description
63
+ self.raw_enhancement_excludes = (
64
+ raw_enhancement_excludes if raw_enhancement_excludes else []
65
+ )
66
+
67
+ if self.include_raw_data and not all(
68
+ (
69
+ self.source_export_date,
70
+ self.data_description,
71
+ )
72
+ ):
73
+ msg = (
74
+ "Cannot include raw data enhancements without "
75
+ "source_export_date, data_description, and raw_enhancement_metadata"
76
+ )
77
+ raise RuntimeError(msg)
45
78
 
46
79
  def _parse_identifiers(
47
80
  self, ref_to_import: dict[str, Any]
48
81
  ) -> list[ExternalIdentifier]:
49
82
  identifiers = []
50
83
  if doi := ref_to_import.get("DOI"):
51
- identifiers.append(
52
- DOIIdentifier(
53
- identifier=doi,
54
- identifier_type=ExternalIdentifierType.DOI,
55
- )
84
+ doi_identifier = self._parse_doi(doi=doi)
85
+ if doi_identifier:
86
+ identifiers.append(doi_identifier)
87
+
88
+ if url := ref_to_import.get("URL"):
89
+ identifier = self._parse_url_to_identifier(url=url)
90
+ if identifier:
91
+ identifiers.append(identifier)
92
+
93
+ if not identifiers:
94
+ msg = (
95
+ "No known external identifiers found for Reference data "
96
+ f"with DOI: '{doi if doi else None}' "
97
+ f"and URL: '{url if url else None}'."
56
98
  )
99
+ raise ExternalIdentifierNotFoundError(detail=msg)
100
+
57
101
  return identifiers
58
102
 
103
+ def _parse_doi(self, doi: str) -> DOIIdentifier | None:
104
+ """Attempt to parse a DOI from a string."""
105
+ try:
106
+ doi = doi.strip()
107
+ return DOIIdentifier(identifier=doi)
108
+ except ValidationError:
109
+ return None
110
+
111
+ def _parse_url_to_identifier(self, url: str) -> ExternalIdentifier | None:
112
+ """Attempt to parse an external identifier from a url string."""
113
+ url = url.strip()
114
+ identifier_cls = None
115
+ if "eric" in url:
116
+ identifier_cls = ERICIdentifier
117
+ elif "proquest" in url:
118
+ identifier_cls = ProQuestIdentifier
119
+ elif "openalex" in url:
120
+ identifier_cls = OpenAlexIdentifier
121
+ else:
122
+ return None
123
+
124
+ try:
125
+ return identifier_cls(identifier=url)
126
+ except ValidationError:
127
+ return None
128
+
59
129
  def _parse_abstract_enhancement(
60
130
  self, ref_to_import: dict[str, Any]
61
131
  ) -> EnhancementContent | None:
@@ -107,6 +177,23 @@ class EPPIParser:
107
177
  authorship=authorships if authorships else None,
108
178
  )
109
179
 
180
+ def _parse_raw_enhancement(
181
+ self, ref_to_import: dict[str, Any], raw_enhancement_metadata: dict[str, Any]
182
+ ) -> EnhancementContent | None:
183
+ """Add Reference data as a raw enhancement."""
184
+ raw_enhancement_data = ref_to_import.copy()
185
+
186
+ # Remove any keys that should be excluded
187
+ for exclude in self.raw_enhancement_excludes:
188
+ raw_enhancement_data.pop(exclude, None)
189
+
190
+ return RawEnhancement(
191
+ source_export_date=self.source_export_date,
192
+ description=self.data_description,
193
+ metadata=raw_enhancement_metadata,
194
+ data=raw_enhancement_data,
195
+ )
196
+
110
197
  def _create_annotation_enhancement(self) -> EnhancementContent | None:
111
198
  if not self.tags:
112
199
  return None
@@ -124,8 +211,11 @@ class EPPIParser:
124
211
  )
125
212
 
126
213
  def parse_data(
127
- self, data: dict, source: str | None = None, robot_version: str | None = None
128
- ) -> list[ReferenceFileInput]:
214
+ self,
215
+ data: dict,
216
+ source: str | None = None,
217
+ robot_version: str | None = None,
218
+ ) -> tuple[list[ReferenceFileInput], list[dict]]:
129
219
  """
130
220
  Parse an EPPI JSON export dict into ReferenceFileInput objects and failed refs.
131
221
 
@@ -140,33 +230,55 @@ class EPPIParser:
140
230
 
141
231
  """
142
232
  parser_source = source if source is not None else self.parser_source
233
+
234
+ if self.include_raw_data:
235
+ codesets = [codeset.get("SetId") for codeset in data.get("CodeSets", [])]
236
+ raw_enhancement_metadata = {"codeset_ids": codesets}
237
+
143
238
  references = []
239
+ failed_refs = []
144
240
  for ref_to_import in data.get("References", []):
145
- enhancement_contents = [
146
- content
147
- for content in [
148
- self._parse_abstract_enhancement(ref_to_import),
149
- self._parse_bibliographic_enhancement(ref_to_import),
150
- self._create_annotation_enhancement(),
241
+ try:
242
+ enhancement_contents = [
243
+ content
244
+ for content in [
245
+ self._parse_abstract_enhancement(ref_to_import),
246
+ self._parse_bibliographic_enhancement(ref_to_import),
247
+ self._create_annotation_enhancement(),
248
+ ]
249
+ if content
151
250
  ]
152
- if content
153
- ]
154
251
 
155
- enhancements = [
156
- EnhancementFileInput(
157
- source=parser_source,
158
- visibility=Visibility.PUBLIC,
159
- content=content,
160
- robot_version=robot_version,
161
- )
162
- for content in enhancement_contents
163
- ]
252
+ if self.include_raw_data:
253
+ raw_enhancement = self._parse_raw_enhancement(
254
+ ref_to_import=ref_to_import,
255
+ raw_enhancement_metadata=raw_enhancement_metadata,
256
+ )
164
257
 
165
- references.append(
166
- ReferenceFileInput(
167
- visibility=Visibility.PUBLIC,
168
- identifiers=self._parse_identifiers(ref_to_import),
169
- enhancements=enhancements,
258
+ if raw_enhancement:
259
+ enhancement_contents.append(raw_enhancement)
260
+
261
+ enhancements = [
262
+ EnhancementFileInput(
263
+ source=parser_source,
264
+ visibility=Visibility.PUBLIC,
265
+ content=content,
266
+ robot_version=robot_version,
267
+ )
268
+ for content in enhancement_contents
269
+ ]
270
+
271
+ references.append(
272
+ ReferenceFileInput(
273
+ visibility=Visibility.PUBLIC,
274
+ identifiers=self._parse_identifiers(
275
+ ref_to_import=ref_to_import
276
+ ),
277
+ enhancements=enhancements,
278
+ )
170
279
  )
171
- )
172
- return references
280
+
281
+ except ExternalIdentifierNotFoundError:
282
+ failed_refs.append(ref_to_import)
283
+
284
+ return references, failed_refs
@@ -0,0 +1,17 @@
1
+ """Custom exceptions for destiny sdk parsers."""
2
+
3
+
4
+ class ExternalIdentifierNotFoundError(Exception):
5
+ """Raised when a reference has no identifiable external identifiers."""
6
+
7
+ def __init__(self, detail: str | None = None, *args: object) -> None:
8
+ """
9
+ Initialize the ExternalIdentifierNotFoundError.
10
+
11
+ Args:
12
+ detail: Explanation of why no external identifier could be found.
13
+ *args: Additional positional arguments for the exception.
14
+
15
+ """
16
+ self.detail = detail or "No detail provided."
17
+ super().__init__(detail, *args)
destiny_sdk/references.py CHANGED
@@ -4,7 +4,7 @@ from typing import Self
4
4
 
5
5
  from pydantic import UUID4, BaseModel, Field, TypeAdapter
6
6
 
7
- from destiny_sdk.core import _JsonlFileInputMixIn
7
+ from destiny_sdk.core import SearchResultMixIn, _JsonlFileInputMixIn
8
8
  from destiny_sdk.enhancements import Enhancement, EnhancementFileInput
9
9
  from destiny_sdk.identifiers import ExternalIdentifier
10
10
  from destiny_sdk.visibility import Visibility
@@ -65,3 +65,11 @@ class ReferenceFileInput(_JsonlFileInputMixIn, BaseModel):
65
65
  default=None,
66
66
  description="A list of enhancements for the reference",
67
67
  )
68
+
69
+
70
+ class ReferenceSearchResult(SearchResultMixIn, BaseModel):
71
+ """A search result for references."""
72
+
73
+ references: list[Reference] = Field(
74
+ description="The references returned by the search.",
75
+ )
destiny_sdk/search.py ADDED
@@ -0,0 +1,53 @@
1
+ """Models for search queries and results."""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
+ class SearchResultTotal(BaseModel):
7
+ """Information about the total number of search results."""
8
+
9
+ count: int = Field(
10
+ description="The total number of results matching the search criteria.",
11
+ )
12
+ is_lower_bound: bool = Field(
13
+ description="Whether the count is a lower bound (true) or exact (false).",
14
+ )
15
+
16
+
17
+ class SearchResultPage(BaseModel):
18
+ """Information about the page of search results."""
19
+
20
+ count: int = Field(
21
+ description="The number of results on this page.",
22
+ )
23
+ number: int = Field(
24
+ description="The page number of results returned, indexed from 1.",
25
+ )
26
+
27
+
28
+ class AnnotationFilter(BaseModel):
29
+ """An annotation filter for search queries."""
30
+
31
+ scheme: str = Field(
32
+ description="The annotation scheme to filter by.",
33
+ pattern=r"^[^/]+$",
34
+ )
35
+ label: str | None = Field(
36
+ None,
37
+ description="The annotation label to filter by.",
38
+ )
39
+ score: float | None = Field(
40
+ None,
41
+ description="The minimum score for the annotation filter.",
42
+ ge=0.0,
43
+ le=1.0,
44
+ )
45
+
46
+ def __repr__(self) -> str:
47
+ """Serialize the annotation filter to a string."""
48
+ annotation = self.scheme
49
+ if self.label:
50
+ annotation += f"/{self.label}"
51
+ if self.score is not None:
52
+ annotation += f"@{self.score}"
53
+ return annotation
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: destiny_sdk
3
- Version: 0.5.1
3
+ Version: 0.7.1
4
4
  Summary: A software development kit (sdk) to support interaction with the DESTINY repository
5
- Author-email: Adam Hamilton <adam@futureevidence.org>, Andrew Harvey <andrew@futureevidence.org>, Daniel Breves <daniel@futureevidence.org>, Jack Walmisley <jack@futureevidence.org>
5
+ Author-email: Adam Hamilton <adam@futureevidence.org>, Andrew Harvey <andrew@futureevidence.org>, Daniel Breves <daniel@futureevidence.org>, Jack Walmisley <jack@futureevidence.org>, Tim Repke <tim.repke@pik-potsdam.de>
6
6
  License-Expression: Apache-2.0
7
7
  License-File: LICENSE
8
8
  Requires-Python: ~=3.12
@@ -14,6 +14,7 @@ Requires-Dist: pytest-asyncio<2,>=1.0.0
14
14
  Requires-Dist: pytest-httpx<0.36,>=0.35.0
15
15
  Requires-Dist: pytest<9,>=8.4.0
16
16
  Requires-Dist: python-jose<4,>=3.4.0
17
+ Provides-Extra: labs
17
18
  Description-Content-Type: text/markdown
18
19
 
19
20
  # DESTINY SDK
@@ -34,6 +35,13 @@ pip install destiny-sdk
34
35
  uv add destiny-sdk
35
36
  ```
36
37
 
38
+ Some labs functionality may require extra dependencies - these can be installed by:
39
+
40
+ ```sh
41
+ pip install destiny-sdk[labs]
42
+ uv add destiny-sdk --extra labs
43
+ ```
44
+
37
45
  ## Development
38
46
 
39
47
  ### Dependencies
@@ -0,0 +1,21 @@
1
+ destiny_sdk/__init__.py,sha256=NdSlsPQyDF3TW30_JzbvYMRBRA9iT677iTRWWCMdYOA,382
2
+ destiny_sdk/auth.py,sha256=bY72ywZEcG_67YBd9PrwgWTXkCf58rhLvVEXrtXbWtA,6247
3
+ destiny_sdk/client.py,sha256=LoXEBPxekbT-Y8eiTt_Gfy4G5RPj1tURZHuH-V9CLXs,6247
4
+ destiny_sdk/core.py,sha256=PYCYpY72MHXo7iQMHtnXcnCOGn6CUsbYoykHvtQl4Oc,1857
5
+ destiny_sdk/enhancements.py,sha256=-4jLm3R0T5UpgCt09CgUfPcnzPOyjdhUZCT1zhEP6sQ,12838
6
+ destiny_sdk/identifiers.py,sha256=r2dFBIv2vtOK-C5lvHryEOqQBQ6_Odehipc6YgMZVBk,9482
7
+ destiny_sdk/imports.py,sha256=b-rh-dt3NsyLGxqmVzIzKaHiXhbw-3wtAaBN-ZW-i1E,5940
8
+ destiny_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ destiny_sdk/references.py,sha256=3Y8gBMTSyZY35S3pB1bnVHMai9RRiGeoGZysNvSo7kk,2553
10
+ destiny_sdk/robots.py,sha256=I_ZvMxwST52e8ovhv0-gPbOB3P9tptbRG0LrkNNOqKo,13463
11
+ destiny_sdk/search.py,sha256=RAmUBS2KE2fmzLTxB0jV5R3AeuBrOJAWqieGv4GgFAo,1474
12
+ destiny_sdk/visibility.py,sha256=8D44Q868YdScAt6eAFgXXrhonozXnv_Qa5w5yEGMPX8,577
13
+ destiny_sdk/labs/__init__.py,sha256=H4RFPyeelqZ56PagnWPX-JZeWlxPnCZoYHtr4c9SU9Q,180
14
+ destiny_sdk/labs/references.py,sha256=iZisRgGZ5c7X7uTFoe6Q0AwwFMa4yJbIoPUVv_hvOiU,5589
15
+ destiny_sdk/parsers/__init__.py,sha256=d5gS--bXla_0I7e_9wTBnGWMXt2U8b-_ndeprTPe1hk,149
16
+ destiny_sdk/parsers/eppi_parser.py,sha256=_1xnAT0F0o1HKpMWOGQbVS3VPOrhPqyzHDWR3CosWwk,9484
17
+ destiny_sdk/parsers/exceptions.py,sha256=0Sc_M4j560Nqh4SjeP_YrgOUVagdIwWwRz24E6YlZ1k,573
18
+ destiny_sdk-0.7.1.dist-info/METADATA,sha256=w_3Zbj91oWcz1uCgSN1JLYVTisV1m3PJBio2g9AtopY,2657
19
+ destiny_sdk-0.7.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
20
+ destiny_sdk-0.7.1.dist-info/licenses/LICENSE,sha256=6QURU4gvvTjVZ5rfp5amZ6FtFvcpPhAGUjxF5WSZAHI,9138
21
+ destiny_sdk-0.7.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,17 +0,0 @@
1
- destiny_sdk/__init__.py,sha256=gmmrceJX84T4msk_GSm_OjTQvCpHFZRjnlUK5_7IODE,356
2
- destiny_sdk/auth.py,sha256=bY72ywZEcG_67YBd9PrwgWTXkCf58rhLvVEXrtXbWtA,6247
3
- destiny_sdk/client.py,sha256=fTBtuq5emT8ieNtCuCY8Y6xAKZJDLq8sG1WOvmjLz-I,4971
4
- destiny_sdk/core.py,sha256=_FwDaczKTSaUSV_qfcnLhkBbZagh4ayFpN0qUwJ03-o,1448
5
- destiny_sdk/enhancements.py,sha256=SkIlIlWKBN7Z-aXpQiy22SXrU7zVnKxaRb4F5yaFsO8,11503
6
- destiny_sdk/identifiers.py,sha256=1N2cszBmnQoUeKm54-7MUTO-zTDuvW8U9OjTeAmhWvo,7182
7
- destiny_sdk/imports.py,sha256=b-rh-dt3NsyLGxqmVzIzKaHiXhbw-3wtAaBN-ZW-i1E,5940
8
- destiny_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- destiny_sdk/references.py,sha256=Dx-WKcv9gNJkKU9n52AYoEey7siTHR5_wBVBKSHND6Q,2321
10
- destiny_sdk/robots.py,sha256=I_ZvMxwST52e8ovhv0-gPbOB3P9tptbRG0LrkNNOqKo,13463
11
- destiny_sdk/visibility.py,sha256=8D44Q868YdScAt6eAFgXXrhonozXnv_Qa5w5yEGMPX8,577
12
- destiny_sdk/parsers/__init__.py,sha256=d5gS--bXla_0I7e_9wTBnGWMXt2U8b-_ndeprTPe1hk,149
13
- destiny_sdk/parsers/eppi_parser.py,sha256=rEOtt_5Kp3oktFlzRTLZ2x4_7aQ9-ba3FYpkaEnpnvs,5521
14
- destiny_sdk-0.5.1.dist-info/METADATA,sha256=eKznxYvVJhW-IVlA16LHzJ2QtpzfG3RyBKk9e-x_o8w,2440
15
- destiny_sdk-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- destiny_sdk-0.5.1.dist-info/licenses/LICENSE,sha256=6QURU4gvvTjVZ5rfp5amZ6FtFvcpPhAGUjxF5WSZAHI,9138
17
- destiny_sdk-0.5.1.dist-info/RECORD,,