destiny_sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
+ """Enhancement classes for the Destiny Repository."""
2
+
3
+ import uuid
4
+ from enum import StrEnum, auto
5
+ from typing import Annotated, Literal
6
+
7
+ from pydantic import BaseModel, Field, HttpUrl, PastDate
8
+
9
+ from destiny_sdk.core import _JsonlFileInputMixIn
10
+ from destiny_sdk.visibility import Visibility
11
+
12
+
13
+ class EnhancementType(StrEnum):
14
+ """
15
+ The type of enhancement.
16
+
17
+ This is used to identify the type of enhancement in the `Enhancement` class.
18
+
19
+ **Allowed values**:
20
+ - `bibliographic`: Bibliographic metadata.
21
+ - `abstract`: The abstract of a reference.
22
+ - `annotation`: A free-form enhancement for tagging with labels.
23
+ - `locations`: Locations where the reference can be found.
24
+ - `full_text`: The full text of the reference. (To be implemeted)
25
+ """
26
+
27
+ BIBLIOGRAPHIC = auto()
28
+ ABSTRACT = auto()
29
+ ANNOTATION = auto()
30
+ LOCATION = auto()
31
+ FULL_TEXT = auto()
32
+
33
+
34
+ class AuthorPosition(StrEnum):
35
+ """
36
+ The position of an author in a list of authorships.
37
+
38
+ Maps to the data from OpenAlex.
39
+
40
+ **Allowed values**:
41
+ - `first`: The first author.
42
+ - `middle`: Any middle author
43
+ - `last`: The last author
44
+ """
45
+
46
+ FIRST = auto()
47
+ MIDDLE = auto()
48
+ LAST = auto()
49
+
50
+
51
class Authorship(BaseModel):
    """
    A single author together with their position on a reference.

    This is a simplification of the OpenAlex [Authorship
    object](https://docs.openalex.org/api-entities/works/work-object/authorship-object)
    for our purposes.
    """

    # All three fields are required (`...` marks "no default").
    display_name: str = Field(..., description="The display name of the author.")
    orcid: str = Field(..., description="The ORCid of the author.")
    position: AuthorPosition = Field(
        ...,
        description="The position of the author within the list of authors.",
    )
65
+
66
+
67
class BibliographicMetadataEnhancement(BaseModel):
    """
    Enhancement content carrying bibliographic metadata for a reference.

    Generally this will be sourced from a database such as OpenAlex or similar.
    For directly contributed references, these may not be complete, which is
    why every metadata field is optional and defaults to `None`.
    """

    # Discriminator tag used by the `EnhancementContent` union.
    enhancement_type: Literal[EnhancementType.BIBLIOGRAPHIC] = (
        EnhancementType.BIBLIOGRAPHIC
    )
    authorship: list[Authorship] | None = Field(
        None,
        description="A list of `Authorships` belonging to this reference.",
    )
    cited_by_count: int | None = Field(
        None,
        # The description keeps its leading and trailing newlines.
        description=(
            "\n(From OpenAlex) The number of citations to this work. "
            "These are the times that\n"
            "other works have cited this work\n"
        ),
    )
    # `PastDate` only validates dates strictly before today.
    created_date: PastDate | None = Field(
        None,
        description="The ISO8601 date this metadata record was created",
    )
    publication_date: PastDate | None = Field(
        None,
        description="The date which the version of record was published.",
    )
    publication_year: int | None = Field(
        None,
        description="The year in which the version of record was published.",
    )
    publisher: str | None = Field(
        None,
        description="The name of the entity which published the version of record.",
    )
    title: str | None = Field(None, description="The title of the reference.")
104
+
105
+
106
+ class AbstractProcessType(StrEnum):
107
+ """
108
+ The process used to acquyire the abstract.
109
+
110
+ **Allowed values**:
111
+ - `uninverted`
112
+ - `closed_api`
113
+ - `other`
114
+ """
115
+
116
+ UNINVERTED = auto()
117
+ CLOSED_API = auto()
118
+ OTHER = auto()
119
+
120
+
121
class AbstractContentEnhancement(BaseModel):
    """
    Enhancement content holding the abstract of a reference.

    This is separate from the `BibliographicMetadata` for two reasons:

    1. Abstracts are increasingly missing from sources like OpenAlex, and may be
       backfilled from other sources, without the bibliographic metadata.
    2. They are also subject to copyright limitations in ways which metadata are
       not, and thus need separate visibility controls.
    """

    # Discriminator tag used by the `EnhancementContent` union.
    enhancement_type: Literal[EnhancementType.ABSTRACT] = EnhancementType.ABSTRACT
    # Both payload fields are required.
    process: AbstractProcessType = Field(
        ...,
        description="The process used to acquire the abstract.",
    )
    abstract: str = Field(..., description="The abstract of the reference.")
138
+
139
+
140
+ class AnnotationType(StrEnum):
141
+ """
142
+ The type of annotation.
143
+
144
+ This is used to identify the type of annotation in the `Annotation` class.
145
+
146
+ **Allowed values**:
147
+ - `boolean`: An annotation which is the boolean application of a label across a
148
+ reference.
149
+ - `score`: An annotation which is a score for a label across a reference,
150
+ without a boolean value.
151
+ """
152
+
153
+ BOOLEAN = auto()
154
+ SCORE = auto()
155
+
156
+
157
class ScoreAnnotation(BaseModel):
    """
    An annotation that assigns a numeric score to a label.

    This is similar to a BooleanAnnotation, but lacks a boolean determination
    as to the application of the label.
    """

    # Discriminator tag used by the `Annotation` union.
    annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
    scheme: str = Field(
        ...,
        description="An identifier for the scheme of annotation",
        examples=["openalex:topic", "pubmed:mesh"],
    )
    label: str = Field(
        ...,
        description="A high level label for this annotation like the name of the topic",
    )
    score: float = Field(..., description="Score for this annotation")
    data: dict = Field(
        ...,
        description=(
            "An object representation of the annotation "
            "including any confidence scores or descriptions."
        ),
    )
180
+
181
+
182
class BooleanAnnotation(BaseModel):
    """
    An annotation that tags content with a label via a boolean flag.

    This class will probably be broken up in the future, but covers most of our
    initial cases.
    """

    # Discriminator tag used by the `Annotation` union.
    annotation_type: Literal[AnnotationType.BOOLEAN] = AnnotationType.BOOLEAN
    scheme: str = Field(
        ...,
        description="An identifier for the scheme of the annotation",
        examples=["openalex:topic", "pubmed:mesh"],
    )
    label: str = Field(
        ...,
        description="A high level label for this annotation like the name of the topic",
    )
    value: bool = Field(..., description="Boolean flag for this annotation")
    # Optional: the only field on this model with a default.
    score: float | None = Field(
        default=None,
        description="A confidence score for this annotation",
    )
    data: dict = Field(
        ...,
        # The description keeps its leading and trailing newlines.
        description=(
            "\nAn object representation of the annotation "
            "including any confidence scores or\n"
            "descriptions.\n"
        ),
    )
208
+
209
+
210
#: Union type for all annotations.
#: The concrete model is selected by the `annotation_type` field.
Annotation = Annotated[
    BooleanAnnotation | ScoreAnnotation,
    Field(discriminator="annotation_type"),
]
214
+
215
+
216
class AnnotationEnhancement(BaseModel):
    """
    Enhancement content made up of a list of `Annotation` entries.

    Each entry is a `BooleanAnnotation` or a `ScoreAnnotation`.
    """

    # Discriminator tag used by the `EnhancementContent` union.
    enhancement_type: Literal[EnhancementType.ANNOTATION] = EnhancementType.ANNOTATION
    # Required; no default is provided.
    annotations: list[Annotation]
221
+
222
+
223
+ class DriverVersion(StrEnum):
224
+ """
225
+ The version based on the DRIVER guidelines versioning scheme.
226
+
227
+ (Borrowed from OpenAlex)
228
+
229
+ Allowed values:
230
+ - `publishedVersion`: The document's version of record. This is the most
231
+ authoritative version.
232
+ - `acceptedVersion`: The document after having completed peer review and being
233
+ officially accepted for publication. It will lack publisher formatting, but the
234
+ content should be interchangeable with the that of the publishedVersion.
235
+ - `submittedVersion`: the document as submitted to the publisher by the authors, but
236
+ before peer-review. Its content may differ significantly from that of the accepted
237
+ article.
238
+ """
239
+
240
+ PUBLISHED_VERSION = "publishedVersion"
241
+ ACCEPTED_VERSION = "acceptedVersion"
242
+ SUBMITTED_VERSION = "submittedVersion"
243
+ OTHER = "other"
244
+
245
+
246
class Location(BaseModel):
    """
    One place where a reference can be found.

    This maps almost completely to the OpenAlex
    [Location object](https://docs.openalex.org/api-entities/works/work-object/location-object)

    Every field is optional and defaults to `None`.
    """

    # Multi-line descriptions keep their leading and trailing newlines.
    is_oa: bool | None = Field(
        None,
        description=(
            "\n(From OpenAlex): True if an Open Access (OA) version "
            "of this work is available\n"
            "at this location. May be left as null if this is unknown (and thus)\n"
            "treated effectively as `false`.\n"
        ),
    )
    version: DriverVersion | None = Field(
        None,
        description=(
            "\nThe version (according to the DRIVER versioning scheme) "
            "of this location.\n"
        ),
    )
    landing_page_url: HttpUrl | None = Field(
        None,
        description="(From OpenAlex): The landing page URL for this location.",
    )
    pdf_url: HttpUrl | None = Field(
        None,
        description=(
            "\n(From OpenAlex): A URL where you can find this location as a PDF.\n"
        ),
    )
    license: str | None = Field(
        None,
        description=(
            "\n(From OpenAlex): The location's publishing license. "
            "This can be a Creative\n"
            "Commons license such as cc0 or cc-by, "
            "a publisher-specific license, or null\n"
            "which means we are not able to determine "
            "a license for this location.\n"
        ),
    )
    extra: dict | None = Field(
        None,
        description="Any extra metadata about this location",
    )
289
+
290
+
291
class LocationEnhancement(BaseModel):
    """
    Enhancement content listing the locations where a reference can be found.

    This maps closely (almost exactly) to OpenAlex's locations.
    """

    # Discriminator tag used by the `EnhancementContent` union.
    enhancement_type: Literal[EnhancementType.LOCATION] = EnhancementType.LOCATION
    # Required list of `Location` entries.
    locations: list[Location] = Field(
        ...,
        description="A list of locations where this reference can be found.",
    )
302
+
303
+
304
#: Union type for all enhancement content types.
#: The concrete model is selected by the `enhancement_type` field.
EnhancementContent = Annotated[
    (
        BibliographicMetadataEnhancement
        | AbstractContentEnhancement
        | AnnotationEnhancement
        | LocationEnhancement
    ),
    Field(discriminator="enhancement_type"),
]
312
+
313
+
314
class Enhancement(_JsonlFileInputMixIn, BaseModel):
    """
    Core enhancement class.

    Ties a typed `content` payload to a reference, along with provenance
    (`source`, `robot_version`, `derived_from`) and a visibility level.
    """

    reference_id: uuid.UUID = Field(
        ...,
        description="The ID of the reference this enhancement is associated with.",
    )
    source: str = Field(
        ...,
        description="The enhancement source for tracking provenance.",
    )
    visibility: Visibility = Field(
        ...,
        description="The level of visibility of the enhancement",
    )
    robot_version: str | None = Field(
        None,
        description="The version of the robot that generated the content.",
    )
    derived_from: list[uuid.UUID] | None = Field(
        None,
        description="List of enhancement IDs that this enhancement was derived from.",
    )
    # Required; the concrete content model is picked by `enhancement_type`.
    content: Annotated[
        EnhancementContent,
        Field(
            discriminator="enhancement_type",
            description="The content of the enhancement.",
        ),
    ]
341
+
342
+
343
class EnhancementFileInput(BaseModel):
    """
    Enhancement model used to marshall a file input to new references.

    Unlike `Enhancement`, this model has no `reference_id` and carries its own
    top-level `enhancement_type` alongside the typed `content`.
    """

    # With only `alias=` set, pydantic validates `robot_version` solely from
    # the alias key, so input keyed by the real field name would be silently
    # dropped. Allowing population by name makes the alias an *additional*
    # accepted key, which is what a backwards-compatibility alias needs.
    model_config = {"populate_by_name": True}

    source: str = Field(
        description="The enhancement source for tracking provenance.",
    )
    visibility: Visibility = Field(
        description="The level of visibility of the enhancement"
    )
    enhancement_type: EnhancementType = Field(description="The type of enhancement.")
    robot_version: str | None = Field(
        default=None,
        description="The version of the robot that generated the content.",
        # (Adam) Temporary alias for backwards compatibility for already prepared files
        # Next person who sees this can remove it :)
        alias="processor_version",
    )
    # The concrete content model is picked by the content's `enhancement_type`.
    content: EnhancementContent = Field(
        discriminator="enhancement_type",
        description="The content of the enhancement.",
    )
@@ -0,0 +1,107 @@
1
+ """Identifier classes for the Destiny SDK."""
2
+
3
+ from enum import StrEnum, auto
4
+ from typing import Annotated, Literal
5
+
6
+ from pydantic import UUID4, BaseModel, Field, field_validator
7
+
8
+
9
+ class ExternalIdentifierType(StrEnum):
10
+ """
11
+ The type of identifier used to identify a reference.
12
+
13
+ This is used to identify the type of identifier used in the `ExternalIdentifier`
14
+ class.
15
+ **Allowed values**:
16
+ - `doi`: A DOI (Digital Object Identifier) which is a unique identifier for a
17
+ document.
18
+ - `pmid`: A PubMed ID which is a unique identifier for a document in PubMed.
19
+ - `openalex`: An OpenAlex ID which is a unique identifier for a document in
20
+ OpenAlex.
21
+ - `other`: Any other identifier not defined. This should be used sparingly.
22
+ """
23
+
24
+ DOI = auto()
25
+ PM_ID = auto()
26
+ OPEN_ALEX = auto()
27
+ OTHER = auto()
28
+
29
+
30
def remove_doi_url(value: str) -> str:
    """
    Remove the URL part of the DOI if it exists.

    Surrounding whitespace is stripped *before* the `http://doi.org/` or
    `https://doi.org/` prefix is looked for, so padded input such as
    `" https://doi.org/10.1000/x"` is still reduced to the bare DOI. The
    result is stripped again to drop whitespace that followed the prefix.

    Args:
        value: A DOI, optionally given as a doi.org URL.

    Returns:
        The DOI without the doi.org URL prefix or surrounding whitespace.

    """
    return (
        value.strip()
        .removeprefix("http://doi.org/")
        .removeprefix("https://doi.org/")
        .strip()
    )
35
+
36
+
37
class DOIIdentifier(BaseModel):
    """
    An external identifier representing a DOI.

    The `identifier` is normalised by a "before" validator that strips a
    doi.org URL prefix, and must then match the DOI pattern on the field.
    """

    identifier: str = Field(
        description="The DOI of the reference.",
        pattern=r"^10\.\d{4,9}/[-._;()/:a-zA-Z0-9%<>\[\]+&]+$",
    )
    identifier_type: Literal[ExternalIdentifierType.DOI] = Field(
        ExternalIdentifierType.DOI, description="The type of identifier used."
    )

    @field_validator("identifier", mode="before")
    @classmethod
    def remove_doi_url(cls, value: object) -> object:
        """
        Remove the URL part of the DOI if it exists.

        Args:
            value: The raw input for `identifier`, before type validation.

        Returns:
            The input with surrounding whitespace and any `http://doi.org/` or
            `https://doi.org/` prefix removed; non-string input is returned
            unchanged.

        """
        # This runs before type validation, so `value` may not be a string.
        # Pass it through so the field's own `str` validation rejects it,
        # rather than failing here with an AttributeError on `.removeprefix`.
        if not isinstance(value, str):
            return value
        # Strip first: leading whitespace would otherwise hide the URL prefix.
        return (
            value.strip()
            .removeprefix("http://doi.org/")
            .removeprefix("https://doi.org/")
            .strip()
        )
57
+
58
+
59
class PubMedIdentifier(BaseModel):
    """An external identifier holding a PubMed ID, stored as an integer."""

    identifier: int = Field(..., description="The PubMed ID of the reference.")
    # Discriminator tag used by the `ExternalIdentifier` union.
    identifier_type: Literal[ExternalIdentifierType.PM_ID] = Field(
        default=ExternalIdentifierType.PM_ID,
        description="The type of identifier used.",
    )
66
+
67
+
68
class OpenAlexIdentifier(BaseModel):
    """An external identifier holding an OpenAlex ID (`W` followed by digits)."""

    identifier: str = Field(
        ...,
        description="The OpenAlex ID of the reference.",
        pattern=r"^W\d+$",
    )
    # Discriminator tag used by the `ExternalIdentifier` union.
    identifier_type: Literal[ExternalIdentifierType.OPEN_ALEX] = Field(
        default=ExternalIdentifierType.OPEN_ALEX,
        description="The type of identifier used.",
    )
77
+
78
+
79
class OtherIdentifier(BaseModel):
    """
    An external identifier not otherwise defined by the repository.

    The name of the identifier scheme must be given in `other_identifier_name`.
    """

    identifier: str = Field(..., description="The identifier of the reference.")
    # Discriminator tag used by the `ExternalIdentifier` union.
    identifier_type: Literal[ExternalIdentifierType.OTHER] = Field(
        default=ExternalIdentifierType.OTHER,
        description="The type of identifier used.",
    )
    other_identifier_name: str = Field(
        ...,
        description="The name of the undocumented identifier type.",
    )
89
+
90
+
91
#: Union type for all external identifiers.
#: The concrete model is selected by the `identifier_type` field.
ExternalIdentifier = Annotated[
    (
        DOIIdentifier
        | PubMedIdentifier
        | OpenAlexIdentifier
        | OtherIdentifier
    ),
    Field(discriminator="identifier_type"),
]
96
+
97
+
98
class LinkedExternalIdentifier(BaseModel):
    """
    An external identifier which identifies a reference.

    Pairs an `ExternalIdentifier` with the ID of the reference it belongs to.
    """

    # The concrete identifier model is picked by its `identifier_type`.
    identifier: ExternalIdentifier = Field(
        ...,
        description="The identifier of the reference.",
        discriminator="identifier_type",
    )
    reference_id: UUID4 = Field(
        ...,
        description="The ID of the reference this identifier identifies.",
    )