destiny_sdk 0.6.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- destiny_sdk/client.py +465 -6
- destiny_sdk/core.py +7 -0
- destiny_sdk/enhancements.py +58 -19
- destiny_sdk/identifiers.py +83 -5
- destiny_sdk/parsers/eppi_parser.py +146 -34
- destiny_sdk/parsers/exceptions.py +17 -0
- destiny_sdk/search.py +6 -1
- {destiny_sdk-0.6.0.dist-info → destiny_sdk-0.7.2.dist-info}/METADATA +2 -1
- destiny_sdk-0.7.2.dist-info/RECORD +21 -0
- {destiny_sdk-0.6.0.dist-info → destiny_sdk-0.7.2.dist-info}/WHEEL +1 -1
- destiny_sdk-0.6.0.dist-info/RECORD +0 -20
- {destiny_sdk-0.6.0.dist-info → destiny_sdk-0.7.2.dist-info}/licenses/LICENSE +0 -0
destiny_sdk/client.py
CHANGED
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
"""Send authenticated requests to Destiny Repository."""
|
|
2
2
|
|
|
3
|
+
import sys
|
|
3
4
|
import time
|
|
4
5
|
from collections.abc import Generator
|
|
5
6
|
|
|
6
7
|
import httpx
|
|
7
|
-
from
|
|
8
|
+
from msal import (
|
|
9
|
+
ConfidentialClientApplication,
|
|
10
|
+
ManagedIdentityClient,
|
|
11
|
+
PublicClientApplication,
|
|
12
|
+
UserAssignedManagedIdentity,
|
|
13
|
+
)
|
|
14
|
+
from pydantic import UUID4, HttpUrl, TypeAdapter
|
|
8
15
|
|
|
16
|
+
from destiny_sdk.auth import create_signature
|
|
17
|
+
from destiny_sdk.core import sdk_version
|
|
18
|
+
from destiny_sdk.identifiers import IdentifierLookup
|
|
19
|
+
from destiny_sdk.references import Reference, ReferenceSearchResult
|
|
9
20
|
from destiny_sdk.robots import (
|
|
10
21
|
EnhancementRequestRead,
|
|
11
22
|
RobotEnhancementBatch,
|
|
@@ -13,8 +24,10 @@ from destiny_sdk.robots import (
|
|
|
13
24
|
RobotEnhancementBatchResult,
|
|
14
25
|
RobotResult,
|
|
15
26
|
)
|
|
27
|
+
from destiny_sdk.search import AnnotationFilter
|
|
16
28
|
|
|
17
|
-
|
|
29
|
+
# Build the User-Agent value sent by the SDK clients: the running interpreter's
# major.minor.micro version followed by the installed SDK version.
python_version = "{}.{}.{}".format(*sys.version_info[:3])
user_agent = "python@{}/destiny-sdk@{}".format(python_version, sdk_version)
|
|
18
31
|
|
|
19
32
|
|
|
20
33
|
class HMACSigningAuth(httpx.Auth):
|
|
@@ -53,7 +66,7 @@ class HMACSigningAuth(httpx.Auth):
|
|
|
53
66
|
yield request
|
|
54
67
|
|
|
55
68
|
|
|
56
|
-
class
|
|
69
|
+
class RobotClient:
|
|
57
70
|
"""
|
|
58
71
|
Client for interaction with the Destiny API.
|
|
59
72
|
|
|
@@ -71,7 +84,10 @@ class Client:
|
|
|
71
84
|
"""
|
|
72
85
|
self.session = httpx.Client(
|
|
73
86
|
base_url=str(base_url).removesuffix("/").removesuffix("/v1") + "/v1",
|
|
74
|
-
headers={
|
|
87
|
+
headers={
|
|
88
|
+
"Content-Type": "application/json",
|
|
89
|
+
"User-Agent": user_agent,
|
|
90
|
+
},
|
|
75
91
|
auth=HMACSigningAuth(secret_key=secret_key, client_id=client_id),
|
|
76
92
|
)
|
|
77
93
|
|
|
@@ -114,7 +130,11 @@ class Client:
|
|
|
114
130
|
return RobotEnhancementBatchRead.model_validate(response.json())
|
|
115
131
|
|
|
116
132
|
def poll_robot_enhancement_batch(
|
|
117
|
-
self,
|
|
133
|
+
self,
|
|
134
|
+
robot_id: UUID4,
|
|
135
|
+
limit: int = 10,
|
|
136
|
+
lease: str | None = None,
|
|
137
|
+
timeout: int = 60,
|
|
118
138
|
) -> RobotEnhancementBatch | None:
|
|
119
139
|
"""
|
|
120
140
|
Poll for a robot enhancement batch.
|
|
@@ -125,13 +145,20 @@ class Client:
|
|
|
125
145
|
:type robot_id: UUID4
|
|
126
146
|
:param limit: The maximum number of pending enhancements to return
|
|
127
147
|
:type limit: int
|
|
148
|
+
:param lease: The duration to lease the pending enhancements for,
|
|
149
|
+
in ISO 8601 duration format eg PT10M. If not provided the repository will
|
|
150
|
+
use a default lease duration.
|
|
151
|
+
:type lease: str | None
|
|
128
152
|
:return: The RobotEnhancementBatch object from the response, or None if no
|
|
129
153
|
batches available
|
|
130
154
|
:rtype: destiny_sdk.robots.RobotEnhancementBatch | None
|
|
131
155
|
"""
|
|
156
|
+
params = {"robot_id": str(robot_id), "limit": limit}
|
|
157
|
+
if lease:
|
|
158
|
+
params["lease"] = lease
|
|
132
159
|
response = self.session.post(
|
|
133
160
|
"/robot-enhancement-batches/",
|
|
134
|
-
params=
|
|
161
|
+
params=params,
|
|
135
162
|
timeout=timeout,
|
|
136
163
|
)
|
|
137
164
|
# HTTP 204 No Content indicates no batches available
|
|
@@ -140,3 +167,435 @@ class Client:
|
|
|
140
167
|
|
|
141
168
|
response.raise_for_status()
|
|
142
169
|
return RobotEnhancementBatch.model_validate(response.json())
|
|
170
|
+
|
|
171
|
+
def renew_robot_enhancement_batch_lease(
    self, robot_enhancement_batch_id: UUID4, lease_duration: str | None = None
) -> None:
    """
    Extend the lease held on a robot enhancement batch.

    The request goes through the client's session, so it is signed with the
    client's secret key like every other request made by this client.

    :param robot_enhancement_batch_id: The ID of the robot enhancement batch
    :type robot_enhancement_batch_id: UUID4
    :param lease_duration: How long to lease the pending enhancements for, as
        an ISO 8601 duration (eg PT10M). When omitted, no ``lease`` query
        parameter is sent and the repository applies its default duration.
    :type lease_duration: str | None
    :raises httpx.HTTPStatusError: If the repository returns an error status.
    """
    # Only send the query parameter when the caller asked for a duration.
    query = None
    if lease_duration:
        query = {"lease": lease_duration}

    endpoint = f"/robot-enhancement-batches/{robot_enhancement_batch_id}/renew-lease/"
    renewal = self.session.post(endpoint, params=query)
    renewal.raise_for_status()
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Backward compatibility
|
|
194
|
+
Client = RobotClient
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class OAuthMiddleware(httpx.Auth):
    """
    Auth middleware that handles OAuth2 token retrieval and refresh.

    This is generally used in conjunction with
    :class:`OAuthClient <libs.sdk.src.destiny_sdk.client.OAuthClient>`.

    Supports three authentication flows:

    **Public Client Application (human login)**

    Initial login will be interactive through a browser window. Subsequent token
    retrievals will use cached tokens and refreshes where possible, and only prompt
    for login again if necessary.

    .. code-block:: python

        auth = OAuthMiddleware(
            azure_client_id="client-id",
            azure_application_id="application-id",
            azure_login_url="login-url",
        )

    **Confidential Client Application (client credentials)**

    Suitable for service-to-service authentication where no user interaction is
    possible or desired. Reach out if you need help setting up a confidential client
    application. The secret must be stored securely.

    .. code-block:: python

        auth = OAuthMiddleware(
            azure_client_id="client-id",
            azure_application_id="application-id",
            azure_login_url="login-url",
            azure_client_secret="your-azure-client-secret",
        )

    **Azure Managed Identity**

    Suitable for Azure environments that have had API permissions provisioned for
    their managed identity. Note that the ``azure_client_id`` here is the client ID of
    the managed identity, not the repository.

    .. code-block:: python

        auth = OAuthMiddleware(
            azure_client_id="your-managed-identity-client-id",
            azure_application_id="application-id",
            use_managed_identity=True,
        )

    """

    # ``auth_flow`` parses the JSON body of 401 responses. httpx only reads the
    # response body before passing it back into the flow when this flag is set;
    # without it ``response.json()`` raises ``httpx.ResponseNotRead``.
    requires_response_body = True

    def __init__(
        self,
        azure_client_id: str,
        azure_application_id: str,
        azure_login_url: HttpUrl | str | None = None,
        azure_client_secret: str | None = None,
        *,
        use_managed_identity: bool = False,
    ) -> None:
        """
        Initialize the auth middleware.

        :param azure_client_id: The OAuth2 client ID. For managed identity this
            is the client ID of the managed identity itself.
        :type azure_client_id: str
        :param azure_application_id: The application ID for the Destiny API, used
            to build the ``api://<application-id>/.default`` scope.
        :type azure_application_id: str
        :param azure_login_url: The Azure login URL (authority). Required unless
            ``use_managed_identity`` is set, in which case it must be omitted.
        :type azure_login_url: HttpUrl | str | None
        :param azure_client_secret: The Azure client secret. When given, the
            confidential client (client credentials) flow is used.
        :type azure_client_secret: str | None
        :param use_managed_identity: Whether to use managed identity for authentication
        :type use_managed_identity: bool
        :raises ValueError: If the combination of arguments does not match the
            selected authentication flow.
        """
        if use_managed_identity:
            if azure_login_url or azure_client_secret:
                msg = (
                    "azure_login_url and azure_client_secret must not be provided "
                    "when using managed identity authentication"
                )
                raise ValueError(msg)
            if not azure_client_id:
                msg = (
                    "azure_client_id must be provided "
                    "when using managed identity authentication"
                )
                raise ValueError(msg)
            self._oauth_app = ManagedIdentityClient(
                UserAssignedManagedIdentity(client_id=azure_client_id),
                http_client=httpx.Client(),
            )
            self._get_token = self._get_token_from_managed_identity
        else:
            # Both the confidential and the public flow need an authority URL.
            if not azure_login_url:
                msg = (
                    "azure_login_url must be provided "
                    "when not using managed identity authentication"
                )
                raise ValueError(msg)
            authority = str(azure_login_url)
            if azure_client_secret:
                self._oauth_app = ConfidentialClientApplication(
                    client_id=azure_client_id,
                    authority=authority,
                    client_credential=azure_client_secret,
                )
                self._get_token = self._get_token_from_confidential_client
            else:
                self._oauth_app = PublicClientApplication(
                    azure_client_id,
                    authority=authority,
                    client_credential=None,
                )
                self._get_token = self._get_token_from_public_client

        self._scope = f"api://{azure_application_id}/.default"
        # Populated after the first successful public-client login.
        self._account = None

    def _parse_token(self, msal_response: dict) -> str:
        """
        Parse the OAuth2 token from an MSAL response.

        :param msal_response: The MSAL response containing the token.
        :type msal_response: dict
        :return: The OAuth2 token.
        :rtype: str
        :raises RuntimeError: If the response carries no access token.
        """
        access_token = msal_response.get("access_token")
        if not access_token:
            msg = (
                "Failed to acquire access token: "
                f"{msal_response.get('error', 'Unknown error')}"
            )
            raise RuntimeError(msg)

        return access_token

    def _get_token_from_public_client(self, *, force_refresh: bool = False) -> str:
        """
        Get an OAuth2 token from a PublicClientApplication.

        :param force_refresh: Whether to force a token refresh.
        :type force_refresh: bool
        :return: The OAuth2 token.
        :rtype: str
        :raises TypeError: If the configured app is not a PublicClientApplication.
        :raises RuntimeError: If no access token could be acquired.
        """
        if not isinstance(self._oauth_app, PublicClientApplication):
            msg = "oauth_app must be a PublicClientApplication for this method"
            raise TypeError(msg)

        # Uses msal cache if possible, else interactive login
        result = self._oauth_app.acquire_token_silent(
            scopes=[self._scope],
            account=self._account,
            force_refresh=force_refresh,
        )
        if not result:
            result = self._oauth_app.acquire_token_interactive(scopes=[self._scope])

        access_token = self._parse_token(result)

        # After first login, cache the account for silent token acquisition
        if not self._account and (accounts := self._oauth_app.get_accounts()):
            self._account = accounts[0]

        return access_token

    def _get_token_from_confidential_client(
        self,
        *,
        force_refresh: bool = False,  # noqa: ARG002 MSAL will handle refreshing
    ) -> str:
        """
        Get an OAuth2 token from a ConfidentialClientApplication.

        :param force_refresh: Accepted for signature parity with the other token
            getters; it is not forwarded, refreshing is left to MSAL.
        :type force_refresh: bool
        :return: The OAuth2 token.
        :rtype: str
        :raises TypeError: If the configured app is not a
            ConfidentialClientApplication.
        :raises RuntimeError: If no access token could be acquired.
        """
        if not isinstance(self._oauth_app, ConfidentialClientApplication):
            msg = "oauth_app must be a ConfidentialClientApplication for this method"
            raise TypeError(msg)

        # Uses msal cache if possible, else client credentials flow
        result = self._oauth_app.acquire_token_for_client(scopes=[self._scope])

        return self._parse_token(result)

    def _get_token_from_managed_identity(
        self,
        *,
        force_refresh: bool = False,  # noqa: ARG002 MSAL will handle refreshing
    ) -> str:
        """
        Get an OAuth2 token from a ManagedIdentityClient.

        :param force_refresh: Accepted for signature parity with the other token
            getters; it is not forwarded, refreshing is left to MSAL.
        :type force_refresh: bool
        :return: The OAuth2 token.
        :rtype: str
        :raises TypeError: If the configured app is not a ManagedIdentityClient.
        :raises RuntimeError: If no access token could be acquired.
        """
        if not isinstance(self._oauth_app, ManagedIdentityClient):
            msg = "oauth_app must be a ManagedIdentityClient for this method"
            raise TypeError(msg)

        # The managed identity API takes a resource, i.e. the scope without the
        # trailing "/.default".
        result = self._oauth_app.acquire_token_for_client(
            resource=self._scope.removesuffix("/.default")
        )

        return self._parse_token(result)

    def auth_flow(
        self, request: httpx.Request
    ) -> Generator[httpx.Request, httpx.Response]:
        """
        Add OAuth2 token to request and handle token refresh on expiration.

        :param request: The request to authenticate.
        :type request: httpx.Request
        :yield: Authenticated request with token refresh handling.
        :rtype: Generator[httpx.Request, httpx.Response]
        """
        # Add initial token
        token = self._get_token()
        request.headers["Authorization"] = f"Bearer {token}"

        response = yield request

        if response.status_code != httpx.codes.UNAUTHORIZED:
            return

        # Check if token expired and retry once with fresh token. The body may
        # be missing, not JSON, or JSON that is not an object; none of those
        # count as an expiry signal.
        try:
            payload = response.json()
        except ValueError:
            payload = None
        error_detail = payload.get("detail") if isinstance(payload, dict) else None

        if error_detail == "Token has expired.":
            # Force refresh token and retry
            token = self._get_token(force_refresh=True)
            request.headers["Authorization"] = f"Bearer {token}"
            yield request
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
class OAuthClient:
    """
    Client for interaction with the Destiny API using OAuth2.

    This will apply the provided authentication, usually
    :class:`OAuthMiddleware <libs.sdk.src.destiny_sdk.client.OAuthMiddleware>`,
    to all requests. Some API endpoints are supported directly through methods on this
    class, while others can be accessed through the underlying ``httpx`` client.

    The client owns an ``httpx`` client; call :meth:`close` when finished, or use
    the instance as a context manager to have it closed automatically.

    Example usage:

    .. code-block:: python

        from destiny_sdk.client import OAuthClient, OAuthMiddleware

        client = OAuthClient(
            base_url="https://destiny-repository.example.com",
            auth=OAuthMiddleware(...),
        )

        # Supported method
        response = client.search(query="example")

        # Unsupported method, use underlying httpx client
        response = client.get_client().get("/system/healthcheck/")
    """

    def __init__(
        self,
        base_url: HttpUrl | str,
        auth: httpx.Auth | None = None,
    ) -> None:
        """
        Initialize the client.

        :param base_url: The base URL for the Destiny Repository API. A trailing
            ``/`` and an existing ``/v1`` suffix are tolerated; requests are
            always sent under ``/v1``.
        :type base_url: HttpUrl | str
        :param auth: The middleware for authentication. If not provided, only
            unauthenticated requests can be made. This should almost always be an
            instance of ``OAuthMiddleware``, unless you need to create a custom auth
            class.
        :type auth: httpx.Auth | None
        """
        self._client = httpx.Client(
            base_url=str(base_url).removesuffix("/").removesuffix("/v1") + "/v1",
            headers={
                "Content-Type": "application/json",
                "User-Agent": user_agent,
            },
        )

        if auth:
            self._client.auth = auth

    def close(self) -> None:
        """Close the underlying ``httpx`` client."""
        self._client.close()

    def __enter__(self) -> "OAuthClient":
        """Return the client itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the underlying ``httpx`` client when leaving the ``with`` block."""
        self.close()

    def _raise_for_status(self, response: httpx.Response) -> None:
        """
        Raise an error if the response status is not successful.

        The re-raised error carries the status code, URL and response text in its
        message, with the original error attached as its cause.

        :param response: The HTTP response to check.
        :type response: httpx.Response
        :raises httpx.HTTPStatusError: If the response status is not successful.
        """
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            msg = (
                f"Error response {exc.response.status_code} from "
                f"{exc.request.url}: {exc.response.text}"
            )
            raise httpx.HTTPStatusError(
                msg, request=exc.request, response=exc.response
            ) from exc

    def search(  # noqa: PLR0913
        self,
        query: str,
        start_year: int | None = None,
        end_year: int | None = None,
        annotations: list[str | AnnotationFilter] | None = None,
        sort: str | None = None,
        page: int = 1,
    ) -> ReferenceSearchResult:
        """
        Send a search request to the Destiny Repository API.

        See also: :ref:`search-procedure`.

        :param query: The search query string.
        :type query: str
        :param start_year: The start year for filtering results.
        :type start_year: int | None
        :param end_year: The end year for filtering results.
        :type end_year: int | None
        :param annotations: A list of annotation filters to apply.
        :type annotations: list[str | libs.sdk.src.destiny_sdk.search.AnnotationFilter] | None
        :param sort: The sort order for the results.
        :type sort: str | None
        :param page: The page number of results to retrieve.
        :type page: int
        :return: The response from the API.
        :rtype: libs.sdk.src.destiny_sdk.references.ReferenceSearchResult
        :raises httpx.HTTPStatusError: If the API returns an error status.
        """  # noqa: E501
        params: dict[str, object] = {"q": query, "page": page}
        # Optional filters are only sent when the caller supplied a truthy value.
        if start_year:
            params["start_year"] = start_year
        if end_year:
            params["end_year"] = end_year
        if annotations:
            params["annotation"] = [str(annotation) for annotation in annotations]
        if sort:
            params["sort"] = sort
        response = self._client.get(
            "/references/search/",
            params=params,
        )
        self._raise_for_status(response)
        return ReferenceSearchResult.model_validate(response.json())

    def lookup(
        self,
        identifiers: list[str | IdentifierLookup],
    ) -> list[Reference]:
        """
        Lookup references by identifiers.

        See also: :ref:`lookup-procedure`.

        :param identifiers: The identifiers to look up.
        :type identifiers: list[str | libs.sdk.src.destiny_sdk.identifiers.IdentifierLookup]
        :return: The list of references matching the identifiers.
        :rtype: list[libs.sdk.src.destiny_sdk.references.Reference]
        :raises httpx.HTTPStatusError: If the API returns an error status.
        """  # noqa: E501
        # Identifiers are sent as one comma-separated query parameter.
        response = self._client.get(
            "/references/",
            params={"identifier": ",".join(str(identifier) for identifier in identifiers)},
        )
        self._raise_for_status(response)
        return TypeAdapter(list[Reference]).validate_python(response.json())

    def get_client(self) -> httpx.Client:
        """
        Get the underlying ``httpx`` client.

        This can be used to make custom requests not covered by the SDK methods.

        :return: The underlying ``httpx`` client with authentication attached.
        :rtype: `httpx.Client <https://www.python-httpx.org/advanced/clients/>`_
        """
        return self._client
|
destiny_sdk/core.py
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
"""Core classes for the Destiny SDK, not exposed to package users."""
|
|
2
2
|
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
3
4
|
from typing import Self
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel, Field
|
|
6
7
|
|
|
7
8
|
from destiny_sdk.search import SearchResultPage, SearchResultTotal
|
|
8
9
|
|
|
10
|
+
def _detect_sdk_version() -> str:
    """Return the installed ``destiny-sdk`` version, or ``"unknown"`` if not found."""
    try:
        return version("destiny-sdk")
    except PackageNotFoundError:
        # No installed distribution metadata for the package.
        return "unknown"


sdk_version = _detect_sdk_version()
|
|
14
|
+
|
|
15
|
+
|
|
9
16
|
# These are non-standard newline characters that are not escaped by model_dump_json().
|
|
10
17
|
# We want jsonl files to have empirical new lines so they can be streamed line by line.
|
|
11
18
|
# Hence we replace each occurrence with standard new lines.
|
destiny_sdk/enhancements.py
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
4
|
from enum import StrEnum, auto
|
|
5
|
-
from typing import Annotated, Literal
|
|
5
|
+
from typing import Annotated, Any, Literal, Self
|
|
6
6
|
|
|
7
|
-
from pydantic import UUID4, BaseModel, Field, HttpUrl
|
|
7
|
+
from pydantic import UUID4, BaseModel, Field, HttpUrl, model_validator
|
|
8
8
|
|
|
9
9
|
from destiny_sdk.core import _JsonlFileInputMixIn
|
|
10
10
|
from destiny_sdk.visibility import Visibility
|
|
@@ -25,6 +25,8 @@ class EnhancementType(StrEnum):
|
|
|
25
25
|
"""A free-form enhancement for tagging with labels."""
|
|
26
26
|
LOCATION = auto()
|
|
27
27
|
"""Locations where the reference can be found."""
|
|
28
|
+
RAW = auto()
|
|
29
|
+
"""A free form enhancement for arbitrary/unstructured data."""
|
|
28
30
|
FULL_TEXT = auto()
|
|
29
31
|
"""The full text of the reference. (To be implemented)"""
|
|
30
32
|
|
|
@@ -145,22 +147,33 @@ class AnnotationType(StrEnum):
|
|
|
145
147
|
"""
|
|
146
148
|
|
|
147
149
|
|
|
148
|
-
class
|
|
149
|
-
"""
|
|
150
|
-
An annotation which represents the score for a label.
|
|
150
|
+
class BaseAnnotation(BaseModel):
|
|
151
|
+
"""Base class for annotations, defining the minimal required fields."""
|
|
151
152
|
|
|
152
|
-
This is similar to a BooleanAnnotation, but lacks a boolean determination
|
|
153
|
-
as to the application of the label.
|
|
154
|
-
"""
|
|
155
|
-
|
|
156
|
-
annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
|
|
157
153
|
scheme: str = Field(
|
|
158
154
|
description="An identifier for the scheme of annotation",
|
|
159
155
|
examples=["openalex:topic", "pubmed:mesh"],
|
|
156
|
+
pattern=r"^[^/]+$", # No slashes allowed
|
|
160
157
|
)
|
|
161
158
|
label: str = Field(
|
|
162
159
|
description="A high level label for this annotation like the name of the topic",
|
|
163
160
|
)
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def qualified_label(self) -> str:
|
|
164
|
+
"""The qualified label for this annotation."""
|
|
165
|
+
return f"{self.scheme}/{self.label}"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class ScoreAnnotation(BaseAnnotation):
|
|
169
|
+
"""
|
|
170
|
+
An annotation which represents the score for a label.
|
|
171
|
+
|
|
172
|
+
This is similar to a BooleanAnnotation, but lacks a boolean determination
|
|
173
|
+
as to the application of the label.
|
|
174
|
+
"""
|
|
175
|
+
|
|
176
|
+
annotation_type: Literal[AnnotationType.SCORE] = AnnotationType.SCORE
|
|
164
177
|
score: float = Field(description="""Score for this annotation""")
|
|
165
178
|
data: dict = Field(
|
|
166
179
|
default_factory=dict,
|
|
@@ -171,7 +184,7 @@ class ScoreAnnotation(BaseModel):
|
|
|
171
184
|
)
|
|
172
185
|
|
|
173
186
|
|
|
174
|
-
class BooleanAnnotation(
|
|
187
|
+
class BooleanAnnotation(BaseAnnotation):
|
|
175
188
|
"""
|
|
176
189
|
An annotation is a way of tagging the content with a label of some kind.
|
|
177
190
|
|
|
@@ -180,13 +193,6 @@ class BooleanAnnotation(BaseModel):
|
|
|
180
193
|
"""
|
|
181
194
|
|
|
182
195
|
annotation_type: Literal[AnnotationType.BOOLEAN] = AnnotationType.BOOLEAN
|
|
183
|
-
scheme: str = Field(
|
|
184
|
-
description="An identifier for the scheme of the annotation",
|
|
185
|
-
examples=["openalex:topic", "pubmed:mesh"],
|
|
186
|
-
)
|
|
187
|
-
label: str = Field(
|
|
188
|
-
description="A high level label for this annotation like the name of the topic",
|
|
189
|
-
)
|
|
190
196
|
value: bool = Field(description="""Boolean flag for this annotation""")
|
|
191
197
|
score: float | None = Field(
|
|
192
198
|
None, description="A confidence score for this annotation"
|
|
@@ -295,12 +301,45 @@ class LocationEnhancement(BaseModel):
|
|
|
295
301
|
)
|
|
296
302
|
|
|
297
303
|
|
|
304
|
+
class RawEnhancement(BaseModel):
    """
    An enhancement for storing raw/arbitrary/unstructured data.

    Data in these enhancements is intended for future conversion into structured form.

    This enhancement accepts any fields passed in to `data`. These enhancements cannot
    be created by robots.
    """

    # Discriminator value identifying this content type.
    enhancement_type: Literal[EnhancementType.RAW] = EnhancementType.RAW
    source_export_date: datetime.datetime = Field(
        description="Date the enhancement data was retrieved."
    )
    description: str = Field(
        description="Description of the data to aid in future refinement."
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata to aid in future structuring of raw data",
    )
    # Required and untyped; the validator below rejects falsy values.
    data: Any = Field(description="Unstructured data for later processing.")

    @model_validator(mode="after")
    def forbid_no_data(self) -> Self:
        """Reject a raw enhancement whose ``data`` is falsy (e.g. None or empty)."""
        if self.data:
            return self
        msg = "data must be populated on a raw enhancement."
        raise ValueError(msg)
|
|
334
|
+
|
|
335
|
+
|
|
298
336
|
#: Union type for all enhancement content types.
|
|
299
337
|
EnhancementContent = Annotated[
|
|
300
338
|
BibliographicMetadataEnhancement
|
|
301
339
|
| AbstractContentEnhancement
|
|
302
340
|
| AnnotationEnhancement
|
|
303
|
-
| LocationEnhancement
|
|
341
|
+
| LocationEnhancement
|
|
342
|
+
| RawEnhancement,
|
|
304
343
|
Field(discriminator="enhancement_type"),
|
|
305
344
|
]
|
|
306
345
|
|
destiny_sdk/identifiers.py
CHANGED
|
@@ -17,8 +17,14 @@ class ExternalIdentifierType(StrEnum):
|
|
|
17
17
|
|
|
18
18
|
DOI = auto()
|
|
19
19
|
"""A DOI (Digital Object Identifier) which is a unique identifier for a document."""
|
|
20
|
+
ERIC = auto()
|
|
21
|
+
"""An ERIC (Education Resources Information Center) ID which is a unique
|
|
22
|
+
identifier for a document in ERIC.
|
|
23
|
+
"""
|
|
20
24
|
PM_ID = auto()
|
|
21
25
|
"""A PubMed ID which is a unique identifier for a document in PubMed."""
|
|
26
|
+
PRO_QUEST = auto()
|
|
27
|
+
"""A ProQuest ID which is a unique identifier for a document in ProQuest."""
|
|
22
28
|
OPEN_ALEX = auto()
|
|
23
29
|
"""An OpenAlex ID which is a unique identifier for a document in OpenAlex."""
|
|
24
30
|
OTHER = auto()
|
|
@@ -41,8 +47,64 @@ class DOIIdentifier(BaseModel):
|
|
|
41
47
|
def remove_doi_url(cls, value: str) -> str:
    """
    Remove the URL part of the DOI if it exists.

    Surrounding whitespace is stripped before the prefixes are removed, so a
    padded value such as ``" https://doi.org/10.1000/xyz"`` is still reduced to
    the bare DOI.

    :param value: The raw DOI, optionally with a scheme, ``doi.org/`` or
        ``dx.doi.org/`` host, or ``doi:`` prefix.
    :type value: str
    :return: The DOI with those prefixes and surrounding whitespace removed.
    :rtype: str
    """
    return (
        value.strip()
        .removeprefix("http://")
        .removeprefix("https://")
        .removeprefix("doi.org/")
        .removeprefix("dx.doi.org/")
        .removeprefix("doi:")
        .strip()
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ProQuestIdentifier(BaseModel):
    """An external identifier representing a ProQuest ID."""

    # NOTE(review): the pattern is only anchored at the end, so any value that
    # merely ends in digits validates — confirm whether "^[0-9]+$" was intended.
    identifier: str = Field(
        description="The ProQuest id of the reference", pattern=r"[0-9]+$"
    )
    identifier_type: Literal[ExternalIdentifierType.PRO_QUEST] = Field(
        ExternalIdentifierType.PRO_QUEST, description="The type of identifier used."
    )

    @field_validator("identifier", mode="before")
    @classmethod
    def remove_proquest_url(cls, value: str) -> str:
        """
        Remove the URL part of the ProQuest id if it exists.

        Surrounding whitespace is stripped before the prefixes are removed, so
        a padded URL is still reduced to the bare id.

        :param value: The raw ProQuest id, optionally given as a ProQuest URL.
        :type value: str
        :return: The value with scheme, ProQuest host and ``docview/`` prefixes
            and surrounding whitespace removed.
        :rtype: str
        """
        return (
            value.strip()
            .removeprefix("http://")
            .removeprefix("https://")
            .removeprefix("search.proquest.com/")
            .removeprefix("www.proquest.com/")
            .removeprefix("docview/")
            .strip()
        )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ERICIdentifier(BaseModel):
    """
    An external identifier representing an ERIC Number.

    An ERIC Number is defined as a unique identifying number preceded by
    EJ (for a journal article) or ED (for a non-journal document).
    """

    # "[DJ]" rather than "[D|J]": inside a character class "|" is a literal, so
    # the previous pattern also accepted values such as "E|123".
    # NOTE(review): the pattern is only anchored at the end — confirm whether a
    # leading "^" was intended.
    identifier: str = Field(
        description="The ERIC Number of the reference.", pattern=r"E[DJ][0-9]+$"
    )
    identifier_type: Literal[ExternalIdentifierType.ERIC] = Field(
        ExternalIdentifierType.ERIC, description="The type of identifier used."
    )

    @field_validator("identifier", mode="before")
    @classmethod
    def remove_eric_url(cls, value: str) -> str:
        """
        Remove the URL part of the ERIC ID if it exists.

        Surrounding whitespace is stripped first; otherwise a leading space
        would stop the prefixes being removed and a trailing space would stop
        the ``.pdf`` suffix being removed.

        :param value: The raw ERIC Number, optionally given as an ERIC record
            URL or full-text PDF URL.
        :type value: str
        :return: The value with scheme, ERIC host prefixes, a ``.pdf`` suffix
            and surrounding whitespace removed.
        :rtype: str
        """
        return (
            value.strip()
            .removeprefix("http://")
            .removeprefix("https://")
            .removeprefix("eric.ed.gov/?id=")
            .removeprefix("files.eric.ed.gov/fulltext/")
            .removesuffix(".pdf")
            .strip()
        )
|
|
48
110
|
|
|
@@ -71,8 +133,11 @@ class OpenAlexIdentifier(BaseModel):
|
|
|
71
133
|
def remove_open_alex_url(cls, value: str) -> str:
    """
    Remove the OpenAlex URL if it exists.

    Surrounding whitespace is stripped before the prefixes are removed, so a
    padded URL is still reduced to the bare OpenAlex ID.

    :param value: The raw OpenAlex ID, optionally given as an OpenAlex URL.
    :type value: str
    :return: The value with scheme, OpenAlex host and ``works/`` prefixes and
        surrounding whitespace removed.
    :rtype: str
    """
    return (
        value.strip()
        .removeprefix("http://")
        .removeprefix("https://")
        .removeprefix("openalex.org/")
        .removeprefix("explore.openalex.org/")
        .removeprefix("works/")
        .strip()
    )
|
|
78
143
|
|
|
@@ -91,7 +156,12 @@ class OtherIdentifier(BaseModel):
|
|
|
91
156
|
|
|
92
157
|
#: Union type for all external identifiers.
|
|
93
158
|
ExternalIdentifier = Annotated[
|
|
94
|
-
DOIIdentifier
|
|
159
|
+
DOIIdentifier
|
|
160
|
+
| ERICIdentifier
|
|
161
|
+
| PubMedIdentifier
|
|
162
|
+
| ProQuestIdentifier
|
|
163
|
+
| OpenAlexIdentifier
|
|
164
|
+
| OtherIdentifier,
|
|
95
165
|
Field(discriminator="identifier_type"),
|
|
96
166
|
]
|
|
97
167
|
|
|
@@ -190,3 +260,11 @@ class IdentifierLookup(BaseModel):
|
|
|
190
260
|
if self.identifier_type is None:
|
|
191
261
|
return UUID4(self.identifier)
|
|
192
262
|
return ExternalIdentifierAdapter.validate_python(self.model_dump())
|
|
263
|
+
|
|
264
|
+
def __repr__(self) -> str:
|
|
265
|
+
"""Serialize the identifier lookup to a string."""
|
|
266
|
+
return self.serialize()
|
|
267
|
+
|
|
268
|
+
def __str__(self) -> str:
|
|
269
|
+
"""Serialize the identifier lookup to a string."""
|
|
270
|
+
return self.serialize()
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
"""Parser for an EPPI JSON export file."""
|
|
2
2
|
|
|
3
|
+
from datetime import datetime
|
|
3
4
|
from typing import Any
|
|
4
5
|
|
|
6
|
+
from pydantic import ValidationError
|
|
7
|
+
|
|
5
8
|
from destiny_sdk.enhancements import (
|
|
6
9
|
AbstractContentEnhancement,
|
|
7
10
|
AbstractProcessType,
|
|
@@ -13,12 +16,16 @@ from destiny_sdk.enhancements import (
|
|
|
13
16
|
BooleanAnnotation,
|
|
14
17
|
EnhancementContent,
|
|
15
18
|
EnhancementFileInput,
|
|
19
|
+
RawEnhancement,
|
|
16
20
|
)
|
|
17
21
|
from destiny_sdk.identifiers import (
|
|
18
22
|
DOIIdentifier,
|
|
23
|
+
ERICIdentifier,
|
|
19
24
|
ExternalIdentifier,
|
|
20
|
-
|
|
25
|
+
OpenAlexIdentifier,
|
|
26
|
+
ProQuestIdentifier,
|
|
21
27
|
)
|
|
28
|
+
from destiny_sdk.parsers.exceptions import ExternalIdentifierNotFoundError
|
|
22
29
|
from destiny_sdk.references import ReferenceFileInput
|
|
23
30
|
from destiny_sdk.visibility import Visibility
|
|
24
31
|
|
|
@@ -30,9 +37,17 @@ class EPPIParser:
|
|
|
30
37
|
See example here: https://eppi.ioe.ac.uk/cms/Portals/35/Maps/Examples/example_orignal.json
|
|
31
38
|
"""
|
|
32
39
|
|
|
33
|
-
version = "
|
|
40
|
+
version = "2.0"
|
|
34
41
|
|
|
35
|
-
def __init__(
|
|
42
|
+
def __init__(
|
|
43
|
+
self,
|
|
44
|
+
*,
|
|
45
|
+
tags: list[str] | None = None,
|
|
46
|
+
include_raw_data: bool = False,
|
|
47
|
+
source_export_date: datetime | None = None,
|
|
48
|
+
data_description: str | None = None,
|
|
49
|
+
raw_enhancement_excludes: list[str] | None = None,
|
|
50
|
+
) -> None:
|
|
36
51
|
"""
|
|
37
52
|
Initialize the EPPIParser with optional tags.
|
|
38
53
|
|
|
@@ -42,20 +57,75 @@ class EPPIParser:
|
|
|
42
57
|
"""
|
|
43
58
|
self.tags = tags or []
|
|
44
59
|
self.parser_source = f"destiny_sdk.eppi_parser@{self.version}"
|
|
60
|
+
self.include_raw_data = include_raw_data
|
|
61
|
+
self.source_export_date = source_export_date
|
|
62
|
+
self.data_description = data_description
|
|
63
|
+
self.raw_enhancement_excludes = (
|
|
64
|
+
raw_enhancement_excludes if raw_enhancement_excludes else []
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
if self.include_raw_data and not all(
|
|
68
|
+
(
|
|
69
|
+
self.source_export_date,
|
|
70
|
+
self.data_description,
|
|
71
|
+
)
|
|
72
|
+
):
|
|
73
|
+
msg = (
|
|
74
|
+
"Cannot include raw data enhancements without "
|
|
75
|
+
"source_export_date, data_description, and raw_enhancement_metadata"
|
|
76
|
+
)
|
|
77
|
+
raise RuntimeError(msg)
|
|
45
78
|
|
|
46
79
|
def _parse_identifiers(
|
|
47
80
|
self, ref_to_import: dict[str, Any]
|
|
48
81
|
) -> list[ExternalIdentifier]:
|
|
49
82
|
identifiers = []
|
|
50
83
|
if doi := ref_to_import.get("DOI"):
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
84
|
+
doi_identifier = self._parse_doi(doi=doi)
|
|
85
|
+
if doi_identifier:
|
|
86
|
+
identifiers.append(doi_identifier)
|
|
87
|
+
|
|
88
|
+
if url := ref_to_import.get("URL"):
|
|
89
|
+
identifier = self._parse_url_to_identifier(url=url)
|
|
90
|
+
if identifier:
|
|
91
|
+
identifiers.append(identifier)
|
|
92
|
+
|
|
93
|
+
if not identifiers:
|
|
94
|
+
msg = (
|
|
95
|
+
"No known external identifiers found for Reference data "
|
|
96
|
+
f"with DOI: '{doi if doi else None}' "
|
|
97
|
+
f"and URL: '{url if url else None}'."
|
|
56
98
|
)
|
|
99
|
+
raise ExternalIdentifierNotFoundError(detail=msg)
|
|
100
|
+
|
|
57
101
|
return identifiers
|
|
58
102
|
|
|
103
|
+
def _parse_doi(self, doi: str) -> DOIIdentifier | None:
|
|
104
|
+
"""Attempt to parse a DOI from a string."""
|
|
105
|
+
try:
|
|
106
|
+
doi = doi.strip()
|
|
107
|
+
return DOIIdentifier(identifier=doi)
|
|
108
|
+
except ValidationError:
|
|
109
|
+
return None
|
|
110
|
+
|
|
111
|
+
def _parse_url_to_identifier(self, url: str) -> ExternalIdentifier | None:
|
|
112
|
+
"""Attempt to parse an external identifier from a url string."""
|
|
113
|
+
url = url.strip()
|
|
114
|
+
identifier_cls = None
|
|
115
|
+
if "eric" in url:
|
|
116
|
+
identifier_cls = ERICIdentifier
|
|
117
|
+
elif "proquest" in url:
|
|
118
|
+
identifier_cls = ProQuestIdentifier
|
|
119
|
+
elif "openalex" in url:
|
|
120
|
+
identifier_cls = OpenAlexIdentifier
|
|
121
|
+
else:
|
|
122
|
+
return None
|
|
123
|
+
|
|
124
|
+
try:
|
|
125
|
+
return identifier_cls(identifier=url)
|
|
126
|
+
except ValidationError:
|
|
127
|
+
return None
|
|
128
|
+
|
|
59
129
|
def _parse_abstract_enhancement(
|
|
60
130
|
self, ref_to_import: dict[str, Any]
|
|
61
131
|
) -> EnhancementContent | None:
|
|
@@ -107,6 +177,23 @@ class EPPIParser:
|
|
|
107
177
|
authorship=authorships if authorships else None,
|
|
108
178
|
)
|
|
109
179
|
|
|
180
|
+
def _parse_raw_enhancement(
|
|
181
|
+
self, ref_to_import: dict[str, Any], raw_enhancement_metadata: dict[str, Any]
|
|
182
|
+
) -> EnhancementContent | None:
|
|
183
|
+
"""Add Reference data as a raw enhancement."""
|
|
184
|
+
raw_enhancement_data = ref_to_import.copy()
|
|
185
|
+
|
|
186
|
+
# Remove any keys that should be excluded
|
|
187
|
+
for exclude in self.raw_enhancement_excludes:
|
|
188
|
+
raw_enhancement_data.pop(exclude, None)
|
|
189
|
+
|
|
190
|
+
return RawEnhancement(
|
|
191
|
+
source_export_date=self.source_export_date,
|
|
192
|
+
description=self.data_description,
|
|
193
|
+
metadata=raw_enhancement_metadata,
|
|
194
|
+
data=raw_enhancement_data,
|
|
195
|
+
)
|
|
196
|
+
|
|
110
197
|
def _create_annotation_enhancement(self) -> EnhancementContent | None:
|
|
111
198
|
if not self.tags:
|
|
112
199
|
return None
|
|
@@ -124,8 +211,11 @@ class EPPIParser:
|
|
|
124
211
|
)
|
|
125
212
|
|
|
126
213
|
def parse_data(
|
|
127
|
-
self,
|
|
128
|
-
|
|
214
|
+
self,
|
|
215
|
+
data: dict,
|
|
216
|
+
source: str | None = None,
|
|
217
|
+
robot_version: str | None = None,
|
|
218
|
+
) -> tuple[list[ReferenceFileInput], list[dict]]:
|
|
129
219
|
"""
|
|
130
220
|
Parse an EPPI JSON export dict into ReferenceFileInput objects plus failed refs.
|
|
131
221
|
|
|
@@ -140,33 +230,55 @@ class EPPIParser:
|
|
|
140
230
|
|
|
141
231
|
"""
|
|
142
232
|
parser_source = source if source is not None else self.parser_source
|
|
233
|
+
|
|
234
|
+
if self.include_raw_data:
|
|
235
|
+
codesets = [codeset.get("SetId") for codeset in data.get("CodeSets", [])]
|
|
236
|
+
raw_enhancement_metadata = {"codeset_ids": codesets}
|
|
237
|
+
|
|
143
238
|
references = []
|
|
239
|
+
failed_refs = []
|
|
144
240
|
for ref_to_import in data.get("References", []):
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
241
|
+
try:
|
|
242
|
+
enhancement_contents = [
|
|
243
|
+
content
|
|
244
|
+
for content in [
|
|
245
|
+
self._parse_abstract_enhancement(ref_to_import),
|
|
246
|
+
self._parse_bibliographic_enhancement(ref_to_import),
|
|
247
|
+
self._create_annotation_enhancement(),
|
|
248
|
+
]
|
|
249
|
+
if content
|
|
151
250
|
]
|
|
152
|
-
if content
|
|
153
|
-
]
|
|
154
251
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
robot_version=robot_version,
|
|
161
|
-
)
|
|
162
|
-
for content in enhancement_contents
|
|
163
|
-
]
|
|
252
|
+
if self.include_raw_data:
|
|
253
|
+
raw_enhancement = self._parse_raw_enhancement(
|
|
254
|
+
ref_to_import=ref_to_import,
|
|
255
|
+
raw_enhancement_metadata=raw_enhancement_metadata,
|
|
256
|
+
)
|
|
164
257
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
258
|
+
if raw_enhancement:
|
|
259
|
+
enhancement_contents.append(raw_enhancement)
|
|
260
|
+
|
|
261
|
+
enhancements = [
|
|
262
|
+
EnhancementFileInput(
|
|
263
|
+
source=parser_source,
|
|
264
|
+
visibility=Visibility.PUBLIC,
|
|
265
|
+
content=content,
|
|
266
|
+
robot_version=robot_version,
|
|
267
|
+
)
|
|
268
|
+
for content in enhancement_contents
|
|
269
|
+
]
|
|
270
|
+
|
|
271
|
+
references.append(
|
|
272
|
+
ReferenceFileInput(
|
|
273
|
+
visibility=Visibility.PUBLIC,
|
|
274
|
+
identifiers=self._parse_identifiers(
|
|
275
|
+
ref_to_import=ref_to_import
|
|
276
|
+
),
|
|
277
|
+
enhancements=enhancements,
|
|
278
|
+
)
|
|
170
279
|
)
|
|
171
|
-
|
|
172
|
-
|
|
280
|
+
|
|
281
|
+
except ExternalIdentifierNotFoundError:
|
|
282
|
+
failed_refs.append(ref_to_import)
|
|
283
|
+
|
|
284
|
+
return references, failed_refs
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Custom exceptions for destiny sdk parsers."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ExternalIdentifierNotFoundError(Exception):
|
|
5
|
+
"""Raised when a reference has no identifiable external identifiers."""
|
|
6
|
+
|
|
7
|
+
def __init__(self, detail: str | None = None, *args: object) -> None:
|
|
8
|
+
"""
|
|
9
|
+
Initialize the ExternalIdentifierNotFoundError.
|
|
10
|
+
|
|
11
|
+
Args:
|
|
12
|
+
detail: Description of why no external identifier was found.
|
|
13
|
+
*args: Additional arguments for the exception.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
self.detail = detail or "No detail provided."
|
|
17
|
+
super().__init__(detail, *args)
|
destiny_sdk/search.py
CHANGED
|
@@ -30,6 +30,7 @@ class AnnotationFilter(BaseModel):
|
|
|
30
30
|
|
|
31
31
|
scheme: str = Field(
|
|
32
32
|
description="The annotation scheme to filter by.",
|
|
33
|
+
pattern=r"^[^/]+$",
|
|
33
34
|
)
|
|
34
35
|
label: str | None = Field(
|
|
35
36
|
None,
|
|
@@ -42,7 +43,7 @@ class AnnotationFilter(BaseModel):
|
|
|
42
43
|
le=1.0,
|
|
43
44
|
)
|
|
44
45
|
|
|
45
|
-
def
|
|
46
|
+
def __repr__(self) -> str:
|
|
46
47
|
"""Serialize the annotation filter to a string."""
|
|
47
48
|
annotation = self.scheme
|
|
48
49
|
if self.label:
|
|
@@ -50,3 +51,7 @@ class AnnotationFilter(BaseModel):
|
|
|
50
51
|
if self.score is not None:
|
|
51
52
|
annotation += f"@{self.score}"
|
|
52
53
|
return annotation
|
|
54
|
+
|
|
55
|
+
def __str__(self) -> str:
|
|
56
|
+
"""Serialize the annotation filter to a string."""
|
|
57
|
+
return repr(self)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: destiny_sdk
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: A software development kit (sdk) to support interaction with the DESTINY repository
|
|
5
5
|
Author-email: Adam Hamilton <adam@futureevidence.org>, Andrew Harvey <andrew@futureevidence.org>, Daniel Breves <daniel@futureevidence.org>, Jack Walmisley <jack@futureevidence.org>, Tim Repke <tim.repke@pik-potsdam.de>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -9,6 +9,7 @@ Requires-Python: ~=3.12
|
|
|
9
9
|
Requires-Dist: cachetools<6,>=5.5.2
|
|
10
10
|
Requires-Dist: fastapi<0.116,>=0.115.12
|
|
11
11
|
Requires-Dist: httpx<0.29,>=0.28.1
|
|
12
|
+
Requires-Dist: msal>=1.34.0
|
|
12
13
|
Requires-Dist: pydantic<3,>=2.11.3
|
|
13
14
|
Requires-Dist: pytest-asyncio<2,>=1.0.0
|
|
14
15
|
Requires-Dist: pytest-httpx<0.36,>=0.35.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
destiny_sdk/__init__.py,sha256=NdSlsPQyDF3TW30_JzbvYMRBRA9iT677iTRWWCMdYOA,382
|
|
2
|
+
destiny_sdk/auth.py,sha256=bY72ywZEcG_67YBd9PrwgWTXkCf58rhLvVEXrtXbWtA,6247
|
|
3
|
+
destiny_sdk/client.py,sha256=nKvS5rRkIpBqv8dVIB57Xsop0UvVz3i875RQxfVSMao,21306
|
|
4
|
+
destiny_sdk/core.py,sha256=E0Wotu9psggK1JRJxbvx3Jc7WEGE6zaz2R2awvRrLz8,2023
|
|
5
|
+
destiny_sdk/enhancements.py,sha256=-4jLm3R0T5UpgCt09CgUfPcnzPOyjdhUZCT1zhEP6sQ,12838
|
|
6
|
+
destiny_sdk/identifiers.py,sha256=I9Q2I35Lg8oyl3uytq1gCGOUu92F9sZSSwzLoCXBJi4,9727
|
|
7
|
+
destiny_sdk/imports.py,sha256=b-rh-dt3NsyLGxqmVzIzKaHiXhbw-3wtAaBN-ZW-i1E,5940
|
|
8
|
+
destiny_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
destiny_sdk/references.py,sha256=3Y8gBMTSyZY35S3pB1bnVHMai9RRiGeoGZysNvSo7kk,2553
|
|
10
|
+
destiny_sdk/robots.py,sha256=I_ZvMxwST52e8ovhv0-gPbOB3P9tptbRG0LrkNNOqKo,13463
|
|
11
|
+
destiny_sdk/search.py,sha256=QWQBNEJJnH2o6CGapChKUp6kOZl2Uq3Pxqg1kcu9x-4,1590
|
|
12
|
+
destiny_sdk/visibility.py,sha256=8D44Q868YdScAt6eAFgXXrhonozXnv_Qa5w5yEGMPX8,577
|
|
13
|
+
destiny_sdk/labs/__init__.py,sha256=H4RFPyeelqZ56PagnWPX-JZeWlxPnCZoYHtr4c9SU9Q,180
|
|
14
|
+
destiny_sdk/labs/references.py,sha256=iZisRgGZ5c7X7uTFoe6Q0AwwFMa4yJbIoPUVv_hvOiU,5589
|
|
15
|
+
destiny_sdk/parsers/__init__.py,sha256=d5gS--bXla_0I7e_9wTBnGWMXt2U8b-_ndeprTPe1hk,149
|
|
16
|
+
destiny_sdk/parsers/eppi_parser.py,sha256=_1xnAT0F0o1HKpMWOGQbVS3VPOrhPqyzHDWR3CosWwk,9484
|
|
17
|
+
destiny_sdk/parsers/exceptions.py,sha256=0Sc_M4j560Nqh4SjeP_YrgOUVagdIwWwRz24E6YlZ1k,573
|
|
18
|
+
destiny_sdk-0.7.2.dist-info/METADATA,sha256=cp-onr2xd51yB4M3VnumJxUpn0rAvHWuAfSKe7V2ZqY,2685
|
|
19
|
+
destiny_sdk-0.7.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
20
|
+
destiny_sdk-0.7.2.dist-info/licenses/LICENSE,sha256=6QURU4gvvTjVZ5rfp5amZ6FtFvcpPhAGUjxF5WSZAHI,9138
|
|
21
|
+
destiny_sdk-0.7.2.dist-info/RECORD,,
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
destiny_sdk/__init__.py,sha256=NdSlsPQyDF3TW30_JzbvYMRBRA9iT677iTRWWCMdYOA,382
|
|
2
|
-
destiny_sdk/auth.py,sha256=bY72ywZEcG_67YBd9PrwgWTXkCf58rhLvVEXrtXbWtA,6247
|
|
3
|
-
destiny_sdk/client.py,sha256=fTBtuq5emT8ieNtCuCY8Y6xAKZJDLq8sG1WOvmjLz-I,4971
|
|
4
|
-
destiny_sdk/core.py,sha256=PYCYpY72MHXo7iQMHtnXcnCOGn6CUsbYoykHvtQl4Oc,1857
|
|
5
|
-
destiny_sdk/enhancements.py,sha256=SkIlIlWKBN7Z-aXpQiy22SXrU7zVnKxaRb4F5yaFsO8,11503
|
|
6
|
-
destiny_sdk/identifiers.py,sha256=1N2cszBmnQoUeKm54-7MUTO-zTDuvW8U9OjTeAmhWvo,7182
|
|
7
|
-
destiny_sdk/imports.py,sha256=b-rh-dt3NsyLGxqmVzIzKaHiXhbw-3wtAaBN-ZW-i1E,5940
|
|
8
|
-
destiny_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
destiny_sdk/references.py,sha256=3Y8gBMTSyZY35S3pB1bnVHMai9RRiGeoGZysNvSo7kk,2553
|
|
10
|
-
destiny_sdk/robots.py,sha256=I_ZvMxwST52e8ovhv0-gPbOB3P9tptbRG0LrkNNOqKo,13463
|
|
11
|
-
destiny_sdk/search.py,sha256=LIPj_h0yMnav_Stp4qRLg1PvZa6h3BV4N2bXwAYZDqA,1447
|
|
12
|
-
destiny_sdk/visibility.py,sha256=8D44Q868YdScAt6eAFgXXrhonozXnv_Qa5w5yEGMPX8,577
|
|
13
|
-
destiny_sdk/labs/__init__.py,sha256=H4RFPyeelqZ56PagnWPX-JZeWlxPnCZoYHtr4c9SU9Q,180
|
|
14
|
-
destiny_sdk/labs/references.py,sha256=iZisRgGZ5c7X7uTFoe6Q0AwwFMa4yJbIoPUVv_hvOiU,5589
|
|
15
|
-
destiny_sdk/parsers/__init__.py,sha256=d5gS--bXla_0I7e_9wTBnGWMXt2U8b-_ndeprTPe1hk,149
|
|
16
|
-
destiny_sdk/parsers/eppi_parser.py,sha256=rEOtt_5Kp3oktFlzRTLZ2x4_7aQ9-ba3FYpkaEnpnvs,5521
|
|
17
|
-
destiny_sdk-0.6.0.dist-info/METADATA,sha256=_xLg34LzOiBZmcjmg6-P6bTKmHIBnArl-0k-Qndftwc,2657
|
|
18
|
-
destiny_sdk-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
19
|
-
destiny_sdk-0.6.0.dist-info/licenses/LICENSE,sha256=6QURU4gvvTjVZ5rfp5amZ6FtFvcpPhAGUjxF5WSZAHI,9138
|
|
20
|
-
destiny_sdk-0.6.0.dist-info/RECORD,,
|
|
File without changes
|