nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +22 -2
- nmdc_runtime/api/core/idgen.py +36 -6
- nmdc_runtime/api/db/mongo.py +0 -12
- nmdc_runtime/api/endpoints/find.py +65 -225
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
- nmdc_runtime/api/endpoints/objects.py +4 -11
- nmdc_runtime/api/endpoints/operations.py +0 -27
- nmdc_runtime/api/endpoints/queries.py +22 -0
- nmdc_runtime/api/endpoints/sites.py +0 -24
- nmdc_runtime/api/endpoints/util.py +57 -35
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +84 -60
- nmdc_runtime/api/models/util.py +12 -5
- nmdc_runtime/api/openapi.py +116 -180
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/minter/adapters/repository.py +21 -0
- nmdc_runtime/minter/domain/model.py +20 -0
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +632 -11
- nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
- nmdc_runtime/site/graphs.py +7 -0
- nmdc_runtime/site/ops.py +92 -34
- nmdc_runtime/site/repository.py +2 -0
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +87 -1
- nmdc_runtime-2.11.1.dist-info/METADATA +46 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/RECORD +47 -57
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/WHEEL +1 -2
- nmdc_runtime/api/endpoints/ids.py +0 -192
- nmdc_runtime/client/__init__.py +0 -0
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/__init__.py +0 -0
- nmdc_runtime/core/db/Database.py +0 -13
- nmdc_runtime/core/db/__init__.py +0 -0
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/__init__.py +0 -0
- nmdc_runtime/domain/users/__init__.py +0 -0
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/models/user.py +0 -1
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -33
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -825
- nmdc_runtime/lib/nmdc_etl_class.py +0 -396
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/__init__.py +0 -0
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
- nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/licenses/LICENSE +0 -0
nmdc_runtime/api/main.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from contextlib import asynccontextmanager
|
|
3
|
+
from html import escape
|
|
3
4
|
from importlib import import_module
|
|
4
5
|
from importlib.metadata import version
|
|
5
6
|
from typing import Annotated
|
|
@@ -12,7 +13,6 @@ from fastapi import APIRouter, FastAPI, Cookie
|
|
|
12
13
|
from fastapi.middleware.cors import CORSMiddleware
|
|
13
14
|
from fastapi.openapi.docs import get_swagger_ui_html
|
|
14
15
|
from fastapi.staticfiles import StaticFiles
|
|
15
|
-
from setuptools_scm import get_version
|
|
16
16
|
from starlette import status
|
|
17
17
|
from starlette.responses import RedirectResponse, HTMLResponse, FileResponse
|
|
18
18
|
from refscan.lib.helpers import get_collection_names_from_schema
|
|
@@ -55,29 +55,32 @@ from nmdc_runtime.api.endpoints.util import BASE_URL_EXTERNAL
|
|
|
55
55
|
from nmdc_runtime.api.models.site import SiteClientInDB, SiteInDB
|
|
56
56
|
from nmdc_runtime.api.models.user import UserInDB
|
|
57
57
|
from nmdc_runtime.api.models.util import entity_attributes_to_index
|
|
58
|
-
from nmdc_runtime.api.openapi import
|
|
59
|
-
|
|
58
|
+
from nmdc_runtime.api.openapi import (
|
|
59
|
+
OpenAPITag,
|
|
60
|
+
ordered_tag_descriptors,
|
|
61
|
+
make_api_description,
|
|
62
|
+
)
|
|
63
|
+
from nmdc_runtime.api.swagger_ui.swagger_ui import base_swagger_ui_parameters
|
|
60
64
|
from nmdc_runtime.minter.bootstrap import bootstrap as minter_bootstrap
|
|
61
65
|
from nmdc_runtime.minter.entrypoints.fastapi_app import router as minter_router
|
|
62
66
|
|
|
63
67
|
|
|
64
68
|
api_router = APIRouter()
|
|
65
|
-
api_router.include_router(
|
|
66
|
-
api_router.include_router(
|
|
67
|
-
api_router.include_router(
|
|
68
|
-
api_router.include_router(
|
|
69
|
-
api_router.include_router(
|
|
70
|
-
api_router.include_router(
|
|
71
|
-
api_router.include_router(
|
|
72
|
-
api_router.include_router(
|
|
73
|
-
api_router.include_router(
|
|
74
|
-
api_router.include_router(
|
|
75
|
-
api_router.include_router(
|
|
76
|
-
api_router.include_router(
|
|
77
|
-
api_router.include_router(
|
|
78
|
-
api_router.include_router(
|
|
79
|
-
api_router.include_router(
|
|
80
|
-
api_router.include_router(minter_router, prefix="/pids", tags=["minter"])
|
|
69
|
+
api_router.include_router(find.router, tags=[OpenAPITag.METADATA_ACCESS.value])
|
|
70
|
+
api_router.include_router(nmdcschema.router, tags=[OpenAPITag.METADATA_ACCESS.value])
|
|
71
|
+
api_router.include_router(queries.router, tags=[OpenAPITag.METADATA_ACCESS.value])
|
|
72
|
+
api_router.include_router(metadata.router, tags=[OpenAPITag.METADATA_ACCESS.value])
|
|
73
|
+
api_router.include_router(sites.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
74
|
+
api_router.include_router(workflows.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
75
|
+
api_router.include_router(capabilities.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
76
|
+
api_router.include_router(object_types.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
77
|
+
api_router.include_router(triggers.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
78
|
+
api_router.include_router(jobs.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
79
|
+
api_router.include_router(objects.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
80
|
+
api_router.include_router(operations.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
81
|
+
api_router.include_router(runs.router, tags=[OpenAPITag.WORKFLOWS.value])
|
|
82
|
+
api_router.include_router(minter_router, prefix="/pids", tags=[OpenAPITag.MINTER.value])
|
|
83
|
+
api_router.include_router(users.router, tags=[OpenAPITag.USERS.value])
|
|
81
84
|
|
|
82
85
|
|
|
83
86
|
def ensure_initial_resources_on_boot():
|
|
@@ -219,9 +222,6 @@ async def lifespan(app: FastAPI):
|
|
|
219
222
|
From the [FastAPI documentation](https://fastapi.tiangolo.com/advanced/events/#lifespan-function):
|
|
220
223
|
> You can define logic (code) that should be executed before the application starts up. This means that
|
|
221
224
|
> this code will be executed once, before the application starts receiving requests.
|
|
222
|
-
|
|
223
|
-
Note: Based on my own observations, I think this function gets called when the first request starts coming in,
|
|
224
|
-
but not before that (i.e. not when the application is idle before any requests start coming in).
|
|
225
225
|
"""
|
|
226
226
|
ensure_initial_resources_on_boot()
|
|
227
227
|
ensure_attribute_indexes()
|
|
@@ -242,21 +242,24 @@ async def root():
|
|
|
242
242
|
)
|
|
243
243
|
|
|
244
244
|
|
|
245
|
-
@api_router.get("/version")
|
|
245
|
+
@api_router.get("/version", tags=[OpenAPITag.SYSTEM_ADMINISTRATION.value])
|
|
246
246
|
async def get_versions():
|
|
247
247
|
return {
|
|
248
|
-
"nmdc-runtime":
|
|
248
|
+
"nmdc-runtime": version("nmdc_runtime"),
|
|
249
249
|
"fastapi": fastapi.__version__,
|
|
250
250
|
"nmdc-schema": version("nmdc_schema"),
|
|
251
251
|
}
|
|
252
252
|
|
|
253
253
|
|
|
254
|
+
# Build an ORCID Login URL for the Swagger UI page, based upon some environment variables.
|
|
255
|
+
orcid_login_url = f"{ORCID_BASE_URL}/oauth/authorize?client_id={ORCID_NMDC_CLIENT_ID}&response_type=code&scope=openid&redirect_uri={BASE_URL_EXTERNAL}/orcid_code"
|
|
256
|
+
|
|
257
|
+
|
|
254
258
|
app = FastAPI(
|
|
255
259
|
title="NMDC Runtime API",
|
|
256
|
-
version=
|
|
260
|
+
version=version("nmdc_runtime"),
|
|
257
261
|
description=make_api_description(
|
|
258
|
-
schema_version=version("nmdc_schema")
|
|
259
|
-
orcid_login_url=f"{ORCID_BASE_URL}/oauth/authorize?client_id={ORCID_NMDC_CLIENT_ID}&response_type=code&scope=openid&redirect_uri={BASE_URL_EXTERNAL}/orcid_code",
|
|
262
|
+
api_version=version("nmdc_runtime"), schema_version=version("nmdc_schema")
|
|
260
263
|
),
|
|
261
264
|
openapi_tags=ordered_tag_descriptors,
|
|
262
265
|
lifespan=lifespan,
|
|
@@ -309,6 +312,14 @@ async def get_scalar_html():
|
|
|
309
312
|
def custom_swagger_ui_html(
|
|
310
313
|
user_id_token: Annotated[str | None, Cookie()] = None,
|
|
311
314
|
):
|
|
315
|
+
r"""Returns the HTML markup for an interactive API docs web page powered by Swagger UI.
|
|
316
|
+
|
|
317
|
+
If the `user_id_token` cookie is present and not empty, this function will send its value to
|
|
318
|
+
the `/token` endpoint in an attempt to get an access token. If it gets one, this function will
|
|
319
|
+
inject that access token into the web page so Swagger UI will consider the user to be logged in.
|
|
320
|
+
|
|
321
|
+
Reference: https://fastapi.tiangolo.com/tutorial/cookie-params/
|
|
322
|
+
"""
|
|
312
323
|
access_token = None
|
|
313
324
|
if user_id_token:
|
|
314
325
|
# get bearer token
|
|
@@ -329,32 +340,9 @@ def custom_swagger_ui_html(
|
|
|
329
340
|
rv.raise_for_status()
|
|
330
341
|
access_token = rv.json()["access_token"]
|
|
331
342
|
|
|
332
|
-
swagger_ui_parameters = {"withCredentials": True}
|
|
333
343
|
onComplete = ""
|
|
334
344
|
if access_token is not None:
|
|
335
|
-
onComplete += f""
|
|
336
|
-
ui.preauthorizeApiKey('bearerAuth', '{access_token}');
|
|
337
|
-
|
|
338
|
-
token_info = document.createElement('section');
|
|
339
|
-
token_info.classList.add('nmdc-info', 'nmdc-info-token', 'block', 'col-12');
|
|
340
|
-
token_info.innerHTML = <double-quote>
|
|
341
|
-
<p>You are now authorized. Prefer a command-line interface (CLI)? Use this header for HTTP requests:</p>
|
|
342
|
-
<p>
|
|
343
|
-
<code>
|
|
344
|
-
<span>Authorization: Bearer </span>
|
|
345
|
-
<span id='token' data-token-value='{access_token}' data-state='masked'>***</span>
|
|
346
|
-
</code>
|
|
347
|
-
</p>
|
|
348
|
-
<p>
|
|
349
|
-
<button id='token-mask-toggler'>Show token</button>
|
|
350
|
-
<button id='token-copier'>Copy token</button>
|
|
351
|
-
<span id='token-copier-message'></span>
|
|
352
|
-
</p>
|
|
353
|
-
</double-quote>;
|
|
354
|
-
document.querySelector('.information-container').append(token_info);
|
|
355
|
-
""".replace(
|
|
356
|
-
"\n", " "
|
|
357
|
-
)
|
|
345
|
+
onComplete += f"ui.preauthorizeApiKey('bearerAuth', '{access_token}');"
|
|
358
346
|
if os.getenv("INFO_BANNER_INNERHTML"):
|
|
359
347
|
info_banner_innerhtml = os.getenv("INFO_BANNER_INNERHTML")
|
|
360
348
|
onComplete += f"""
|
|
@@ -365,14 +353,14 @@ def custom_swagger_ui_html(
|
|
|
365
353
|
""".replace(
|
|
366
354
|
"\n", " "
|
|
367
355
|
)
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
356
|
+
swagger_ui_parameters = base_swagger_ui_parameters.copy()
|
|
357
|
+
# Note: The `nmdcInit` JavaScript event is a custom event we use to trigger anything that is listening for it.
|
|
358
|
+
# Reference: https://developer.mozilla.org/en-US/docs/Web/Events/Creating_and_triggering_events
|
|
359
|
+
swagger_ui_parameters.update(
|
|
360
|
+
{
|
|
361
|
+
"onComplete": f"""<unquote-safe>() => {{ {onComplete}; dispatchEvent(new Event('nmdcInit')); }}</unquote-safe>""",
|
|
362
|
+
}
|
|
363
|
+
)
|
|
376
364
|
response = get_swagger_ui_html(
|
|
377
365
|
openapi_url=app.openapi_url,
|
|
378
366
|
title=app.title,
|
|
@@ -383,15 +371,51 @@ def custom_swagger_ui_html(
|
|
|
383
371
|
assets_dir_path = Path(__file__).parent / "swagger_ui" / "assets"
|
|
384
372
|
style_css: str = Path(assets_dir_path / "style.css").read_text()
|
|
385
373
|
script_js: str = Path(assets_dir_path / "script.js").read_text()
|
|
374
|
+
custom_elements_js: str = Path(assets_dir_path / "custom-elements.js").read_text()
|
|
386
375
|
content = (
|
|
387
376
|
response.body.decode()
|
|
388
377
|
.replace('"<unquote-safe>', "")
|
|
389
378
|
.replace('</unquote-safe>"', "")
|
|
390
379
|
.replace("<double-quote>", '"')
|
|
391
380
|
.replace("</double-quote>", '"')
|
|
381
|
+
# TODO: Consider using a "custom layout" implemented as a React component.
|
|
382
|
+
# Reference: https://github.com/swagger-api/swagger-ui/blob/master/docs/customization/custom-layout.md
|
|
383
|
+
#
|
|
384
|
+
# Note: Custom layouts are specified via the Swagger UI parameter named `layout`, whose value identifies
|
|
385
|
+
# a component that is specified via the Swagger UI parameter named `plugins`. The Swagger UI
|
|
386
|
+
# JavaScript code expects each item in the `plugins` array to be a JavaScript function,
|
|
387
|
+
# but FastAPI's `get_swagger_ui_html` function serializes each parameter's value into JSON,
|
|
388
|
+
# preventing us from specifying a JavaScript function as a value in the `plugins` array.
|
|
389
|
+
#
|
|
390
|
+
# As a workaround, we could use the string `replace`-ment technique shown below to put the literal
|
|
391
|
+
# JavaScript characters into place in the final HTML document. Using that approach, I _have_ been
|
|
392
|
+
# able to display a custom layout (a custom React component), but I have _not_ been able to get
|
|
393
|
+
# that custom layout to display Swagger UI's `BaseLayout` component (which includes the core
|
|
394
|
+
# Swagger UI functionality). That's a deal breaker.
|
|
395
|
+
#
|
|
396
|
+
.replace(r'"{{ NMDC_SWAGGER_UI_PARAMETERS_PLUGINS_PLACEHOLDER }}"', r"[]")
|
|
397
|
+
# Inject HTML elements containing data that can be read via JavaScript (e.g., `swagger_ui/assets/script.js`).
|
|
398
|
+
# Note: We escape the values here so they can be safely used as HTML attribute values.
|
|
399
|
+
.replace(
|
|
400
|
+
"</head>",
|
|
401
|
+
f"""
|
|
402
|
+
</head>
|
|
403
|
+
<div
|
|
404
|
+
id="nmdc-access-token"
|
|
405
|
+
data-token="{escape(access_token if access_token is not None else '')}"
|
|
406
|
+
style="display: none"
|
|
407
|
+
></div>
|
|
408
|
+
<div
|
|
409
|
+
id="nmdc-orcid-login-url"
|
|
410
|
+
data-url="{escape(orcid_login_url)}"
|
|
411
|
+
style="display: none"
|
|
412
|
+
></div>
|
|
413
|
+
""",
|
|
414
|
+
)
|
|
392
415
|
# Inject a custom CSS stylesheet immediately before the closing `</head>` tag.
|
|
393
416
|
.replace("</head>", f"<style>\n{style_css}\n</style>\n</head>")
|
|
394
|
-
# Inject
|
|
417
|
+
# Inject custom JavaScript scripts immediately before the closing `</body>` tag.
|
|
418
|
+
.replace("</body>", f"<script>\n{custom_elements_js}\n</script>\n</body>")
|
|
395
419
|
.replace("</body>", f"<script>\n{script_js}\n</script>\n</body>")
|
|
396
420
|
)
|
|
397
421
|
return HTMLResponse(content=content)
|
nmdc_runtime/api/models/util.py
CHANGED
|
@@ -30,8 +30,13 @@ class ListRequest(BaseModel):
|
|
|
30
30
|
r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
|
|
31
31
|
],
|
|
32
32
|
)
|
|
33
|
-
# TODO: Document
|
|
34
|
-
|
|
33
|
+
# TODO: Document the following things about this type hint and `Field` definition:
|
|
34
|
+
# (a) why the type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter),
|
|
35
|
+
# (b) why the default value here is 20 as opposed to 25 (the default value in `FindRequest`), and
|
|
36
|
+
# (c) why there is no upper limit on the value (the `PerPageRange` type has an upper limit of 2000).
|
|
37
|
+
#
|
|
38
|
+
# Note: If the HTTP request lacks a value for this parameter, Pydantic will fall back to the default value specified here.
|
|
39
|
+
max_page_size: int = Field(
|
|
35
40
|
default=20,
|
|
36
41
|
title="Resources per page",
|
|
37
42
|
description="How many resources you want _each page_ to contain, formatted as a positive integer.",
|
|
@@ -120,10 +125,12 @@ class FindRequest(BaseModel):
|
|
|
120
125
|
default=None,
|
|
121
126
|
title="Page number",
|
|
122
127
|
description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
|
|
123
|
-
This is the page number formatted as an integer ≥ 1.
|
|
128
|
+
This is the page number formatted as an integer ≥ 1.
|
|
129
|
+
**Limitation:** When using _page number_-based pagination, only the first 10,000 resources
|
|
130
|
+
are accessible. You can access resources beyond that by using _cursor_-based pagination.""",
|
|
124
131
|
examples=[1],
|
|
125
132
|
)
|
|
126
|
-
per_page:
|
|
133
|
+
per_page: PerPageRange = Field(
|
|
127
134
|
default=25,
|
|
128
135
|
title="Resources per page",
|
|
129
136
|
description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
|
|
@@ -133,7 +140,7 @@ class FindRequest(BaseModel):
|
|
|
133
140
|
default=None,
|
|
134
141
|
title="Cursor",
|
|
135
142
|
description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
|
|
136
|
-
To
|
|
143
|
+
To begin using cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
|
|
137
144
|
include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
|
|
138
145
|
request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
|
|
139
146
|
examples=[
|
nmdc_runtime/api/openapi.py
CHANGED
|
@@ -10,233 +10,169 @@ Notes:
|
|
|
10
10
|
Now that they are in a separate module, we will be able to edit them more easily.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
from html import escape
|
|
14
13
|
from typing import List, Dict
|
|
14
|
+
from enum import Enum
|
|
15
15
|
|
|
16
|
-
# Mapping from tag names to their (Markdown-formatted) descriptions.
|
|
17
|
-
tag_descriptions: Dict[str, str] = {}
|
|
18
|
-
|
|
19
|
-
tag_descriptions[
|
|
20
|
-
"sites"
|
|
21
|
-
] = r"""
|
|
22
|
-
A site corresponds to a physical place that may participate in job execution.
|
|
23
|
-
|
|
24
|
-
A site may register data objects and capabilities with NMDC. It may claim jobs to execute, and it may
|
|
25
|
-
update job operations with execution info.
|
|
26
|
-
|
|
27
|
-
A site must be able to service requests for any data objects it has registered.
|
|
28
|
-
|
|
29
|
-
A site may expose a "put object" custom method for authorized users. This method facilitates an
|
|
30
|
-
operation to upload an object to the site and have the site register that object with the runtime
|
|
31
|
-
system.
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
tag_descriptions[
|
|
35
|
-
"workflows"
|
|
36
|
-
] = r"""
|
|
37
|
-
A workflow is a template for creating jobs.
|
|
38
|
-
|
|
39
|
-
Workflow jobs are typically created by the system via trigger associations between
|
|
40
|
-
workflows and object types. A workflow may also require certain capabilities of sites
|
|
41
|
-
in order for those sites to claim workflow jobs.
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
|
-
tag_descriptions[
|
|
45
|
-
"users"
|
|
46
|
-
] = r"""
|
|
47
|
-
Endpoints for user identification.
|
|
48
|
-
|
|
49
|
-
Currently, accounts for use with the Runtime API are created manually by system administrators.
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
tag_descriptions[
|
|
53
|
-
"capabilities"
|
|
54
|
-
] = r"""
|
|
55
|
-
A workflow may require an executing site to have particular capabilities.
|
|
56
|
-
|
|
57
|
-
These capabilities go beyond the simple ability to access the data object resources registered with
|
|
58
|
-
the runtime system. Sites register their capabilities, and sites are only able to claim workflow
|
|
59
|
-
jobs if they are known to have the capabilities required by the workflow.
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
tag_descriptions[
|
|
63
|
-
"object types"
|
|
64
|
-
] = r"""
|
|
65
|
-
An object type is an object annotation that is useful for triggering workflows.
|
|
66
|
-
|
|
67
|
-
A data object may be annotated with one or more types, which in turn can be associated with
|
|
68
|
-
workflows through trigger resources.
|
|
69
|
-
|
|
70
|
-
The data-object type system may be used to trigger workflow jobs on a subset of data objects when a
|
|
71
|
-
new version of a workflow is deployed. This could be done by minting a special object type for the
|
|
72
|
-
occasion, annotating the subset of data objects with that type, and registering the association of
|
|
73
|
-
object type to workflow via a trigger resource.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
tag_descriptions[
|
|
77
|
-
"triggers"
|
|
78
|
-
] = r"""
|
|
79
|
-
A trigger is an association between a workflow and a data object type.
|
|
80
|
-
|
|
81
|
-
When a data object is annotated with a type, perhaps shortly after object registration, the NMDC
|
|
82
|
-
Runtime will check, via trigger associations, for potential new jobs to create for any workflows.
|
|
83
|
-
"""
|
|
84
16
|
|
|
85
|
-
|
|
86
|
-
"
|
|
87
|
-
] = r"""
|
|
88
|
-
A job is a resource that isolates workflow configuration from execution.
|
|
17
|
+
class OpenAPITag(str, Enum):
|
|
18
|
+
r"""A tag you can use to group related API endpoints together in an OpenAPI schema."""
|
|
89
19
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
20
|
+
MINTER = "Persistent identifiers"
|
|
21
|
+
SYSTEM_ADMINISTRATION = "System administration"
|
|
22
|
+
WORKFLOWS = "Workflow management"
|
|
23
|
+
METADATA_ACCESS = "Metadata access"
|
|
24
|
+
USERS = "User accounts"
|
|
93
25
|
|
|
94
|
-
A job can have multiple executions, and a workflow's executions are precisely the executions of all
|
|
95
|
-
jobs created for that workflow.
|
|
96
26
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
object inputs for the job without the risk of the runtime system creating a claimable job of the
|
|
100
|
-
pre-claimed type.
|
|
101
|
-
"""
|
|
27
|
+
# Mapping from tag names to their (Markdown-formatted) descriptions.
|
|
28
|
+
tag_descriptions: Dict[str, str] = {}
|
|
102
29
|
|
|
103
30
|
tag_descriptions[
|
|
104
|
-
|
|
31
|
+
OpenAPITag.METADATA_ACCESS.value
|
|
105
32
|
] = r"""
|
|
106
|
-
|
|
107
|
-
object](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.1.0/docs/#_drs_datatypes)
|
|
108
|
-
represents content necessary for a workflow job to execute, and/or output from a job execution.
|
|
33
|
+
Retrieve and manage metadata.
|
|
109
34
|
|
|
110
|
-
|
|
111
|
-
objects, and sites must ensure that these objects are accessible to the NMDC data broker.
|
|
35
|
+
The metadata access endpoints fall into several subcategories:
|
|
112
36
|
|
|
113
|
-
|
|
37
|
+
- **Find**: Find a few types of metadata, using a simplified syntax.
|
|
38
|
+
- Each endpoint deals with a predetermined type of metadata; i.e., [studies](https://w3id.org/nmdc/Study/), [biosamples](https://w3id.org/nmdc/Biosample/), [data objects](https://w3id.org/nmdc/DataObject/), [planned processes](https://w3id.org/nmdc/PlannedProcess/), or [workflow executions](https://w3id.org/nmdc/WorkflowExecution/).
|
|
39
|
+
- **NMDC schema**: Examine the [NMDC schema](https://microbiomedata.github.io/nmdc-schema/), itself, and use schema-related terminology to find metadata of any type.
|
|
40
|
+
- **Queries**: Find, update, and delete metadata using [MongoDB commands](https://www.mongodb.com/docs/manual/reference/command/#user-commands).
|
|
41
|
+
- **Changesheets**: Modify metadata by uploading [changesheets](https://docs.microbiomedata.org/runtime/howto-guides/author-changesheets/).
|
|
42
|
+
- **JSON operations**: Insert or update metadata by submitting a JSON document representing a [Database](https://w3id.org/nmdc/Database/).
|
|
114
43
|
"""
|
|
115
44
|
|
|
116
45
|
tag_descriptions[
|
|
117
|
-
|
|
46
|
+
OpenAPITag.WORKFLOWS.value
|
|
118
47
|
] = r"""
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
48
|
+
Manage workflows and their execution.
|
|
49
|
+
|
|
50
|
+
The workflow management endpoints fall into several subcategories:
|
|
51
|
+
|
|
52
|
+
- **Sites**: Register compute sites that can execute workflows, and generate credentials for them.
|
|
53
|
+
- A site corresponds to a physical place that may participate in job execution.
|
|
54
|
+
- A site may register data objects and capabilities with the Runtime. It may claim jobs to execute, and it may update job operations with execution info.
|
|
55
|
+
- A site must be able to service requests for any data objects it has registered.
|
|
56
|
+
- A site may expose a "put object" custom method for authorized users. This method facilitates an operation to upload an object to the site and have the site register that object with the Runtime system.
|
|
57
|
+
- **Workflows**: Manage workflow templates, which serve as blueprints for job execution.
|
|
58
|
+
- A workflow is a template for creating jobs.
|
|
59
|
+
- Workflow jobs are typically created by the system via triggers, which are associations between workflows and data object types.
|
|
60
|
+
- **Capabilities**: Manage the technical requirements that sites must meet to execute specific workflows.
|
|
61
|
+
- A workflow may require a site that executes it to have specific capabilities.
|
|
62
|
+
- These capabilities may go beyond the simple ability to access the data objects registered with the Runtime system.
|
|
63
|
+
- Sites register their capabilities, and sites are only able to claim workflow jobs if those sites have the capabilities required by the workflow.
|
|
64
|
+
- **Object types**: Manage the types of data objects whose creation can trigger job creation and, eventually, workflow execution.
|
|
65
|
+
- A data object type is an annotation that can be applied to data objects.
|
|
66
|
+
- A data object may have one or more types. Those types can be associated with workflows, through triggers.
|
|
67
|
+
- **Triggers**: Define associations between workflows and object types to enable automatic job creation.
|
|
68
|
+
- A [trigger](https://docs.microbiomedata.org/runtime/howto-guides/create-triggers/) is an association between a workflow and a data object type.
|
|
69
|
+
- When a data object is [annotated with a type](https://docs.microbiomedata.org/runtime/nb/queue_and_trigger_data_jobs/#use-case-annotate-a-known-object-with-a-type-that-will-trigger-a-workflow)—which may occur shortly after object registration—the Runtime will check—via trigger associations—whether it is due to create any jobs.
|
|
70
|
+
- **Jobs**: Manage the [claiming](https://docs.microbiomedata.org/runtime/howto-guides/claim-and-run-jobs/) and status of workflow executions.
|
|
71
|
+
- A job is a resource that decouples the configuration of a workflow, from execution of that workflow.
|
|
72
|
+
- Rather than directly creating a workflow operation, the Runtime creates a job that pairs a workflow with its configuration. Then, a site can claim the job—by its ID—and execute the associated workflow without doing additional configuration.
|
|
73
|
+
- A job can have multiple executions. All executions of all jobs of a given workflow, make up that workflow's executions.
|
|
74
|
+
- A site that already has a compatible job execution result can preempt the unnecessary creation of a job by _pre-claiming_ it. This will return like a claim, and now the site can register known data object inputs for the job without the risk of the Runtime creating a claimable job of the pre-claimed type.
|
|
75
|
+
- **Objects**: Manage the Data Repository Service (DRS) objects that are inputs and outputs of workflow executions.
|
|
76
|
+
- A [Data Repository Service (DRS) object](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.1.0/docs/#_drs_datatypes) represents content necessary for—or content produced by—job execution.
|
|
77
|
+
- An object may be a *blob* (analogous to a file) or a *bundle* (analogous to a folder). Sites register objects, and sites must ensure that these objects are accessible to the "NMDC data broker."
|
|
78
|
+
- An object may be annotated with one or more object types, useful for triggering workflows.
|
|
79
|
+
- **Operations**: Track and monitor the real-time execution status of claimed jobs, including progress updates and error handling.
|
|
80
|
+
- An operation is a resource for tracking the execution of a job.
|
|
81
|
+
- When a job is claimed by a site for execution, an operation resource is created.
|
|
82
|
+
- An operation is like a "promise," in that it should eventually resolve to either a successful result—i.e., an execution resource—or to an error.
|
|
83
|
+
- An operation is parameterized to return a result type, and a metadata type for storing progress information, that are both particular to the job type.
|
|
84
|
+
- Operations may be paused, resumed, and/or cancelled.
|
|
85
|
+
- Operations may expire, i.e. not be stored indefinitely. In this case, it is recommended that execution resources have longer lifetimes/not expire, so that information about successful results of operations are available.
|
|
86
|
+
- **Runs**: _(work in progress)_ Execute simple jobs and report execution events back to the Runtime.
|
|
87
|
+
- Run simple jobs.
|
|
88
|
+
- For off-site job runs, keep the Runtime appraised of run events.
|
|
134
89
|
"""
|
|
135
90
|
|
|
136
91
|
tag_descriptions[
|
|
137
|
-
|
|
92
|
+
OpenAPITag.USERS.value
|
|
138
93
|
] = r"""
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
Metadata -- for studies, biosamples, omics processing, etc. -- is used by sites to execute jobs,
|
|
142
|
-
as the parameterization of job executions may depend not only on the content of data objects, but
|
|
143
|
-
also on objects' associated metadata.
|
|
144
|
-
|
|
145
|
-
Also, the function of many workflows is to extract or produce new metadata. Such metadata products
|
|
146
|
-
should be registered as data objects, and they may also be supplied by sites to the runtime system
|
|
147
|
-
as an update query (if the latter is not done, the runtime system will sense the new metadata and
|
|
148
|
-
issue an update query).
|
|
94
|
+
Create and manage user accounts.
|
|
149
95
|
"""
|
|
150
96
|
|
|
151
97
|
tag_descriptions[
|
|
152
|
-
|
|
98
|
+
OpenAPITag.MINTER.value
|
|
153
99
|
] = r"""
|
|
154
|
-
|
|
155
|
-
metadata from collection set types (including
|
|
156
|
-
[studies](https://w3id.org/nmdc/Study/),
|
|
157
|
-
[biosamples](https://w3id.org/nmdc/Biosample/),
|
|
158
|
-
[planned processes](https://w3id.org/nmdc/PlannedProcess/), and
|
|
159
|
-
[data objects](https://w3id.org/nmdc/DataObject/)
|
|
160
|
-
as discussed in the __find__ section).
|
|
161
|
-
<br/>
|
|
162
|
-
|
|
163
|
-
The __metadata__ endpoints allow users to retrieve metadata from the data portal using the various
|
|
164
|
-
GET endpoints that are slightly different than the __find__ endpoints, but some can be used similarly.
|
|
165
|
-
As with the __find__ endpoints, parameters for the __metadata__ endpoints that do not have a
|
|
166
|
-
red ___* required___ next to them are optional. <br/>
|
|
167
|
-
|
|
168
|
-
Unlike the compact syntax used in the __find__ endpoints, the syntax for the filter parameter of
|
|
169
|
-
the metadata endpoints
|
|
170
|
-
uses [MongoDB-like language querying](https://www.mongodb.com/docs/manual/tutorial/query-documents/).
|
|
100
|
+
Mint and manage persistent identifiers.
|
|
171
101
|
"""
|
|
172
102
|
|
|
173
103
|
tag_descriptions[
|
|
174
|
-
|
|
104
|
+
OpenAPITag.SYSTEM_ADMINISTRATION.value
|
|
175
105
|
] = r"""
|
|
176
|
-
|
|
177
|
-
already specified - where metadata about [studies](https://w3id.org/nmdc/Study),
|
|
178
|
-
[biosamples](https://w3id.org/nmdc/Biosample), [data objects](https://w3id.org/nmdc/DataObject/),
|
|
179
|
-
and [planned processes](https://w3id.org/nmdc/PlannedProcess/) can be retrieved using GET requests.
|
|
180
|
-
<br/>
|
|
181
|
-
|
|
182
|
-
Each endpoint is unique and requires the applicable attribute names to be known in order to structure a query
|
|
183
|
-
in a meaningful way. Parameters that do not have a red ___* required___ label next to them are optional.
|
|
184
|
-
"""
|
|
185
|
-
|
|
186
|
-
tag_descriptions[
|
|
187
|
-
"runs"
|
|
188
|
-
] = r"""
|
|
189
|
-
**WORK IN PROGRESS**
|
|
190
|
-
|
|
191
|
-
Run simple jobs.
|
|
192
|
-
|
|
193
|
-
For off-site job runs, keep the Runtime appraised of run events.
|
|
106
|
+
Retrieve information about the software components that make up the Runtime.
|
|
194
107
|
"""
|
|
195
108
|
|
|
196
109
|
# Remove leading and trailing whitespace from each description.
|
|
197
110
|
for name, description in tag_descriptions.items():
|
|
198
111
|
tag_descriptions[name] = description.strip()
|
|
199
112
|
|
|
200
|
-
ordered_tag_descriptors: List[Dict
|
|
201
|
-
{
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
{
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
{
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
{
|
|
113
|
+
ordered_tag_descriptors: List[Dict] = [
|
|
114
|
+
{
|
|
115
|
+
"name": OpenAPITag.METADATA_ACCESS.value,
|
|
116
|
+
"description": tag_descriptions[OpenAPITag.METADATA_ACCESS.value],
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"name": OpenAPITag.WORKFLOWS.value,
|
|
120
|
+
"description": tag_descriptions[OpenAPITag.WORKFLOWS.value],
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
"name": OpenAPITag.MINTER.value,
|
|
124
|
+
"description": tag_descriptions[OpenAPITag.MINTER.value],
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"name": OpenAPITag.USERS.value,
|
|
128
|
+
"description": tag_descriptions[OpenAPITag.USERS.value],
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"name": OpenAPITag.SYSTEM_ADMINISTRATION.value,
|
|
132
|
+
"description": tag_descriptions[OpenAPITag.SYSTEM_ADMINISTRATION.value],
|
|
133
|
+
},
|
|
214
134
|
]
|
|
215
135
|
|
|
216
136
|
|
|
217
|
-
def make_api_description(schema_version: str, orcid_login_url: str) -> str:
|
|
137
|
+
def make_api_description(api_version: str, schema_version: str) -> str:
|
|
218
138
|
r"""
|
|
219
|
-
Returns an API description into which the specified schema version and
|
|
220
|
-
ORCID login URL have been incorporated.
|
|
139
|
+
Returns an API description into which the specified schema version string has been incorporated.
|
|
221
140
|
|
|
222
141
|
Args:
|
|
142
|
+
api_version (str): The version of this Runtime instance.
|
|
223
143
|
schema_version (str): The version of `nmdc-schema` the Runtime is using.
|
|
224
|
-
orcid_login_url (str): The URL at which a user could login via ORCID.
|
|
225
144
|
|
|
226
145
|
Returns:
|
|
227
146
|
str: The Markdown-formatted API description.
|
|
228
147
|
"""
|
|
229
148
|
result = f"""
|
|
230
|
-
|
|
231
|
-
supports validation and submission of metadata, as well as orchestration of workflow executions.
|
|
149
|
+
Welcome to the **NMDC Runtime API**, an API you can use to [access metadata](https://docs.microbiomedata.org/howto_guides/api_gui/) residing in the NMDC database.
|
|
232
150
|
|
|
233
|
-
|
|
151
|
+
Users having adequate permissions can also use it to generate identifiers, submit metadata,
|
|
152
|
+
and manage workflow executions.
|
|
153
|
+
|
|
154
|
+
##### Quick start
|
|
234
155
|
|
|
235
|
-
|
|
156
|
+
The endpoints of the NMDC Runtime API are listed below.
|
|
157
|
+
They are organized into sections, each of which can be opened and closed.
|
|
158
|
+
The endpoints, themselves, can also be opened and closed.
|
|
236
159
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
160
|
+
Each endpoint—when opened—has a "Try it out" button, which you can press in order to send a request
|
|
161
|
+
to the endpoint directly from this web page. Each endpoint can also be
|
|
162
|
+
[accessed programmatically](https://docs.microbiomedata.org/runtime/nb/api_access_via_python/).
|
|
163
|
+
|
|
164
|
+
Some endpoints have a padlock icon, which means that the endpoint is only accessible to logged-in users.
|
|
165
|
+
You can log in by clicking the "Authorize" button located directly above the list of endpoints.
|
|
166
|
+
|
|
167
|
+
##### Contact us
|
|
168
|
+
|
|
169
|
+
You can [contact us](https://microbiomedata.org/contact/) anytime.
|
|
170
|
+
We continuously refine the API and may be able to streamline your use case.
|
|
171
|
+
|
|
172
|
+
##### Versions
|
|
173
|
+
|
|
174
|
+
[NMDC Runtime](https://docs.microbiomedata.org/runtime/) version: `{api_version}`
|
|
175
|
+
|
|
176
|
+
[NMDC Schema](https://microbiomedata.github.io/nmdc-schema/) version: `{schema_version}`
|
|
241
177
|
""".strip()
|
|
242
178
|
return result
|