nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (77)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +22 -2
  3. nmdc_runtime/api/core/idgen.py +36 -6
  4. nmdc_runtime/api/db/mongo.py +0 -12
  5. nmdc_runtime/api/endpoints/find.py +65 -225
  6. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  7. nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
  8. nmdc_runtime/api/endpoints/objects.py +4 -11
  9. nmdc_runtime/api/endpoints/operations.py +0 -27
  10. nmdc_runtime/api/endpoints/queries.py +22 -0
  11. nmdc_runtime/api/endpoints/sites.py +0 -24
  12. nmdc_runtime/api/endpoints/util.py +57 -35
  13. nmdc_runtime/api/entrypoint.sh +7 -0
  14. nmdc_runtime/api/main.py +84 -60
  15. nmdc_runtime/api/models/util.py +12 -5
  16. nmdc_runtime/api/openapi.py +116 -180
  17. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  18. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  19. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  20. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  21. nmdc_runtime/minter/adapters/repository.py +21 -0
  22. nmdc_runtime/minter/domain/model.py +20 -0
  23. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  24. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  25. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  26. nmdc_runtime/site/dagster.yaml +53 -0
  27. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  28. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  29. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  30. nmdc_runtime/site/export/ncbi_xml.py +632 -11
  31. nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
  32. nmdc_runtime/site/graphs.py +7 -0
  33. nmdc_runtime/site/ops.py +92 -34
  34. nmdc_runtime/site/repository.py +2 -0
  35. nmdc_runtime/site/resources.py +16 -3
  36. nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
  37. nmdc_runtime/site/workspace.yaml +13 -0
  38. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  39. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  40. nmdc_runtime/static/README.md +5 -0
  41. nmdc_runtime/static/favicon.ico +0 -0
  42. nmdc_runtime/util.py +87 -1
  43. nmdc_runtime-2.11.1.dist-info/METADATA +46 -0
  44. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/RECORD +47 -57
  45. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/WHEEL +1 -2
  46. nmdc_runtime/api/endpoints/ids.py +0 -192
  47. nmdc_runtime/client/__init__.py +0 -0
  48. nmdc_runtime/containers.py +0 -14
  49. nmdc_runtime/core/__init__.py +0 -0
  50. nmdc_runtime/core/db/Database.py +0 -13
  51. nmdc_runtime/core/db/__init__.py +0 -0
  52. nmdc_runtime/core/exceptions/__init__.py +0 -23
  53. nmdc_runtime/core/exceptions/base.py +0 -47
  54. nmdc_runtime/core/exceptions/token.py +0 -13
  55. nmdc_runtime/domain/__init__.py +0 -0
  56. nmdc_runtime/domain/users/__init__.py +0 -0
  57. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  58. nmdc_runtime/domain/users/userSchema.py +0 -37
  59. nmdc_runtime/domain/users/userService.py +0 -14
  60. nmdc_runtime/infrastructure/__init__.py +0 -0
  61. nmdc_runtime/infrastructure/database/__init__.py +0 -0
  62. nmdc_runtime/infrastructure/database/db.py +0 -3
  63. nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
  64. nmdc_runtime/infrastructure/database/models/user.py +0 -1
  65. nmdc_runtime/lib/__init__.py +0 -1
  66. nmdc_runtime/lib/extract_nmdc_data.py +0 -33
  67. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  68. nmdc_runtime/lib/nmdc_dataframes.py +0 -825
  69. nmdc_runtime/lib/nmdc_etl_class.py +0 -396
  70. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  71. nmdc_runtime/site/drsobjects/__init__.py +0 -0
  72. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  73. nmdc_runtime/site/drsobjects/registration.py +0 -131
  74. nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
  75. nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
  76. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/entry_points.txt +0 -0
  77. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/licenses/LICENSE +0 -0
nmdc_runtime/api/main.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  from contextlib import asynccontextmanager
3
+ from html import escape
3
4
  from importlib import import_module
4
5
  from importlib.metadata import version
5
6
  from typing import Annotated
@@ -12,7 +13,6 @@ from fastapi import APIRouter, FastAPI, Cookie
12
13
  from fastapi.middleware.cors import CORSMiddleware
13
14
  from fastapi.openapi.docs import get_swagger_ui_html
14
15
  from fastapi.staticfiles import StaticFiles
15
- from setuptools_scm import get_version
16
16
  from starlette import status
17
17
  from starlette.responses import RedirectResponse, HTMLResponse, FileResponse
18
18
  from refscan.lib.helpers import get_collection_names_from_schema
@@ -55,29 +55,32 @@ from nmdc_runtime.api.endpoints.util import BASE_URL_EXTERNAL
55
55
  from nmdc_runtime.api.models.site import SiteClientInDB, SiteInDB
56
56
  from nmdc_runtime.api.models.user import UserInDB
57
57
  from nmdc_runtime.api.models.util import entity_attributes_to_index
58
- from nmdc_runtime.api.openapi import ordered_tag_descriptors, make_api_description
59
- from nmdc_runtime.api.v1.router import router_v1
58
+ from nmdc_runtime.api.openapi import (
59
+ OpenAPITag,
60
+ ordered_tag_descriptors,
61
+ make_api_description,
62
+ )
63
+ from nmdc_runtime.api.swagger_ui.swagger_ui import base_swagger_ui_parameters
60
64
  from nmdc_runtime.minter.bootstrap import bootstrap as minter_bootstrap
61
65
  from nmdc_runtime.minter.entrypoints.fastapi_app import router as minter_router
62
66
 
63
67
 
64
68
  api_router = APIRouter()
65
- api_router.include_router(users.router, tags=["users"])
66
- api_router.include_router(operations.router, tags=["operations"])
67
- api_router.include_router(sites.router, tags=["sites"])
68
- api_router.include_router(jobs.router, tags=["jobs"])
69
- api_router.include_router(objects.router, tags=["objects"])
70
- api_router.include_router(capabilities.router, tags=["capabilities"])
71
- api_router.include_router(triggers.router, tags=["triggers"])
72
- api_router.include_router(workflows.router, tags=["workflows"])
73
- api_router.include_router(object_types.router, tags=["object types"])
74
- api_router.include_router(queries.router, tags=["queries"])
75
- api_router.include_router(metadata.router, tags=["metadata"])
76
- api_router.include_router(nmdcschema.router, tags=["metadata"])
77
- api_router.include_router(find.router, tags=["find"])
78
- api_router.include_router(runs.router, tags=["runs"])
79
- api_router.include_router(router_v1, tags=["v1"])
80
- api_router.include_router(minter_router, prefix="/pids", tags=["minter"])
69
+ api_router.include_router(find.router, tags=[OpenAPITag.METADATA_ACCESS.value])
70
+ api_router.include_router(nmdcschema.router, tags=[OpenAPITag.METADATA_ACCESS.value])
71
+ api_router.include_router(queries.router, tags=[OpenAPITag.METADATA_ACCESS.value])
72
+ api_router.include_router(metadata.router, tags=[OpenAPITag.METADATA_ACCESS.value])
73
+ api_router.include_router(sites.router, tags=[OpenAPITag.WORKFLOWS.value])
74
+ api_router.include_router(workflows.router, tags=[OpenAPITag.WORKFLOWS.value])
75
+ api_router.include_router(capabilities.router, tags=[OpenAPITag.WORKFLOWS.value])
76
+ api_router.include_router(object_types.router, tags=[OpenAPITag.WORKFLOWS.value])
77
+ api_router.include_router(triggers.router, tags=[OpenAPITag.WORKFLOWS.value])
78
+ api_router.include_router(jobs.router, tags=[OpenAPITag.WORKFLOWS.value])
79
+ api_router.include_router(objects.router, tags=[OpenAPITag.WORKFLOWS.value])
80
+ api_router.include_router(operations.router, tags=[OpenAPITag.WORKFLOWS.value])
81
+ api_router.include_router(runs.router, tags=[OpenAPITag.WORKFLOWS.value])
82
+ api_router.include_router(minter_router, prefix="/pids", tags=[OpenAPITag.MINTER.value])
83
+ api_router.include_router(users.router, tags=[OpenAPITag.USERS.value])
81
84
 
82
85
 
83
86
  def ensure_initial_resources_on_boot():
@@ -219,9 +222,6 @@ async def lifespan(app: FastAPI):
219
222
  From the [FastAPI documentation](https://fastapi.tiangolo.com/advanced/events/#lifespan-function):
220
223
  > You can define logic (code) that should be executed before the application starts up. This means that
221
224
  > this code will be executed once, before the application starts receiving requests.
222
-
223
- Note: Based on my own observations, I think this function gets called when the first request starts coming in,
224
- but not before that (i.e. not when the application is idle before any requests start coming in).
225
225
  """
226
226
  ensure_initial_resources_on_boot()
227
227
  ensure_attribute_indexes()
@@ -242,21 +242,24 @@ async def root():
242
242
  )
243
243
 
244
244
 
245
- @api_router.get("/version")
245
+ @api_router.get("/version", tags=[OpenAPITag.SYSTEM_ADMINISTRATION.value])
246
246
  async def get_versions():
247
247
  return {
248
- "nmdc-runtime": get_version(),
248
+ "nmdc-runtime": version("nmdc_runtime"),
249
249
  "fastapi": fastapi.__version__,
250
250
  "nmdc-schema": version("nmdc_schema"),
251
251
  }
252
252
 
253
253
 
254
+ # Build an ORCID Login URL for the Swagger UI page, based upon some environment variables.
255
+ orcid_login_url = f"{ORCID_BASE_URL}/oauth/authorize?client_id={ORCID_NMDC_CLIENT_ID}&response_type=code&scope=openid&redirect_uri={BASE_URL_EXTERNAL}/orcid_code"
256
+
257
+
254
258
  app = FastAPI(
255
259
  title="NMDC Runtime API",
256
- version=get_version(),
260
+ version=version("nmdc_runtime"),
257
261
  description=make_api_description(
258
- schema_version=version("nmdc_schema"),
259
- orcid_login_url=f"{ORCID_BASE_URL}/oauth/authorize?client_id={ORCID_NMDC_CLIENT_ID}&response_type=code&scope=openid&redirect_uri={BASE_URL_EXTERNAL}/orcid_code",
262
+ api_version=version("nmdc_runtime"), schema_version=version("nmdc_schema")
260
263
  ),
261
264
  openapi_tags=ordered_tag_descriptors,
262
265
  lifespan=lifespan,
@@ -309,6 +312,14 @@ async def get_scalar_html():
309
312
  def custom_swagger_ui_html(
310
313
  user_id_token: Annotated[str | None, Cookie()] = None,
311
314
  ):
315
+ r"""Returns the HTML markup for an interactive API docs web page powered by Swagger UI.
316
+
317
+ If the `user_id_token` cookie is present and not empty, this function will send its value to
318
+ the `/token` endpoint in an attempt to get an access token. If it gets one, this function will
319
+ inject that access token into the web page so Swagger UI will consider the user to be logged in.
320
+
321
+ Reference: https://fastapi.tiangolo.com/tutorial/cookie-params/
322
+ """
312
323
  access_token = None
313
324
  if user_id_token:
314
325
  # get bearer token
@@ -329,32 +340,9 @@ def custom_swagger_ui_html(
329
340
  rv.raise_for_status()
330
341
  access_token = rv.json()["access_token"]
331
342
 
332
- swagger_ui_parameters = {"withCredentials": True}
333
343
  onComplete = ""
334
344
  if access_token is not None:
335
- onComplete += f"""
336
- ui.preauthorizeApiKey('bearerAuth', '{access_token}');
337
-
338
- token_info = document.createElement('section');
339
- token_info.classList.add('nmdc-info', 'nmdc-info-token', 'block', 'col-12');
340
- token_info.innerHTML = <double-quote>
341
- <p>You are now authorized. Prefer a command-line interface (CLI)? Use this header for HTTP requests:</p>
342
- <p>
343
- <code>
344
- <span>Authorization: Bearer </span>
345
- <span id='token' data-token-value='{access_token}' data-state='masked'>***</span>
346
- </code>
347
- </p>
348
- <p>
349
- <button id='token-mask-toggler'>Show token</button>
350
- <button id='token-copier'>Copy token</button>
351
- <span id='token-copier-message'></span>
352
- </p>
353
- </double-quote>;
354
- document.querySelector('.information-container').append(token_info);
355
- """.replace(
356
- "\n", " "
357
- )
345
+ onComplete += f"ui.preauthorizeApiKey('bearerAuth', '{access_token}');"
358
346
  if os.getenv("INFO_BANNER_INNERHTML"):
359
347
  info_banner_innerhtml = os.getenv("INFO_BANNER_INNERHTML")
360
348
  onComplete += f"""
@@ -365,14 +353,14 @@ def custom_swagger_ui_html(
365
353
  """.replace(
366
354
  "\n", " "
367
355
  )
368
- if onComplete:
369
- # Note: The `nmdcInit` JavaScript event is a custom event we use to trigger anything that is listening for it.
370
- # Reference: https://developer.mozilla.org/en-US/docs/Web/Events/Creating_and_triggering_events
371
- swagger_ui_parameters.update(
372
- {
373
- "onComplete": f"""<unquote-safe>() => {{ {onComplete}; dispatchEvent(new Event('nmdcInit')); }}</unquote-safe>""",
374
- }
375
- )
356
+ swagger_ui_parameters = base_swagger_ui_parameters.copy()
357
+ # Note: The `nmdcInit` JavaScript event is a custom event we use to trigger anything that is listening for it.
358
+ # Reference: https://developer.mozilla.org/en-US/docs/Web/Events/Creating_and_triggering_events
359
+ swagger_ui_parameters.update(
360
+ {
361
+ "onComplete": f"""<unquote-safe>() => {{ {onComplete}; dispatchEvent(new Event('nmdcInit')); }}</unquote-safe>""",
362
+ }
363
+ )
376
364
  response = get_swagger_ui_html(
377
365
  openapi_url=app.openapi_url,
378
366
  title=app.title,
@@ -383,15 +371,51 @@ def custom_swagger_ui_html(
383
371
  assets_dir_path = Path(__file__).parent / "swagger_ui" / "assets"
384
372
  style_css: str = Path(assets_dir_path / "style.css").read_text()
385
373
  script_js: str = Path(assets_dir_path / "script.js").read_text()
374
+ custom_elements_js: str = Path(assets_dir_path / "custom-elements.js").read_text()
386
375
  content = (
387
376
  response.body.decode()
388
377
  .replace('"<unquote-safe>', "")
389
378
  .replace('</unquote-safe>"', "")
390
379
  .replace("<double-quote>", '"')
391
380
  .replace("</double-quote>", '"')
381
+ # TODO: Consider using a "custom layout" implemented as a React component.
382
+ # Reference: https://github.com/swagger-api/swagger-ui/blob/master/docs/customization/custom-layout.md
383
+ #
384
+ # Note: Custom layouts are specified via the Swagger UI parameter named `layout`, whose value identifies
385
+ # a component that is specified via the Swagger UI parameter named `plugins`. The Swagger UI
386
+ # JavaScript code expects each item in the `plugins` array to be a JavaScript function,
387
+ # but FastAPI's `get_swagger_ui_html` function serializes each parameter's value into JSON,
388
+ # preventing us from specifying a JavaScript function as a value in the `plugins` array.
389
+ #
390
+ # As a workaround, we could use the string `replace`-ment technique shown below to put the literal
391
+ # JavaScript characters into place in the final HTML document. Using that approach, I _have_ been
392
+ # able to display a custom layout (a custom React component), but I have _not_ been able to get
393
+ # that custom layout to display Swagger UI's `BaseLayout` component (which includes the core
394
+ # Swagger UI functionality). That's a deal breaker.
395
+ #
396
+ .replace(r'"{{ NMDC_SWAGGER_UI_PARAMETERS_PLUGINS_PLACEHOLDER }}"', r"[]")
397
+ # Inject HTML elements containing data that can be read via JavaScript (e.g., `swagger_ui/assets/script.js`).
398
+ # Note: We escape the values here so they can be safely used as HTML attribute values.
399
+ .replace(
400
+ "</head>",
401
+ f"""
402
+ </head>
403
+ <div
404
+ id="nmdc-access-token"
405
+ data-token="{escape(access_token if access_token is not None else '')}"
406
+ style="display: none"
407
+ ></div>
408
+ <div
409
+ id="nmdc-orcid-login-url"
410
+ data-url="{escape(orcid_login_url)}"
411
+ style="display: none"
412
+ ></div>
413
+ """,
414
+ )
392
415
  # Inject a custom CSS stylesheet immediately before the closing `</head>` tag.
393
416
  .replace("</head>", f"<style>\n{style_css}\n</style>\n</head>")
394
- # Inject a custom JavaScript script immediately before the closing `</body>` tag.
417
+ # Inject custom JavaScript scripts immediately before the closing `</body>` tag.
418
+ .replace("</body>", f"<script>\n{custom_elements_js}\n</script>\n</body>")
395
419
  .replace("</body>", f"<script>\n{script_js}\n</script>\n</body>")
396
420
  )
397
421
  return HTMLResponse(content=content)
@@ -30,8 +30,13 @@ class ListRequest(BaseModel):
30
30
  r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
31
31
  ],
32
32
  )
33
- # TODO: Document why the optional type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter).
34
- max_page_size: Optional[int] = Field(
33
+ # TODO: Document the following things about this type hint and `Field` definition:
34
+ # (a) why the type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter),
35
+ # (b) why the default value here is 20 as opposed to 25 (the default value in `FindRequest`), and
36
+ # (c) why there is no upper limit on the value (the `PerPageRange` type has an upper limit of 2000).
37
+ #
38
+ # Note: If the HTTP request lacks a value for this parameter, Pydantic will fall back to the default value specified here.
39
+ max_page_size: int = Field(
35
40
  default=20,
36
41
  title="Resources per page",
37
42
  description="How many resources you want _each page_ to contain, formatted as a positive integer.",
@@ -120,10 +125,12 @@ class FindRequest(BaseModel):
120
125
  default=None,
121
126
  title="Page number",
122
127
  description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
123
- This is the page number formatted as an integer ≥ 1.""",
128
+ This is the page number formatted as an integer ≥ 1.
129
+ **Limitation:** When using _page number_-based pagination, only the first 10,000 resources
130
+ are accessible. You can access resources beyond that by using _cursor_-based pagination.""",
124
131
  examples=[1],
125
132
  )
126
- per_page: Optional[PerPageRange] = Field(
133
+ per_page: PerPageRange = Field(
127
134
  default=25,
128
135
  title="Resources per page",
129
136
  description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
@@ -133,7 +140,7 @@ class FindRequest(BaseModel):
133
140
  default=None,
134
141
  title="Cursor",
135
142
  description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
136
- To use cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
143
+ To begin using cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
137
144
  include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
138
145
  request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
139
146
  examples=[
@@ -10,233 +10,169 @@ Notes:
10
10
  Now that they are in a separate module, we will be able to edit them more easily.
11
11
  """
12
12
 
13
- from html import escape
14
13
  from typing import List, Dict
14
+ from enum import Enum
15
15
 
16
- # Mapping from tag names to their (Markdown-formatted) descriptions.
17
- tag_descriptions: Dict[str, str] = {}
18
-
19
- tag_descriptions[
20
- "sites"
21
- ] = r"""
22
- A site corresponds to a physical place that may participate in job execution.
23
-
24
- A site may register data objects and capabilities with NMDC. It may claim jobs to execute, and it may
25
- update job operations with execution info.
26
-
27
- A site must be able to service requests for any data objects it has registered.
28
-
29
- A site may expose a "put object" custom method for authorized users. This method facilitates an
30
- operation to upload an object to the site and have the site register that object with the runtime
31
- system.
32
- """
33
-
34
- tag_descriptions[
35
- "workflows"
36
- ] = r"""
37
- A workflow is a template for creating jobs.
38
-
39
- Workflow jobs are typically created by the system via trigger associations between
40
- workflows and object types. A workflow may also require certain capabilities of sites
41
- in order for those sites to claim workflow jobs.
42
- """
43
-
44
- tag_descriptions[
45
- "users"
46
- ] = r"""
47
- Endpoints for user identification.
48
-
49
- Currently, accounts for use with the Runtime API are created manually by system administrators.
50
- """
51
-
52
- tag_descriptions[
53
- "capabilities"
54
- ] = r"""
55
- A workflow may require an executing site to have particular capabilities.
56
-
57
- These capabilities go beyond the simple ability to access the data object resources registered with
58
- the runtime system. Sites register their capabilities, and sites are only able to claim workflow
59
- jobs if they are known to have the capabilities required by the workflow.
60
- """
61
-
62
- tag_descriptions[
63
- "object types"
64
- ] = r"""
65
- An object type is an object annotation that is useful for triggering workflows.
66
-
67
- A data object may be annotated with one or more types, which in turn can be associated with
68
- workflows through trigger resources.
69
-
70
- The data-object type system may be used to trigger workflow jobs on a subset of data objects when a
71
- new version of a workflow is deployed. This could be done by minting a special object type for the
72
- occasion, annotating the subset of data objects with that type, and registering the association of
73
- object type to workflow via a trigger resource.
74
- """
75
-
76
- tag_descriptions[
77
- "triggers"
78
- ] = r"""
79
- A trigger is an association between a workflow and a data object type.
80
-
81
- When a data object is annotated with a type, perhaps shortly after object registration, the NMDC
82
- Runtime will check, via trigger associations, for potential new jobs to create for any workflows.
83
- """
84
16
 
85
- tag_descriptions[
86
- "jobs"
87
- ] = r"""
88
- A job is a resource that isolates workflow configuration from execution.
17
+ class OpenAPITag(str, Enum):
18
+ r"""A tag you can use to group related API endpoints together in an OpenAPI schema."""
89
19
 
90
- Rather than directly creating a workflow operation by supplying a workflow ID along with
91
- configuration, NMDC creates a job that pairs a workflow with configuration. Then, a site can claim a
92
- job ID, allowing the site to execute the intended workflow without additional configuration.
20
+ MINTER = "Persistent identifiers"
21
+ SYSTEM_ADMINISTRATION = "System administration"
22
+ WORKFLOWS = "Workflow management"
23
+ METADATA_ACCESS = "Metadata access"
24
+ USERS = "User accounts"
93
25
 
94
- A job can have multiple executions, and a workflow's executions are precisely the executions of all
95
- jobs created for that workflow.
96
26
 
97
- A site that already has a compatible job execution result can preempt the unnecessary creation of a
98
- job by pre-claiming it. This will return like a claim, and now the site can register known data
99
- object inputs for the job without the risk of the runtime system creating a claimable job of the
100
- pre-claimed type.
101
- """
27
+ # Mapping from tag names to their (Markdown-formatted) descriptions.
28
+ tag_descriptions: Dict[str, str] = {}
102
29
 
103
30
  tag_descriptions[
104
- "objects"
31
+ OpenAPITag.METADATA_ACCESS.value
105
32
  ] = r"""
106
- A [Data Repository Service (DRS)
107
- object](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.1.0/docs/#_drs_datatypes)
108
- represents content necessary for a workflow job to execute, and/or output from a job execution.
33
+ Retrieve and manage metadata.
109
34
 
110
- An object may be a *blob*, analogous to a file, or a *bundle*, analogous to a folder. Sites register
111
- objects, and sites must ensure that these objects are accessible to the NMDC data broker.
35
+ The metadata access endpoints fall into several subcategories:
112
36
 
113
- An object may be associated with one or more object types, useful for triggering workflows.
37
+ - **Find**: Find a few types of metadata, using a simplified syntax.
38
+ - Each endpoint deals with a predetermined type of metadata; i.e., [studies](https://w3id.org/nmdc/Study/), [biosamples](https://w3id.org/nmdc/Biosample/), [data objects](https://w3id.org/nmdc/DataObject/), [planned processes](https://w3id.org/nmdc/PlannedProcess/), or [workflow executions](https://w3id.org/nmdc/WorkflowExecution/).
39
+ - **NMDC schema**: Examine the [NMDC schema](https://microbiomedata.github.io/nmdc-schema/), itself, and use schema-related terminology to find metadata of any type.
40
+ - **Queries**: Find, update, and delete metadata using [MongoDB commands](https://www.mongodb.com/docs/manual/reference/command/#user-commands).
41
+ - **Changesheets**: Modify metadata by uploading [changesheets](https://docs.microbiomedata.org/runtime/howto-guides/author-changesheets/).
42
+ - **JSON operations**: Insert or update metadata by submitting a JSON document representing a [Database](https://w3id.org/nmdc/Database/).
114
43
  """
115
44
 
116
45
  tag_descriptions[
117
- "operations"
46
+ OpenAPITag.WORKFLOWS.value
118
47
  ] = r"""
119
- An operation is a resource for tracking the execution of a job.
120
-
121
- When a job is claimed by a site for execution, an operation resource is created.
122
-
123
- An operation is akin to a "promise" or "future" in that it should eventually resolve to either a
124
- successful result, i.e. an execution resource, or to an error.
125
-
126
- An operation is parameterized to return a result type, and a metadata type for storing progress
127
- information, that are both particular to the job type.
128
-
129
- Operations may be paused, resumed, and/or cancelled.
130
-
131
- Operations may expire, i.e. not be stored indefinitely. In this case, it is recommended that
132
- execution resources have longer lifetimes / not expire, so that information about successful results
133
- of operations are available.
48
+ Manage workflows and their execution.
49
+
50
+ The workflow management endpoints fall into several subcategories:
51
+
52
+ - **Sites**: Register compute sites that can execute workflows, and generate credentials for them.
53
+ - A site corresponds to a physical place that may participate in job execution.
54
+ - A site may register data objects and capabilities with the Runtime. It may claim jobs to execute, and it may update job operations with execution info.
55
+ - A site must be able to service requests for any data objects it has registered.
56
+ - A site may expose a "put object" custom method for authorized users. This method facilitates an operation to upload an object to the site and have the site register that object with the Runtime system.
57
+ - **Workflows**: Manage workflow templates, which serve as blueprints for job execution.
58
+ - A workflow is a template for creating jobs.
59
+ - Workflow jobs are typically created by the system via triggers, which are associations between workflows and data object types.
60
+ - **Capabilities**: Manage the technical requirements that sites must meet to execute specific workflows.
61
+ - A workflow may require a site that executes it to have specific capabilities.
62
+ - These capabilities may go beyond the simple ability to access the data objects registered with the Runtime system.
63
+ - Sites register their capabilities, and sites are only able to claim workflow jobs if those sites have the capabilities required by the workflow.
64
+ - **Object types**: Manage the types of data objects whose creation can trigger job creation and, eventually, workflow execution.
65
+ - A data object type is an annotation that can be applied to data objects.
66
+ - A data object may have one or more types. Those types can be associated with workflows, through triggers.
67
+ - **Triggers**: Define associations between workflows and object types to enable automatic job creation.
68
+ - A [trigger](https://docs.microbiomedata.org/runtime/howto-guides/create-triggers/) is an association between a workflow and a data object type.
69
+ - When a data object is [annotated with a type](https://docs.microbiomedata.org/runtime/nb/queue_and_trigger_data_jobs/#use-case-annotate-a-known-object-with-a-type-that-will-trigger-a-workflow)—which may occur shortly after object registration—the Runtime will check—via trigger associations—whether it is due to create any jobs.
70
+ - **Jobs**: Manage the [claiming](https://docs.microbiomedata.org/runtime/howto-guides/claim-and-run-jobs/) and status of workflow executions.
71
+ - A job is a resource that decouples the configuration of a workflow from the execution of that workflow.
72
+ - Rather than directly creating a workflow operation, the Runtime creates a job that pairs a workflow with its configuration. Then, a site can claim the job—by its ID—and execute the associated workflow without doing additional configuration.
73
+ - A job can have multiple executions. All executions of all jobs of a given workflow make up that workflow's executions.
74
+ - A site that already has a compatible job execution result can preempt the unnecessary creation of a job by _pre-claiming_ it. This will return like a claim, and now the site can register known data object inputs for the job without the risk of the Runtime creating a claimable job of the pre-claimed type.
75
+ - **Objects**: Manage the Data Repository Service (DRS) objects that are inputs and outputs of workflow executions.
76
+ - A [Data Repository Service (DRS) object](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.1.0/docs/#_drs_datatypes) represents content necessary for—or content produced by—job execution.
77
+ - An object may be a *blob* (analogous to a file) or a *bundle* (analogous to a folder). Sites register objects, and sites must ensure that these objects are accessible to the "NMDC data broker."
78
+ - An object may be annotated with one or more object types, useful for triggering workflows.
79
+ - **Operations**: Track and monitor the real-time execution status of claimed jobs, including progress updates and error handling.
80
+ - An operation is a resource for tracking the execution of a job.
81
+ - When a job is claimed by a site for execution, an operation resource is created.
82
+ - An operation is like a "promise," in that it should eventually resolve to either a successful result—i.e., an execution resource—or to an error.
83
+ - An operation is parameterized to return a result type, and a metadata type for storing progress information, that are both particular to the job type.
84
+ - Operations may be paused, resumed, and/or cancelled.
85
+ - Operations may expire, i.e. not be stored indefinitely. In this case, it is recommended that execution resources have longer lifetimes/not expire, so that information about successful results of operations is available.
86
+ - **Runs**: _(work in progress)_ Execute simple jobs and report execution events back to the Runtime.
87
+ - Run simple jobs.
88
+ - For off-site job runs, keep the Runtime apprised of run events.
134
89
  """
135
90
 
136
91
  tag_descriptions[
137
- "queries"
92
+ OpenAPITag.USERS.value
138
93
  ] = r"""
139
- A query is an operation (find, update, etc.) against the metadata store.
140
-
141
- Metadata -- for studies, biosamples, omics processing, etc. -- is used by sites to execute jobs,
142
- as the parameterization of job executions may depend not only on the content of data objects, but
143
- also on objects' associated metadata.
144
-
145
- Also, the function of many workflows is to extract or produce new metadata. Such metadata products
146
- should be registered as data objects, and they may also be supplied by sites to the runtime system
147
- as an update query (if the latter is not done, the runtime system will sense the new metadata and
148
- issue an update query).
94
+ Create and manage user accounts.
149
95
  """
150
96
 
151
97
  tag_descriptions[
152
- "metadata"
98
+ OpenAPITag.MINTER.value
153
99
  ] = r"""
154
- The [metadata endpoints](https://api.microbiomedata.org/docs#/metadata) can be used to get and filter
155
- metadata from collection set types (including
156
- [studies](https://w3id.org/nmdc/Study/),
157
- [biosamples](https://w3id.org/nmdc/Biosample/),
158
- [planned processes](https://w3id.org/nmdc/PlannedProcess/), and
159
- [data objects](https://w3id.org/nmdc/DataObject/)
160
- as discussed in the __find__ section).
161
- <br/>
162
-
163
- The __metadata__ endpoints allow users to retrieve metadata from the data portal using the various
164
- GET endpoints that are slightly different than the __find__ endpoints, but some can be used similarly.
165
- As with the __find__ endpoints, parameters for the __metadata__ endpoints that do not have a
166
- red ___* required___ next to them are optional. <br/>
167
-
168
- Unlike the compact syntax used in the __find__ endpoints, the syntax for the filter parameter of
169
- the metadata endpoints
170
- uses [MongoDB-like language querying](https://www.mongodb.com/docs/manual/tutorial/query-documents/).
100
+ Mint and manage persistent identifiers.
171
101
  """
172
102
 
173
103
  tag_descriptions[
174
- "find"
104
+ OpenAPITag.SYSTEM_ADMINISTRATION.value
175
105
  ] = r"""
176
- The [find endpoints](https://api.microbiomedata.org/docs#/find) are provided with NMDC metadata entities
177
- already specified - where metadata about [studies](https://w3id.org/nmdc/Study),
178
- [biosamples](https://w3id.org/nmdc/Biosample), [data objects](https://w3id.org/nmdc/DataObject/),
179
- and [planned processes](https://w3id.org/nmdc/PlannedProcess/) can be retrieved using GET requests.
180
- <br/>
181
-
182
- Each endpoint is unique and requires the applicable attribute names to be known in order to structure a query
183
- in a meaningful way. Parameters that do not have a red ___* required___ label next to them are optional.
184
- """
185
-
186
- tag_descriptions[
187
- "runs"
188
- ] = r"""
189
- **WORK IN PROGRESS**
190
-
191
- Run simple jobs.
192
-
193
- For off-site job runs, keep the Runtime appraised of run events.
106
+ Retrieve information about the software components that make up the Runtime.
194
107
  """
195
108
 
196
109
  # Remove leading and trailing whitespace from each description.
197
110
  for name, description in tag_descriptions.items():
198
111
  tag_descriptions[name] = description.strip()
199
112
 
200
- ordered_tag_descriptors: List[Dict[str, str]] = [
201
- {"name": "sites", "description": tag_descriptions["sites"]},
202
- {"name": "users", "description": tag_descriptions["users"]},
203
- {"name": "workflows", "description": tag_descriptions["workflows"]},
204
- {"name": "capabilities", "description": tag_descriptions["capabilities"]},
205
- {"name": "object types", "description": tag_descriptions["object types"]},
206
- {"name": "triggers", "description": tag_descriptions["triggers"]},
207
- {"name": "jobs", "description": tag_descriptions["jobs"]},
208
- {"name": "objects", "description": tag_descriptions["objects"]},
209
- {"name": "operations", "description": tag_descriptions["operations"]},
210
- {"name": "queries", "description": tag_descriptions["queries"]},
211
- {"name": "metadata", "description": tag_descriptions["metadata"]},
212
- {"name": "find", "description": tag_descriptions["find"]},
213
- {"name": "runs", "description": tag_descriptions["runs"]},
113
+ ordered_tag_descriptors: List[Dict] = [
114
+ {
115
+ "name": OpenAPITag.METADATA_ACCESS.value,
116
+ "description": tag_descriptions[OpenAPITag.METADATA_ACCESS.value],
117
+ },
118
+ {
119
+ "name": OpenAPITag.WORKFLOWS.value,
120
+ "description": tag_descriptions[OpenAPITag.WORKFLOWS.value],
121
+ },
122
+ {
123
+ "name": OpenAPITag.MINTER.value,
124
+ "description": tag_descriptions[OpenAPITag.MINTER.value],
125
+ },
126
+ {
127
+ "name": OpenAPITag.USERS.value,
128
+ "description": tag_descriptions[OpenAPITag.USERS.value],
129
+ },
130
+ {
131
+ "name": OpenAPITag.SYSTEM_ADMINISTRATION.value,
132
+ "description": tag_descriptions[OpenAPITag.SYSTEM_ADMINISTRATION.value],
133
+ },
214
134
  ]
215
135
 
216
136
 
217
- def make_api_description(schema_version: str, orcid_login_url: str) -> str:
137
+ def make_api_description(api_version: str, schema_version: str) -> str:
218
138
  r"""
219
- Returns an API description into which the specified schema version and
220
- ORCID login URL have been incorporated.
139
+ Returns an API description into which the specified API version and schema version strings have been incorporated.
221
140
 
222
141
  Args:
142
+ api_version (str): The version of this Runtime instance.
223
143
  schema_version (str): The version of `nmdc-schema` the Runtime is using.
224
- orcid_login_url (str): The URL at which a user could login via ORCID.
225
144
 
226
145
  Returns:
227
146
  str: The Markdown-formatted API description.
228
147
  """
229
148
  result = f"""
230
- The NMDC Runtime API, via on-demand functions and via schedule-based and sensor-based automation,
231
- supports validation and submission of metadata, as well as orchestration of workflow executions.
149
+ Welcome to the **NMDC Runtime API**, an API you can use to [access metadata](https://docs.microbiomedata.org/howto_guides/api_gui/) residing in the NMDC database.
232
150
 
233
- [NMDC Schema](https://microbiomedata.github.io/nmdc-schema/) version: `{schema_version}`
151
+ Users having adequate permissions can also use it to generate identifiers, submit metadata,
152
+ and manage workflow executions.
153
+
154
+ ##### Quick start
234
155
 
235
- [Documentation](https://docs.microbiomedata.org/runtime/)
156
+ The endpoints of the NMDC Runtime API are listed below.
157
+ They are organized into sections, each of which can be opened and closed.
158
+ The endpoints, themselves, can also be opened and closed.
236
159
 
237
- <img src="/static/ORCIDiD_icon128x128.png" height="18" width="18"/>
238
- <a href="{escape(orcid_login_url)}" title="Login with ORCID">
239
- Login with ORCID
240
- </a>
160
+ Each endpoint—when opened—has a "Try it out" button, which you can press in order to send a request
161
+ to the endpoint directly from this web page. Each endpoint can also be
162
+ [accessed programmatically](https://docs.microbiomedata.org/runtime/nb/api_access_via_python/).
163
+
164
+ Some endpoints have a padlock icon, which means that the endpoint is only accessible to logged-in users.
165
+ You can log in by clicking the "Authorize" button located directly above the list of endpoints.
166
+
167
+ ##### Contact us
168
+
169
+ You can [contact us](https://microbiomedata.org/contact/) anytime.
170
+ We continuously refine the API and may be able to streamline your use case.
171
+
172
+ ##### Versions
173
+
174
+ [NMDC Runtime](https://docs.microbiomedata.org/runtime/) version: `{api_version}`
175
+
176
+ [NMDC Schema](https://microbiomedata.github.io/nmdc-schema/) version: `{schema_version}`
241
177
  """.strip()
242
178
  return result