nmdc-runtime 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +167 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/mongo.py +435 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +502 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +270 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +796 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +425 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +7 -8
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +633 -13
- nmdc_runtime/site/export/ncbi_xml_utils.py +115 -1
- nmdc_runtime/site/graphs.py +8 -22
- nmdc_runtime/site/ops.py +147 -181
- nmdc_runtime/site/repository.py +2 -112
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/gold_translator.py +4 -12
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +84 -68
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +90 -48
- nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
- nmdc_runtime-2.11.0.dist-info/RECORD +128 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.9.0.dist-info/METADATA +0 -214
- nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
- nmdc_runtime-2.9.0.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
from typing import TypeVar, List, Optional, Generic, Annotated
|
|
2
|
+
|
|
3
|
+
from pydantic import model_validator, Field, BaseModel
|
|
4
|
+
|
|
5
|
+
# Type variable standing for the type of the items in a `ListResponse`'s `resources` list.
ResultT = TypeVar("ResultT")
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Generic envelope for one page of resources; `ResultT` is the type of each resource.
class ListResponse(BaseModel, Generic[ResultT]):
    # The resources that make up this page.
    resources: List[ResultT]
    # Bookmark for fetching the next page (`ListRequest.page_token` is described as
    # coming from this field). Defaults to `None`.
    # NOTE(review): presumably `None` means there are no further pages — confirm against the endpoints.
    next_page_token: Optional[str] = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ListRequest(BaseModel):
    r"""
    An encapsulation of a set of parameters accepted by API endpoints related to listing things.

    Note: This class was documented after the `FindRequest` class was documented. You can refer to the documentation of
    the latter class for additional context about the usage of Pydantic's `Field` constructor in this class.
    """

    # MongoDB-style filter, received as a string. It is not parsed or validated in this class.
    filter: Optional[str] = Field(
        default=None,
        title="Filter",
        description="""The criteria by which you want to filter the resources, in the same format as the [`query`
parameter](https://www.mongodb.com/docs/manual/reference/method/db.collection.find/#std-label-method-find-query)
of MongoDB's `db.collection.find()` method.\n\n_Example:_
`{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}`""",
        examples=[
            r'{"ecosystem_type": "Freshwater"}',
            r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
        ],
    )
    # TODO: Document the following things about this type hint and `Field` definition:
    #       (a) why the type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter),
    #       (b) why the default value here is 20 as opposed to 25 (the default value in `FindRequest`), and
    #       (c) why there is no upper limit on the value (the `PerPageRange` type has an upper limit of 2000).
    #
    # Note: If the HTTP request lacks a value for this parameter, Pydantic will fall back to the default value specified here.
    #
    # NOTE(review): The description below promises a "positive integer", but the plain `int` annotation
    #               does not enforce a lower bound, so zero and negative values pass validation here.
    max_page_size: int = Field(
        default=20,
        title="Resources per page",
        description="How many resources you want _each page_ to contain, formatted as a positive integer.",
        examples=[20],
    )
    # Counterpart of the `next_page_token` field of a previous response (see the description below).
    page_token: Optional[str] = Field(
        default=None,
        title="Next page token",
        description="""A bookmark you can use to fetch the _next_ page of resources. You can get this from the
`next_page_token` field in a previous response from this endpoint.\n\n_Example_:
`nmdc:sys0zr0fbt71`""",
        examples=[
            "nmdc:sys0zr0fbt71",
        ],
    )
    # TODO: Document the endpoint's behavior when a projection includes a _nested_ field identifier (i.e. `foo.bar`),
    #       and ensure the endpoint doesn't break when the projection includes field descriptors that contain commas.
    #
    # Comma-delimited field names, received as a single string. It is not split in this class.
    projection: Optional[str] = Field(
        default=None,
        title="Projection",
        description="""Comma-delimited list of the names of the fields you want the resources in the response to
include. Note: In addition to those fields, the response will also include the `id`
field.\n\n_Example_: `name, ecosystem_type`""",
        examples=[
            "name, ecosystem_type",
        ],
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Integer type constrained to the range 0 < n <= 2000; used as the type of `FindRequest.per_page`.
PerPageRange = Annotated[int, Field(gt=0, le=2_000)]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class FindRequest(BaseModel):
    r"""
    An encapsulation of a set of parameters accepted by API endpoints related to finding things.

    Notes:
    - The "Query Parameter Models" section of the FastAPI docs says that this way of encapsulating
      a set of query parameter definitions in a Pydantic model — so that Swagger UI displays a given
      parameter's _description_ — was introduced in FastAPI 0.115.0.
      Reference: https://fastapi.tiangolo.com/tutorial/query-param-models/
    - While Swagger UI does show the parameter's _description_, specifically, it does not currently show the
      parameter's _title_ or example value(s). The approach shown in the "Classes as Dependencies" section
      of the FastAPI docs (i.e. https://fastapi.tiangolo.com/tutorial/dependencies/classes-as-dependencies/)
      does result in Swagger UI showing those additional things, but the approach involves not inheriting
      from Pydantic's `BaseModel` class and involves defining an `__init__` method for the class. That is
      further than I want to take these classes from their existing selves at this point. To compensate
      for that, I have included examples _within_ some of the descriptions.
      Reference: https://github.com/fastapi/fastapi/issues/318#issuecomment-507043221
    - The "Fields" section of the Pydantic docs says:
      > "The `Field` function is used to customize and add metadata to fields of models."
      References: https://docs.pydantic.dev/latest/concepts/fields/
    """

    filter: Optional[str] = Field(
        default=None,
        title="Filter",
        description="""The criteria by which you want to filter the resources, formatted as a comma-separated list of
`attribute:value` pairs. The `value` can include a comparison operator (e.g. `>=`). If the attribute
is of type _string_ and you append `.search` to its name, the server will perform a full-text
search.\n\n_Example:_ `ecosystem_category:Plants, lat_lon.latitude:>35.0`""",
        examples=[
            "ecosystem_category:Plants",
            "ecosystem_category:Plants, lat_lon.latitude:>35.0",
        ],
    )
    search: Optional[str] = Field(
        default=None,
        title="Search",
        description="N/A _(not implemented yet)_",
    )
    sort: Optional[str] = Field(
        default=None,
        title="Sort",
        description="""How you want the resources to be ordered in the response, formatted as a comma-separated list of
`attribute:value` pairs. Each `attribute` is the name of a field you want the resources to be
ordered by, and each `value` is the direction you want the values in that field to be ordered
(i.e. `asc` or no value for _ascending_ order, and `desc` for _descending_ order).\n\n_Example:_
`depth.has_numeric_value:desc, ecosystem_type`""",
        examples=[
            "depth.has_numeric_value:desc",
            "depth.has_numeric_value:desc, ecosystem_type",
        ],
    )
    # Note: When neither `page` nor `cursor` is supplied, the validator below sets `page` to 1.
    page: Optional[int] = Field(
        default=None,
        title="Page number",
        description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
This is the page number formatted as an integer ≥ 1.
**Limitation:** When using _page number_-based pagination, only the first 10,000 resources
are accessible. You can access resources beyond that by using _cursor_-based pagination.""",
        examples=[1],
    )
    per_page: PerPageRange = Field(
        default=25,
        title="Resources per page",
        description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
        examples=[25],
    )
    cursor: Optional[str] = Field(
        default=None,
        title="Cursor",
        description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
To begin using cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
        examples=[
            "*",
            "nmdc:sys0zr0fbt71",
        ],
    )
    group_by: Optional[str] = Field(
        default=None,
        title="Group by",
        description="N/A _(not implemented yet)_",
    )
    fields: Optional[str] = Field(
        default=None,
        title="Fields",
        description="""The fields you want the resources to include in the response, formatted as a comma-separated list
of field names. This can be used to reduce the size and complexity of the response.\n\n_Example:_
`name, ess_dive_datasets`""",
        examples=[
            "name",
            "name, ess_dive_datasets",
        ],
    )

    # Reference: https://docs.pydantic.dev/latest/concepts/validators/#model-validators
    @model_validator(mode="before")
    @classmethod
    def set_page_if_cursor_unset(cls, values):
        r"""
        Enforces that page-based and cursor-based pagination are mutually exclusive,
        and falls back to page 1 when the request specifies neither.

        Args:
            values: The raw, not-yet-validated input to the model (normally a dict).

        Returns:
            The input to validate; a copy with `page` set to 1 when both `page` and `cursor` are absent.

        Raises:
            ValueError: If both `page` and `cursor` are specified.
        """
        # A "before" validator receives the raw input, which is not guaranteed to be a dict.
        # Leave anything else untouched so that Pydantic's own validation handles it.
        if not isinstance(values, dict):
            return values

        page, cursor = values.get("page"), values.get("cursor")
        if page is not None and cursor is not None:
            raise ValueError("cannot use cursor- and page-based pagination together")
        if page is None and cursor is None:
            # Return a copy rather than modifying the caller's dict in place.
            return {**values, "page": 1}
        return values
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# Response envelope whose name pairs it with `FindRequest`.
# NOTE(review): presumably returned by the "find" endpoints — confirm against the endpoint definitions.
class FindResponse(BaseModel):
    # Free-form metadata about the response. The description of `FindRequest.cursor` says
    # this object includes a `next_cursor` field when cursor-based pagination is in use.
    meta: dict
    # NOTE(review): presumably the resources that matched the request — confirm.
    results: List[dict]
    # Required (no default), even though `FindRequest.group_by` is described as not implemented yet.
    group_by: List[dict]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class DeleteResponse(BaseModel):
    r"""
    Response model for "delete" operations. It summarizes the result of the
    operation and it lists identifiers of the documents that were deleted.
    """

    # The only required field; every list below defaults to an empty list.
    message: str = Field(
        description="Success message describing the deletion operation"
    )
    deleted_workflow_execution_ids: List[str] = Field(
        # Note: `default_factory=list` sets this to an empty list by default.
        default_factory=list,
        description="The `id`s of the `WorkflowExecution`s that were deleted",
    )
    deleted_data_object_ids: List[str] = Field(
        default_factory=list,
        description="The `id`s of the `DataObject`s that were deleted",
    )
    # Note: Unlike the other lists, this one is described as holding MongoDB `ObjectId`s
    #       (typed here as strings), not `id` values.
    deleted_functional_annotation_agg_oids: List[str] = Field(
        default_factory=list,
        description="The internal MongoDB `ObjectId`s of the `FunctionalAnnotationAggMember`s that were deleted",
    )
    deleted_job_ids: List[str] = Field(
        default_factory=list,
        description="The `id`s of the `jobs` documents that were deleted",
    )
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Mapping from a MongoDB collection name to the set of field paths (dot-delimited for nested fields)
# to index in that collection.
#
# Note: For MongoDB, a single collection can have no more than 64 indexes.
# Note: Each collection has a unique index set on "id" elsewhere.
entity_attributes_to_index = {
    "biosample_set": {
        "alternative_identifiers",
        "env_broad_scale.has_raw_value",
        "env_local_scale.has_raw_value",
        "env_medium.has_raw_value",
        "collection_date.has_raw_value",
        "ecosystem",
        "ecosystem_category",
        "ecosystem_type",
        "ecosystem_subtype",
        "specific_ecosystem",
        # Note: If `lat_lon` were GeoJSON, i.e. {type,coordinates}, MongoDB's "2dsphere" index
        #       would be an option; here, latitude and longitude are listed as separate fields.
        "lat_lon.latitude",
        "lat_lon.longitude",
    },
    "study_set": {
        "has_credit_associations.applied_roles",
        "has_credit_associations.applies_to_person.name",
        "has_credit_associations.applies_to_person.orcid",
    },
    "data_object_set": {
        "data_object_type",
        "file_size_bytes",
        "md5_checksum",
        "url",
    },
    # TODO: Refrain from ensuring indexes exist in the `omics_processing_set` collection,
    #       since that collection was deleted as part of the "Berkeley schema" refactor.
    #       Reference: https://microbiomedata.github.io/nmdc-schema/v10-vs-v11-retrospective/#slots-removed-from-database
    "omics_processing_set": {
        "has_input",
        "has_output",
        "instrument_name",
        "alternative_identifiers",
    },
    "functional_annotation_agg": {"was_generated_by"},
    "workflow_execution_set": {
        "has_input",
        "has_output",
    },
    # Note: The `jobs` collection is not described by the NMDC schema.
    "jobs": {
        "config.activity_id",
    },
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Optional, List
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Fields shared by workflow models; every field is optional and defaults to `None`.
class WorkflowBase(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
    # NOTE(review): presumably the `id`s of the capabilities a site needs in order to run
    #               this workflow — confirm against the capability model.
    capability_ids: Optional[List[str]] = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# A workflow with a required identifier, in addition to the optional `WorkflowBase` fields.
class Workflow(WorkflowBase):
    # Required; the only field of this model that has no default value.
    id: str
    # NOTE(review): no timezone handling is visible here — confirm whether values are expected to be UTC.
    created_at: Optional[datetime.datetime] = None
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
r"""
|
|
2
|
+
This module contains the definitions of constants and functions related to
|
|
3
|
+
generating the API's OpenAPI schema (a.k.a. Swagger schema).
|
|
4
|
+
|
|
5
|
+
References:
|
|
6
|
+
- FastAPI Documentation: https://fastapi.tiangolo.com/tutorial/metadata/
|
|
7
|
+
|
|
8
|
+
Notes:
|
|
9
|
+
- The tag descriptions in this file were cut/pasted from `nmdc_runtime/api/main.py`.
|
|
10
|
+
Now that they are in a separate module, we will be able to edit them more easily.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import List, Dict
|
|
14
|
+
from enum import Enum
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OpenAPITag(str, Enum):
    r"""
    A tag you can use to group related API endpoints together in an OpenAPI schema.

    Each member's value is the tag's human-readable name. Those values are also used
    as the keys of the `tag_descriptions` dictionary defined in this module.
    """

    MINTER = "Persistent identifiers"
    SYSTEM_ADMINISTRATION = "System administration"
    WORKFLOWS = "Workflow management"
    METADATA_ACCESS = "Metadata access"
    USERS = "User accounts"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Mapping from tag names to their (Markdown-formatted) descriptions.
# Note: Each key is the `value` of an `OpenAPITag` member. The descriptions are written as
#       triple-quoted literals that begin and end with a newline; that surrounding
#       whitespace is stripped by the loop that follows the assignments.
tag_descriptions: Dict[str, str] = {}

# Description of the "Metadata access" tag.
tag_descriptions[
    OpenAPITag.METADATA_ACCESS.value
] = r"""
Retrieve and manage metadata.

The metadata access endpoints fall into several subcategories:

- **Find**: Find a few types of metadata, using a simplified syntax.
- Each endpoint deals with a predetermined type of metadata; i.e., [studies](https://w3id.org/nmdc/Study/), [biosamples](https://w3id.org/nmdc/Biosample/), [data objects](https://w3id.org/nmdc/DataObject/), [planned processes](https://w3id.org/nmdc/PlannedProcess/), or [workflow executions](https://w3id.org/nmdc/WorkflowExecution/).
- **NMDC schema**: Examine the [NMDC schema](https://microbiomedata.github.io/nmdc-schema/), itself, and use schema-related terminology to find metadata of any type.
- **Queries**: Find, update, and delete metadata using [MongoDB commands](https://www.mongodb.com/docs/manual/reference/command/#user-commands).
- **Changesheets**: Modify metadata by uploading [changesheets](https://docs.microbiomedata.org/runtime/howto-guides/author-changesheets/).
- **JSON operations**: Insert or update metadata by submitting a JSON document representing a [Database](https://w3id.org/nmdc/Database/).
"""
|
|
44
|
+
|
|
45
|
+
# Description of the "Workflow management" tag (Markdown-formatted).
# Note: The surrounding newlines are stripped later in this module.
tag_descriptions[
    OpenAPITag.WORKFLOWS.value
] = r"""
Manage workflows and their execution.

The workflow management endpoints fall into several subcategories:

- **Sites**: Register compute sites that can execute workflows, and generate credentials for them.
- A site corresponds to a physical place that may participate in job execution.
- A site may register data objects and capabilities with the Runtime. It may claim jobs to execute, and it may update job operations with execution info.
- A site must be able to service requests for any data objects it has registered.
- A site may expose a "put object" custom method for authorized users. This method facilitates an operation to upload an object to the site and have the site register that object with the Runtime system.
- **Workflows**: Manage workflow templates, which serve as blueprints for job execution.
- A workflow is a template for creating jobs.
- Workflow jobs are typically created by the system via triggers, which are associations between workflows and data object types.
- **Capabilities**: Manage the technical requirements that sites must meet to execute specific workflows.
- A workflow may require a site that executes it to have specific capabilities.
- These capabilities may go beyond the simple ability to access the data objects registered with the Runtime system.
- Sites register their capabilities, and sites are only able to claim workflow jobs if those sites have the capabilities required by the workflow.
- **Object types**: Manage the types of data objects whose creation can trigger job creation and, eventually, workflow execution.
- A data object type is an annotation that can be applied to data objects.
- A data object may have one or more types. Those types can be associated with workflows, through triggers.
- **Triggers**: Define associations between workflows and object types to enable automatic job creation.
- A [trigger](https://docs.microbiomedata.org/runtime/howto-guides/create-triggers/) is an association between a workflow and a data object type.
- When a data object is [annotated with a type](https://docs.microbiomedata.org/runtime/nb/queue_and_trigger_data_jobs/#use-case-annotate-a-known-object-with-a-type-that-will-trigger-a-workflow)—which may occur shortly after object registration—the Runtime will check—via trigger associations—whether it is due to create any jobs.
- **Jobs**: Manage the [claiming](https://docs.microbiomedata.org/runtime/howto-guides/claim-and-run-jobs/) and status of workflow executions.
- A job is a resource that decouples the configuration of a workflow, from execution of that workflow.
- Rather than directly creating a workflow operation, the Runtime creates a job that pairs a workflow with its configuration. Then, a site can claim the job—by its ID—and execute the associated workflow without doing additional configuration.
- A job can have multiple executions. All executions of all jobs of a given workflow, make up that workflow's executions.
- A site that already has a compatible job execution result can preempt the unnecessary creation of a job by _pre-claiming_ it. This will return like a claim, and now the site can register known data object inputs for the job without the risk of the Runtime creating a claimable job of the pre-claimed type.
- **Objects**: Manage the Data Repository Service (DRS) objects that are inputs and outputs of workflow executions.
- A [Data Repository Service (DRS) object](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.1.0/docs/#_drs_datatypes) represents content necessary for—or content produced by—job execution.
- An object may be a *blob* (analogous to a file) or a *bundle* (analogous to a folder). Sites register objects, and sites must ensure that these objects are accessible to the "NMDC data broker."
- An object may be annotated with one or more object types, useful for triggering workflows.
- **Operations**: Track and monitor the real-time execution status of claimed jobs, including progress updates and error handling.
- An operation is a resource for tracking the execution of a job.
- When a job is claimed by a site for execution, an operation resource is created.
- An operation is like a "promise," in that it should eventually resolve to either a successful result—i.e., an execution resource—or to an error.
- An operation is parameterized to return a result type, and a metadata type for storing progress information, that are both particular to the job type.
- Operations may be paused, resumed, and/or cancelled.
- Operations may expire, i.e. not be stored indefinitely. In this case, it is recommended that execution resources have longer lifetimes/not expire, so that information about successful results of operations is available.
- **Runs**: _(work in progress)_ Execute simple jobs and report execution events back to the Runtime.
- Run simple jobs.
- For off-site job runs, keep the Runtime apprised of run events.
"""
|
|
90
|
+
|
|
91
|
+
# Descriptions of the remaining tags; each one is a single sentence.
# Note: The surrounding newlines are stripped later in this module.
tag_descriptions.update(
    {
        OpenAPITag.USERS.value: r"""
Create and manage user accounts.
""",
        OpenAPITag.MINTER.value: r"""
Mint and manage persistent identifiers.
""",
        OpenAPITag.SYSTEM_ADMINISTRATION.value: r"""
Retrieve information about the software components that make up the Runtime.
""",
    }
)
|
|
108
|
+
|
|
109
|
+
# Trim the whitespace surrounding each description, updating the dictionary in place.
# Note: The keys are snapshotted first, so the dictionary is not written to while it is being iterated.
for name in list(tag_descriptions):
    description = tag_descriptions[name]
    tag_descriptions[name] = description.strip()
|
|
112
|
+
|
|
113
|
+
# Tag descriptors (name plus description), in the order listed here.
ordered_tag_descriptors: List[Dict] = [
    {"name": tag.value, "description": tag_descriptions[tag.value]}
    for tag in (
        OpenAPITag.METADATA_ACCESS,
        OpenAPITag.WORKFLOWS,
        OpenAPITag.MINTER,
        OpenAPITag.USERS,
        OpenAPITag.SYSTEM_ADMINISTRATION,
    )
]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def make_api_description(api_version: str, schema_version: str) -> str:
    r"""
    Builds the Markdown-formatted description of the API, embedding both
    specified version strings in its "Versions" section.

    Args:
        api_version (str): The version of this Runtime instance.
        schema_version (str): The version of `nmdc-schema` the Runtime is using.

    Returns:
        str: The Markdown-formatted API description, without leading or trailing whitespace.
    """
    # Note: The literal begins and ends with a newline for readability; `strip()` removes them.
    padded_description = f"""
Welcome to the **NMDC Runtime API**, an API you can use to [access metadata](https://docs.microbiomedata.org/howto_guides/api_gui/) residing in the NMDC database.

Users having adequate permissions can also use it to generate identifiers, submit metadata,
and manage workflow executions.

##### Quick start

The endpoints of the NMDC Runtime API are listed below.
They are organized into sections, each of which can be opened and closed.
The endpoints, themselves, can also be opened and closed.

Each endpoint—when opened—has a "Try it out" button, which you can press in order to send a request
to the endpoint directly from this web page. Each endpoint can also be
[accessed programmatically](https://docs.microbiomedata.org/runtime/nb/api_access_via_python/).

Some endpoints have a padlock icon, which means that the endpoint is only accessible to logged-in users.
You can log in by clicking the "Authorize" button located directly above the list of endpoints.

##### Contact us

You can [contact us](https://microbiomedata.org/contact/) anytime.
We continuously refine the API and may be able to streamline your use case.

##### Versions

[NMDC Runtime](https://docs.microbiomedata.org/runtime/) version: `{api_version}`

[NMDC Schema](https://microbiomedata.github.io/nmdc-schema/) version: `{schema_version}`
"""
    return padded_description.strip()
|