nmdc-runtime 2.8.0__py3-none-any.whl → 2.10.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (100)
  1. nmdc_runtime/api/__init__.py +0 -0
  2. nmdc_runtime/api/analytics.py +70 -0
  3. nmdc_runtime/api/boot/__init__.py +0 -0
  4. nmdc_runtime/api/boot/capabilities.py +9 -0
  5. nmdc_runtime/api/boot/object_types.py +126 -0
  6. nmdc_runtime/api/boot/triggers.py +84 -0
  7. nmdc_runtime/api/boot/workflows.py +116 -0
  8. nmdc_runtime/api/core/__init__.py +0 -0
  9. nmdc_runtime/api/core/auth.py +208 -0
  10. nmdc_runtime/api/core/idgen.py +170 -0
  11. nmdc_runtime/api/core/metadata.py +788 -0
  12. nmdc_runtime/api/core/util.py +109 -0
  13. nmdc_runtime/api/db/__init__.py +0 -0
  14. nmdc_runtime/api/db/mongo.py +447 -0
  15. nmdc_runtime/api/db/s3.py +37 -0
  16. nmdc_runtime/api/endpoints/__init__.py +0 -0
  17. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  18. nmdc_runtime/api/endpoints/find.py +794 -0
  19. nmdc_runtime/api/endpoints/ids.py +192 -0
  20. nmdc_runtime/api/endpoints/jobs.py +143 -0
  21. nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
  22. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  23. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  24. nmdc_runtime/api/endpoints/metadata.py +260 -0
  25. nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
  26. nmdc_runtime/api/endpoints/object_types.py +38 -0
  27. nmdc_runtime/api/endpoints/objects.py +277 -0
  28. nmdc_runtime/api/endpoints/operations.py +105 -0
  29. nmdc_runtime/api/endpoints/queries.py +679 -0
  30. nmdc_runtime/api/endpoints/runs.py +98 -0
  31. nmdc_runtime/api/endpoints/search.py +38 -0
  32. nmdc_runtime/api/endpoints/sites.py +229 -0
  33. nmdc_runtime/api/endpoints/triggers.py +25 -0
  34. nmdc_runtime/api/endpoints/users.py +214 -0
  35. nmdc_runtime/api/endpoints/util.py +774 -0
  36. nmdc_runtime/api/endpoints/workflows.py +353 -0
  37. nmdc_runtime/api/main.py +401 -0
  38. nmdc_runtime/api/middleware.py +43 -0
  39. nmdc_runtime/api/models/__init__.py +0 -0
  40. nmdc_runtime/api/models/capability.py +14 -0
  41. nmdc_runtime/api/models/id.py +92 -0
  42. nmdc_runtime/api/models/job.py +37 -0
  43. nmdc_runtime/api/models/lib/__init__.py +0 -0
  44. nmdc_runtime/api/models/lib/helpers.py +78 -0
  45. nmdc_runtime/api/models/metadata.py +11 -0
  46. nmdc_runtime/api/models/minter.py +0 -0
  47. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  48. nmdc_runtime/api/models/object.py +180 -0
  49. nmdc_runtime/api/models/object_type.py +20 -0
  50. nmdc_runtime/api/models/operation.py +66 -0
  51. nmdc_runtime/api/models/query.py +246 -0
  52. nmdc_runtime/api/models/query_continuation.py +111 -0
  53. nmdc_runtime/api/models/run.py +161 -0
  54. nmdc_runtime/api/models/site.py +87 -0
  55. nmdc_runtime/api/models/trigger.py +13 -0
  56. nmdc_runtime/api/models/user.py +140 -0
  57. nmdc_runtime/api/models/util.py +253 -0
  58. nmdc_runtime/api/models/workflow.py +15 -0
  59. nmdc_runtime/api/openapi.py +242 -0
  60. nmdc_runtime/config.py +55 -4
  61. nmdc_runtime/core/db/Database.py +1 -3
  62. nmdc_runtime/infrastructure/database/models/user.py +0 -9
  63. nmdc_runtime/lib/extract_nmdc_data.py +0 -8
  64. nmdc_runtime/lib/nmdc_dataframes.py +3 -7
  65. nmdc_runtime/lib/nmdc_etl_class.py +1 -7
  66. nmdc_runtime/minter/adapters/repository.py +1 -2
  67. nmdc_runtime/minter/config.py +2 -0
  68. nmdc_runtime/minter/domain/model.py +35 -1
  69. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  70. nmdc_runtime/mongo_util.py +1 -2
  71. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  72. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  73. nmdc_runtime/site/export/ncbi_xml.py +1 -2
  74. nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
  75. nmdc_runtime/site/graphs.py +33 -28
  76. nmdc_runtime/site/ops.py +97 -237
  77. nmdc_runtime/site/repair/database_updater.py +8 -0
  78. nmdc_runtime/site/repository.py +7 -117
  79. nmdc_runtime/site/resources.py +4 -4
  80. nmdc_runtime/site/translation/gold_translator.py +22 -21
  81. nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
  82. nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
  83. nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
  84. nmdc_runtime/site/translation/submission_portal_translator.py +64 -54
  85. nmdc_runtime/site/translation/translator.py +63 -1
  86. nmdc_runtime/site/util.py +8 -3
  87. nmdc_runtime/site/validation/util.py +10 -5
  88. nmdc_runtime/util.py +9 -321
  89. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
  90. nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
  91. nmdc_runtime/site/translation/emsl.py +0 -43
  92. nmdc_runtime/site/translation/gold.py +0 -53
  93. nmdc_runtime/site/translation/jgi.py +0 -32
  94. nmdc_runtime/site/translation/util.py +0 -132
  95. nmdc_runtime/site/validation/jgi.py +0 -43
  96. nmdc_runtime-2.8.0.dist-info/RECORD +0 -84
  97. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
  98. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
  99. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
  100. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
nmdc_runtime/api/models/util.py ADDED
@@ -0,0 +1,253 @@
+ from typing import TypeVar, List, Optional, Generic, Annotated
+
+ from pydantic import model_validator, Field, BaseModel
+
+ ResultT = TypeVar("ResultT")
+
+
+ class ListResponse(BaseModel, Generic[ResultT]):
+     resources: List[ResultT]
+     next_page_token: Optional[str] = None
+
+
+ class ListRequest(BaseModel):
+     r"""
+     An encapsulation of a set of parameters accepted by API endpoints related to listing things.
+
+     Note: This class was documented after the `FindRequest` class was documented. You can refer to the documentation of
+     the latter class for additional context about the usage of Pydantic's `Field` constructor in this class.
+     """
+
+     filter: Optional[str] = Field(
+         default=None,
+         title="Filter",
+         description="""The criteria by which you want to filter the resources, in the same format as the [`query`
+         parameter](https://www.mongodb.com/docs/manual/reference/method/db.collection.find/#std-label-method-find-query)
+         of MongoDB's `db.collection.find()` method.\n\n_Example:_
+         `{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}`""",
+         examples=[
+             r'{"ecosystem_type": "Freshwater"}',
+             r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
+         ],
+     )
+     # TODO: Document why the optional type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter).
+     max_page_size: Optional[int] = Field(
+         default=20,
+         title="Resources per page",
+         description="How many resources you want _each page_ to contain, formatted as a positive integer.",
+         examples=[20],
+     )
+     page_token: Optional[str] = Field(
+         default=None,
+         title="Next page token",
+         description="""A bookmark you can use to fetch the _next_ page of resources. You can get this from the
+         `next_page_token` field in a previous response from this endpoint.\n\n_Example_:
+         `nmdc:sys0zr0fbt71`""",
+         examples=[
+             "nmdc:sys0zr0fbt71",
+         ],
+     )
+     # TODO: Document the endpoint's behavior when a projection includes a _nested_ field identifier (i.e. `foo.bar`),
+     #     and ensure the endpoint doesn't break when the projection includes field descriptors that contain commas.
+     projection: Optional[str] = Field(
+         default=None,
+         title="Projection",
+         description="""Comma-delimited list of the names of the fields you want the resources in the response to
+         include. Note: In addition to those fields, the response will also include the `id`
+         field.\n\n_Example_: `name, ecosystem_type`""",
+         examples=[
+             "name, ecosystem_type",
+         ],
+     )
+
+
+ PerPageRange = Annotated[int, Field(gt=0, le=2_000)]
+
+
+ class FindRequest(BaseModel):
+     r"""
+     An encapsulation of a set of parameters accepted by API endpoints related to finding things.
+
+     Notes:
+     - The "Query Parameter Models" section of the FastAPI docs says that this way of encapsulating
+       a set of query parameter definitions in a Pydantic model — so that Swagger UI displays a given
+       parameter's _description_ — was introduced in FastAPI 0.115.0.
+       Reference: https://fastapi.tiangolo.com/tutorial/query-param-models/
+     - While Swagger UI does show the parameter's _description_, specifically, it does not currently show the
+       parameter's _title_ or example value(s). The approach shown in the "Classes as Dependencies" section
+       of the FastAPI docs (i.e. https://fastapi.tiangolo.com/tutorial/dependencies/classes-as-dependencies/)
+       does result in Swagger UI showing those additional things, but the approach involves not inheriting
+       from Pydantic's `BaseModel` class and involves defining an `__init__` method for the class. That is
+       further than I want to take these classes from their existing selves at this point. To compensate
+       for that, I have included examples _within_ some of the descriptions.
+       Reference: https://github.com/fastapi/fastapi/issues/318#issuecomment-507043221
+     - The "Fields" section of the Pydantic docs says:
+       > "The `Field` function is used to customize and add metadata to fields of models."
+       References: https://docs.pydantic.dev/latest/concepts/fields/
+     """
+
+     filter: Optional[str] = Field(
+         default=None,
+         title="Filter",
+         description="""The criteria by which you want to filter the resources, formatted as a comma-separated list of
+         `attribute:value` pairs. The `value` can include a comparison operator (e.g. `>=`). If the attribute
+         is of type _string_ and you append `.search` to its name, the server will perform a full-text
+         search.\n\n_Example:_ `ecosystem_category:Plants, lat_lon.latitude:>35.0`""",
+         examples=[
+             "ecosystem_category:Plants",
+             "ecosystem_category:Plants, lat_lon.latitude:>35.0",
+         ],
+     )
+     search: Optional[str] = Field(
+         default=None,
+         title="Search",
+         description="N/A _(not implemented yet)_",
+     )
+     sort: Optional[str] = Field(
+         default=None,
+         title="Sort",
+         description="""How you want the resources to be ordered in the response, formatted as a comma-separated list of
+         `attribute:value` pairs. Each `attribute` is the name of a field you want the resources to be
+         ordered by, and each `value` is the direction you want the values in that field to be ordered
+         (i.e. `asc` or no value for _ascending_ order, and `desc` for _descending_ order).\n\n_Example:_
+         `depth.has_numeric_value:desc, ecosystem_type`""",
+         examples=[
+             "depth.has_numeric_value:desc",
+             "depth.has_numeric_value:desc, ecosystem_type",
+         ],
+     )
+     page: Optional[int] = Field(
+         default=None,
+         title="Page number",
+         description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
+         This is the page number formatted as an integer ≥ 1.""",
+         examples=[1],
+     )
+     per_page: Optional[PerPageRange] = Field(
+         default=25,
+         title="Resources per page",
+         description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
+         examples=[25],
+     )
+     cursor: Optional[str] = Field(
+         default=None,
+         title="Cursor",
+         description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
+         To use cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
+         include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
+         request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
+         examples=[
+             "*",
+             "nmdc:sys0zr0fbt71",
+         ],
+     )
+     group_by: Optional[str] = Field(
+         default=None,
+         title="Group by",
+         description="N/A _(not implemented yet)_",
+     )
+     fields: Optional[str] = Field(
+         default=None,
+         title="Fields",
+         description="""The fields you want the resources to include in the response, formatted as a comma-separated list
+         of field names. This can be used to reduce the size and complexity of the response.\n\n_Example:_
+         `name, ess_dive_datasets`""",
+         examples=[
+             "name",
+             "name, ess_dive_datasets",
+         ],
+     )
+
+     # Reference: https://docs.pydantic.dev/latest/concepts/validators/#model-validators
+     @model_validator(mode="before")
+     def set_page_if_cursor_unset(cls, values):
+         page, cursor = values.get("page"), values.get("cursor")
+         if page is not None and cursor is not None:
+             raise ValueError("cannot use cursor- and page-based pagination together")
+         if page is None and cursor is None:
+             values["page"] = 1
+         return values
+
+
+ class FindResponse(BaseModel):
+     meta: dict
+     results: List[dict]
+     group_by: List[dict]
+
+
+ class DeleteResponse(BaseModel):
+     r"""
+     Response model for "delete" operations. It summarizes the result of the
+     operation and it lists identifiers of the documents that were deleted.
+     """
+
+     message: str = Field(
+         description="Success message describing the deletion operation"
+     )
+     deleted_workflow_execution_ids: List[str] = Field(
+         # Note: `default_factory=list` sets this to an empty list by default.
+         default_factory=list,
+         description="The `id`s of the `WorkflowExecution`s that were deleted",
+     )
+     deleted_data_object_ids: List[str] = Field(
+         default_factory=list,
+         description="The `id`s of the `DataObject`s that were deleted",
+     )
+     deleted_functional_annotation_agg_oids: List[str] = Field(
+         default_factory=list,
+         description="The internal MongoDB `ObjectId`s of the `FunctionalAnnotationAggMember`s that were deleted",
+     )
+     deleted_job_ids: List[str] = Field(
+         default_factory=list,
+         description="The `id`s of the `jobs` documents that were deleted",
+     )
+
+
+ # Note: For MongoDB, a single collection can have no more than 64 indexes
+ # Note: Each collection has a unique index set on "id" elsewhere.
+ entity_attributes_to_index = {
+     "biosample_set": {
+         "alternative_identifiers",
+         "env_broad_scale.has_raw_value",
+         "env_local_scale.has_raw_value",
+         "env_medium.has_raw_value",
+         "collection_date.has_raw_value",
+         "ecosystem",
+         "ecosystem_category",
+         "ecosystem_type",
+         "ecosystem_subtype",
+         "specific_ecosystem",
+         # Note: if `lat_lon` was GeoJSON, i.e. {type,coordinates}, MongoDB has a "2dsphere" index
+         "lat_lon.latitude",
+         "lat_lon.longitude",
+     },
+     "study_set": {
+         "has_credit_associations.applied_roles",
+         "has_credit_associations.applies_to_person.name",
+         "has_credit_associations.applies_to_person.orcid",
+     },
+     "data_object_set": {
+         "data_object_type",
+         "file_size_bytes",
+         "md5_checksum",
+         "url",
+     },
+     # TODO: Refrain from ensuring indexes exist in the `omics_processing_set` collection,
+     #     since that collection was deleted as part of the "Berkeley schema" refactor.
+     #     Reference: https://microbiomedata.github.io/nmdc-schema/v10-vs-v11-retrospective/#slots-removed-from-database
+     "omics_processing_set": {
+         "has_input",
+         "has_output",
+         "instrument_name",
+         "alternative_identifiers",
+     },
+     "functional_annotation_agg": {"was_generated_by"},
+     "workflow_execution_set": {
+         "has_input",
+         "has_output",
+     },
+     # Note: The `jobs` collection is not described by the NMDC schema.
+     "jobs": {
+         "config.activity_id",
+     },
+ }
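
In the new `FindRequest` model, the `mode="before"` validator makes page-based and cursor-based pagination mutually exclusive and falls back to page 1 when neither is supplied. Below is a minimal, self-contained sketch of that behavior, assuming Pydantic v2; `PaginationParams` is a hypothetical trimmed-down stand-in for the full model, with the validator copied from the hunk above.

```python
from typing import Optional

from pydantic import BaseModel, ValidationError, model_validator


class PaginationParams(BaseModel):  # hypothetical stand-in for FindRequest
    page: Optional[int] = None
    cursor: Optional[str] = None

    # Runs on the raw input mapping, before field parsing (mirrors set_page_if_cursor_unset).
    @model_validator(mode="before")
    def set_page_if_cursor_unset(cls, values):
        page, cursor = values.get("page"), values.get("cursor")
        if page is not None and cursor is not None:
            raise ValueError("cannot use cursor- and page-based pagination together")
        if page is None and cursor is None:
            values["page"] = 1  # default to page-based pagination
        return values


print(PaginationParams().page)            # 1 (neither given, so page defaults to 1)
print(PaginationParams(cursor="*").page)  # None (cursor-based pagination)
try:
    PaginationParams(page=2, cursor="*")
except ValidationError as exc:
    print(exc.errors()[0]["msg"])         # rejects the conflicting combination
```

Because the validator runs in "before" mode, it sees the raw input mapping and can reject the conflicting combination before any field parsing happens.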
nmdc_runtime/api/models/workflow.py ADDED
@@ -0,0 +1,15 @@
+ import datetime
+ from typing import Optional, List
+
+ from pydantic import BaseModel
+
+
+ class WorkflowBase(BaseModel):
+     name: Optional[str] = None
+     description: Optional[str] = None
+     capability_ids: Optional[List[str]] = None
+
+
+ class Workflow(WorkflowBase):
+     id: str
+     created_at: Optional[datetime.datetime] = None
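
For orientation, a short sketch of how these models would be instantiated, assuming this hunk is `nmdc_runtime/api/models/workflow.py` (per the file list), the wheel is installed, and Pydantic v2 is in use; the `id` and `capability_ids` values are hypothetical:

```python
import datetime

from nmdc_runtime.api.models.workflow import Workflow

wf = Workflow(
    id="nmdc:wf-example-1",  # hypothetical identifier
    name="Example workflow",
    capability_ids=["cap-1"],  # hypothetical capability id
    created_at=datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc),
)
print(wf.model_dump())  # `description` stays None; every field except `id` is optional
```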
nmdc_runtime/api/openapi.py ADDED
@@ -0,0 +1,242 @@
+ r"""
+ This module contains the definitions of constants and functions related to
+ generating the API's OpenAPI schema (a.k.a. Swagger schema).
+
+ References:
+ - FastAPI Documentation: https://fastapi.tiangolo.com/tutorial/metadata/
+
+ Notes:
+ - The tag descriptions in this file were cut/pasted from `nmdc_runtime/api/main.py`.
+   Now that they are in a separate module, we will be able to edit them more easily.
+ """
+
+ from html import escape
+ from typing import List, Dict
+
+ # Mapping from tag names to their (Markdown-formatted) descriptions.
+ tag_descriptions: Dict[str, str] = {}
+
+ tag_descriptions[
+     "sites"
+ ] = r"""
+ A site corresponds to a physical place that may participate in job execution.
+
+ A site may register data objects and capabilities with NMDC. It may claim jobs to execute, and it may
+ update job operations with execution info.
+
+ A site must be able to service requests for any data objects it has registered.
+
+ A site may expose a "put object" custom method for authorized users. This method facilitates an
+ operation to upload an object to the site and have the site register that object with the runtime
+ system.
+ """
+
+ tag_descriptions[
+     "workflows"
+ ] = r"""
+ A workflow is a template for creating jobs.
+
+ Workflow jobs are typically created by the system via trigger associations between
+ workflows and object types. A workflow may also require certain capabilities of sites
+ in order for those sites to claim workflow jobs.
+ """
+
+ tag_descriptions[
+     "users"
+ ] = r"""
+ Endpoints for user identification.
+
+ Currently, accounts for use with the Runtime API are created manually by system administrators.
+ """
+
+ tag_descriptions[
+     "capabilities"
+ ] = r"""
+ A workflow may require an executing site to have particular capabilities.
+
+ These capabilities go beyond the simple ability to access the data object resources registered with
+ the runtime system. Sites register their capabilities, and sites are only able to claim workflow
+ jobs if they are known to have the capabilities required by the workflow.
+ """
+
+ tag_descriptions[
+     "object types"
+ ] = r"""
+ An object type is an object annotation that is useful for triggering workflows.
+
+ A data object may be annotated with one or more types, which in turn can be associated with
+ workflows through trigger resources.
+
+ The data-object type system may be used to trigger workflow jobs on a subset of data objects when a
+ new version of a workflow is deployed. This could be done by minting a special object type for the
+ occasion, annotating the subset of data objects with that type, and registering the association of
+ object type to workflow via a trigger resource.
+ """
+
+ tag_descriptions[
+     "triggers"
+ ] = r"""
+ A trigger is an association between a workflow and a data object type.
+
+ When a data object is annotated with a type, perhaps shortly after object registration, the NMDC
+ Runtime will check, via trigger associations, for potential new jobs to create for any workflows.
+ """
+
+ tag_descriptions[
+     "jobs"
+ ] = r"""
+ A job is a resource that isolates workflow configuration from execution.
+
+ Rather than directly creating a workflow operation by supplying a workflow ID along with
+ configuration, NMDC creates a job that pairs a workflow with configuration. Then, a site can claim a
+ job ID, allowing the site to execute the intended workflow without additional configuration.
+
+ A job can have multiple executions, and a workflow's executions are precisely the executions of all
+ jobs created for that workflow.
+
+ A site that already has a compatible job execution result can preempt the unnecessary creation of a
+ job by pre-claiming it. This will return like a claim, and now the site can register known data
+ object inputs for the job without the risk of the runtime system creating a claimable job of the
+ pre-claimed type.
+ """
+
+ tag_descriptions[
+     "objects"
+ ] = r"""
+ A [Data Repository Service (DRS)
+ object](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.1.0/docs/#_drs_datatypes)
+ represents content necessary for a workflow job to execute, and/or output from a job execution.
+
+ An object may be a *blob*, analogous to a file, or a *bundle*, analogous to a folder. Sites register
+ objects, and sites must ensure that these objects are accessible to the NMDC data broker.
+
+ An object may be associated with one or more object types, useful for triggering workflows.
+ """
+
+ tag_descriptions[
+     "operations"
+ ] = r"""
+ An operation is a resource for tracking the execution of a job.
+
+ When a job is claimed by a site for execution, an operation resource is created.
+
+ An operation is akin to a "promise" or "future" in that it should eventually resolve to either a
+ successful result, i.e. an execution resource, or to an error.
+
+ An operation is parameterized to return a result type, and a metadata type for storing progress
+ information, that are both particular to the job type.
+
+ Operations may be paused, resumed, and/or cancelled.
+
+ Operations may expire, i.e. not be stored indefinitely. In this case, it is recommended that
+ execution resources have longer lifetimes / not expire, so that information about successful results
+ of operations are available.
+ """
+
+ tag_descriptions[
+     "queries"
+ ] = r"""
+ A query is an operation (find, update, etc.) against the metadata store.
+
+ Metadata -- for studies, biosamples, omics processing, etc. -- is used by sites to execute jobs,
+ as the parameterization of job executions may depend not only on the content of data objects, but
+ also on objects' associated metadata.
+
+ Also, the function of many workflows is to extract or produce new metadata. Such metadata products
+ should be registered as data objects, and they may also be supplied by sites to the runtime system
+ as an update query (if the latter is not done, the runtime system will sense the new metadata and
+ issue an update query).
+ """
+
+ tag_descriptions[
+     "metadata"
+ ] = r"""
+ The [metadata endpoints](https://api.microbiomedata.org/docs#/metadata) can be used to get and filter
+ metadata from collection set types (including
+ [studies](https://w3id.org/nmdc/Study/),
+ [biosamples](https://w3id.org/nmdc/Biosample/),
+ [planned processes](https://w3id.org/nmdc/PlannedProcess/), and
+ [data objects](https://w3id.org/nmdc/DataObject/)
+ as discussed in the __find__ section).
+ <br/>
+
+ The __metadata__ endpoints allow users to retrieve metadata from the data portal using the various
+ GET endpoints that are slightly different than the __find__ endpoints, but some can be used similarly.
+ As with the __find__ endpoints, parameters for the __metadata__ endpoints that do not have a
+ red ___* required___ next to them are optional. <br/>
+
+ Unlike the compact syntax used in the __find__ endpoints, the syntax for the filter parameter of
+ the metadata endpoints
+ uses [MongoDB-like language querying](https://www.mongodb.com/docs/manual/tutorial/query-documents/).
+ """
+
+ tag_descriptions[
+     "find"
+ ] = r"""
+ The [find endpoints](https://api.microbiomedata.org/docs#/find) are provided with NMDC metadata entities
+ already specified - where metadata about [studies](https://w3id.org/nmdc/Study),
+ [biosamples](https://w3id.org/nmdc/Biosample), [data objects](https://w3id.org/nmdc/DataObject/),
+ and [planned processes](https://w3id.org/nmdc/PlannedProcess/) can be retrieved using GET requests.
+ <br/>
+
+ Each endpoint is unique and requires the applicable attribute names to be known in order to structure a query
+ in a meaningful way. Parameters that do not have a red ___* required___ label next to them are optional.
+ """
+
+ tag_descriptions[
+     "runs"
+ ] = r"""
+ **WORK IN PROGRESS**
+
+ Run simple jobs.
+
+ For off-site job runs, keep the Runtime appraised of run events.
+ """
+
+ # Remove leading and trailing whitespace from each description.
+ for name, description in tag_descriptions.items():
+     tag_descriptions[name] = description.strip()
+
+ ordered_tag_descriptors: List[Dict[str, str]] = [
+     {"name": "sites", "description": tag_descriptions["sites"]},
+     {"name": "users", "description": tag_descriptions["users"]},
+     {"name": "workflows", "description": tag_descriptions["workflows"]},
+     {"name": "capabilities", "description": tag_descriptions["capabilities"]},
+     {"name": "object types", "description": tag_descriptions["object types"]},
+     {"name": "triggers", "description": tag_descriptions["triggers"]},
+     {"name": "jobs", "description": tag_descriptions["jobs"]},
+     {"name": "objects", "description": tag_descriptions["objects"]},
+     {"name": "operations", "description": tag_descriptions["operations"]},
+     {"name": "queries", "description": tag_descriptions["queries"]},
+     {"name": "metadata", "description": tag_descriptions["metadata"]},
+     {"name": "find", "description": tag_descriptions["find"]},
+     {"name": "runs", "description": tag_descriptions["runs"]},
+ ]
+
+
+ def make_api_description(schema_version: str, orcid_login_url: str) -> str:
+     r"""
+     Returns an API description into which the specified schema version and
+     ORCID login URL have been incorporated.
+
+     Args:
+         schema_version (str): The version of `nmdc-schema` the Runtime is using.
+         orcid_login_url (str): The URL at which a user could login via ORCID.
+
+     Returns:
+         str: The Markdown-formatted API description.
+     """
+     result = f"""
+ The NMDC Runtime API, via on-demand functions and via schedule-based and sensor-based automation,
+ supports validation and submission of metadata, as well as orchestration of workflow executions.
+
+ [NMDC Schema](https://microbiomedata.github.io/nmdc-schema/) version: `{schema_version}`
+
+ [Documentation](https://docs.microbiomedata.org/runtime/)
+
+ <img src="/static/ORCIDiD_icon128x128.png" height="18" width="18"/>
+ <a href="{escape(orcid_login_url)}" title="Login with ORCID">
+ Login with ORCID
+ </a>
+ """.strip()
+     return result
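
A short usage sketch for the new module, assuming this hunk is `nmdc_runtime/api/openapi.py` (per the file list) and the wheel is installed; the version string and login URL are hypothetical:

```python
from nmdc_runtime.api.openapi import make_api_description, ordered_tag_descriptors

description = make_api_description(
    schema_version="11.1.0",                            # hypothetical version string
    orcid_login_url="https://example.org/orcid_login",  # hypothetical URL
)
print(description.splitlines()[0])  # first line of the Markdown API description
print([d["name"] for d in ordered_tag_descriptors][:3])  # ['sites', 'users', 'workflows']
```

Note that the login URL passes through `html.escape()` before being interpolated into the anchor tag, so characters like `&` in the URL will not break the rendered HTML.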
nmdc_runtime/config.py CHANGED
@@ -1,5 +1,56 @@
- DATABASE_CLASS_NAME = "Database"
+ """
+ This module acts as a unified interface between the codebase and the environment.
+ We will eventually move all of the Runtime's environment variables reads into this
+ module, instead of leaving them sprinkled throughout the codebase.

- # Feature flag that can be used to enable/disable the `/nmdcschema/related_ids`
- # endpoint and the tests that target it.
- IS_RELATED_IDS_ENDPOINT_ENABLED = False
+ TODO: Move all environment variable reads into this module and update references accordingly.
+ """
+
+ from typing import Set
+ import os
+
+
+ def is_env_var_true(name: str, default: str = "false") -> bool:
+     r"""
+     Checks whether the value of the specified environment variable
+     meets our criteria for true-ness.
+
+     Reference: https://docs.python.org/3/library/os.html#os.environ
+
+     Run doctests via: $ python -m doctest nmdc_runtime/config.py
+
+     >>> import os
+     >>> name = "EXAMPLE_ENV_VAR"
+     >>> os.unsetenv(name)  # Undefined
+     >>> is_env_var_true(name)
+     False
+     >>> is_env_var_true(name, "true")  # Undefined, overridden default
+     True
+     >>> os.environ[name] = "false"  # Defined as false
+     >>> is_env_var_true(name)
+     False
+     >>> os.environ[name] = "true"  # Defined as true
+     >>> is_env_var_true(name)
+     True
+     >>> os.environ[name] = "TRUE"  # Case-insensitive
+     >>> is_env_var_true(name)
+     True
+     >>> os.environ[name] = "potato"  # Non-boolean string
+     >>> is_env_var_true(name)
+     False
+     """
+     lowercase_true_strings: Set[str] = {"true"}
+     return os.environ.get(name, default).lower() in lowercase_true_strings
+
+
+ # Feature flag to enable/disable the `/nmdcschema/linked_instances` endpoint and the tests that target it.
+ IS_LINKED_INSTANCES_ENDPOINT_ENABLED: bool = is_env_var_true(
+     "IS_LINKED_INSTANCES_ENDPOINT_ENABLED", default="true"
+ )
+
+ # Feature flag that can be used to enable/disable the `/scalar` endpoint.
+ IS_SCALAR_ENABLED: bool = is_env_var_true("IS_SCALAR_ENABLED", default="true")
+
+ # Feature flag that can be used to enable/disable performance profiling,
+ # which can be activated via the `?profile=true` URL query parameter.
+ IS_PROFILING_ENABLED: bool = is_env_var_true("IS_PROFILING_ENABLED", default="false")
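
Because these flags are computed once, at import time, any environment variables must be set before `nmdc_runtime.config` is first imported. A minimal sketch, assuming the wheel is installed:

```python
import os

# Set the variable before importing the module, since the module-level
# flags are evaluated when the module is first imported.
os.environ["IS_PROFILING_ENABLED"] = "TRUE"

from nmdc_runtime import config

print(config.IS_PROFILING_ENABLED)                       # True ("TRUE" matches case-insensitively)
print(config.is_env_var_true("SOME_UNSET_VAR"))          # False (default is "false")
print(config.is_env_var_true("SOME_UNSET_VAR", "true"))  # True (overridden default)
```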
nmdc_runtime/core/db/Database.py CHANGED
@@ -1,6 +1,4 @@
- from contextlib import contextmanager, AbstractContextManager
- from typing import Callable
- import logging
+ from contextlib import contextmanager

  from motor import motor_asyncio

nmdc_runtime/infrastructure/database/models/user.py CHANGED
@@ -1,10 +1 @@
  from __future__ import annotations
-
- from abc import abstractmethod
- from typing import List
-
- from nmdc_runtime.domain.users.userSchema import (
-     UserAuth,
-     UserUpdate,
-     UserOut,
- )
nmdc_runtime/lib/extract_nmdc_data.py CHANGED
@@ -3,14 +3,6 @@

  ## system level modules
  import pandas as pds
- import jq
- import jsonasobj
- import json
- import zipfile
- import yaml
- from yaml import CLoader as Loader, CDumper as Dumper
- from dotted_dict import DottedDict
- from collections import namedtuple


  def extract_table(merged_df, table_name):
nmdc_runtime/lib/nmdc_dataframes.py CHANGED
@@ -3,13 +3,9 @@

  ## system level modules
  import pandas as pds
- import jsonasobj
- import json
  import zipfile
  import yaml
- from pandas.core.dtypes.missing import notnull
- from yaml import CLoader as Loader, CDumper as Dumper
- from dotted_dict import DottedDict
+ from yaml import CLoader as Loader
  from collections import namedtuple


@@ -309,10 +305,10 @@ def make_study_dataframe(study_table, contact_table, proposals_table, result_col

      ## make sure the contact ids are strings with the ".0" removed from the end (i.e., the strings aren't floats)
      study_table["contact_id"] = (
-         study_table["contact_id"].astype(str).replace("\.0", "", regex=True)
+         study_table["contact_id"].astype(str).replace(r"\.0", "", regex=True)
      )
      contact_table_splice["contact_id"] = (
-         contact_table_splice["contact_id"].astype(str).replace("\.0", "", regex=True)
+         contact_table_splice["contact_id"].astype(str).replace(r"\.0", "", regex=True)
      )
      # print(study_table[['contact_id', 'principal_investigator_name']].head())

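
The two changes in this hunk are behavior-preserving: the non-raw literal `"\.0"` contains the invalid escape sequence `\.`, which CPython has flagged with a DeprecationWarning since 3.6 (upgraded to a SyntaxWarning in 3.12), while the raw string `r"\.0"` compiles to the same regex without the warning. A small sketch of the same transformation in isolation, assuming pandas is installed:

```python
import pandas as pd

# Contact ids read as floats stringify with a trailing ".0"; the regex strips it.
contact_ids = pd.Series([123.0, 456.0]).astype(str)    # ['123.0', '456.0']
cleaned = contact_ids.replace(r"\.0", "", regex=True)  # r"\." matches a literal dot
print(cleaned.tolist())                                # ['123', '456']
```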
nmdc_runtime/lib/nmdc_etl_class.py CHANGED
@@ -8,14 +8,8 @@ import nmdc_runtime.lib.load_nmdc_data as lx
  import nmdc_runtime.lib.nmdc_dataframes as nmdc_dfs
  from nmdc_schema import nmdc
  import pandas as pds
- import jq
- import jsonasobj
- import json
- import zipfile
  import yaml
- from yaml import CLoader as Loader, CDumper as Dumper
- from dotted_dict import DottedDict
- from collections import namedtuple
+ from yaml import CLoader as Loader


  class NMDC_ETL:
nmdc_runtime/minter/adapters/repository.py CHANGED
@@ -2,9 +2,8 @@ import abc
  import re
  from typing import Union

- from fastapi import HTTPException
  from pymongo import ReturnDocument
- from toolz import merge, dissoc
+ from toolz import merge
  from pymongo.database import Database as MongoDatabase


nmdc_runtime/minter/config.py CHANGED
@@ -23,9 +23,11 @@ def typecodes() -> List[dict]:
      that class _today_; regardless of what it may have used in the past.

      >>> typecode_descriptors = typecodes()
+
      # Test #1: We get the typecode we expect, for a class whose pattern contains only one typecode.
      >>> any((td["name"] == "sty" and td["schema_class"] == "nmdc:Study") for td in typecode_descriptors)
      True
+
      # Tests #2 and #3: We get only the typecode we expect, for a class whose pattern contains multiple typecodes.
      >>> any((td["name"] == "dgms" and td["schema_class"] == "nmdc:MassSpectrometry") for td in typecode_descriptors)
      True