nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
nmdc_runtime/api/endpoints/wf_file_staging.py
@@ -0,0 +1,307 @@
+ from fastapi import APIRouter, Depends, Query
+ from pymongo.database import Database
+ from typing import Annotated
+ from toolz import merge
+ import logging
+
+ from nmdc_runtime.api.core.util import raise404_if_none, HTTPException, status
+ from nmdc_runtime.api.db.mongo import get_mongo_db
+ from nmdc_runtime.api.endpoints.util import (
+     check_action_permitted,
+     list_resources,
+     strip_oid,
+ )
+ from nmdc_runtime.api.models.metadata import Doc
+ from nmdc_runtime.api.models.user import User, get_current_active_user
+ from nmdc_runtime.api.models.util import ListRequest, ListResponse
+ from nmdc_runtime.api.models.wfe_file_stages import (
+     GlobusTask,
+     GlobusTaskStatus,
+     JDPFileStatus,
+     JGISample,
+     JGISequencingProject,
+     WorkflowFileStagingCollectionName as CollectionName,
+ )
+
+ router = APIRouter()
+
+
+ def check_can_run_wf_file_staging_endpoints(user: User):
+     """
+     Check if the user is permitted to run the wf_file_staging endpoints in this file.
+     """
+     if not check_action_permitted(user.username, "/wf_file_staging"):
+         raise HTTPException(
+             status_code=status.HTTP_403_FORBIDDEN,
+             detail="Only specific users are allowed to issue wf_file_staging commands.",
+         )
+
+
+ @router.post(
+     "/wf_file_staging/globus_tasks",
+     status_code=status.HTTP_201_CREATED,
+     response_model=GlobusTask,
+ )
+ def create_globus_tasks(
+     globus_in: GlobusTask,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Create a `GlobusTask`."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     # check if a record with the same task_id already exists
+     existing = mdb["wf_file_staging.globus_tasks"].find_one(
+         {"task_id": globus_in.task_id}
+     )
+     if existing is not None:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail=f"Globus task with task_id {globus_in.task_id} already exists.",
+         )
+     # check the status exists in the Enum; if not, log a warning
+     if globus_in.task_status not in GlobusTaskStatus.__members__.values():
+         logging.warning(
+             f"Globus task status {globus_in.task_status} does not exist in GlobusTaskStatus enum."
+         )
+
+     globus_dict = globus_in.model_dump()
+     mdb["wf_file_staging.globus_tasks"].insert_one(globus_dict)
+     return globus_dict
+
+
+ @router.get("/wf_file_staging/globus_tasks/{task_id}", response_model=GlobusTask)
+ def get_globus_tasks(
+     task_id: str,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Retrieve a `GlobusTask`."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     return raise404_if_none(
+         mdb["wf_file_staging.globus_tasks"].find_one({"task_id": task_id})
+     )
+
+
+ @router.patch("/wf_file_staging/globus_tasks/{task_id}", response_model=GlobusTask)
+ def update_globus_tasks(
+     task_id: str,
+     globus_patch: GlobusTask,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Update a `GlobusTask`."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+
+     if task_id != globus_patch.task_id:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail="task_id in path and body must match.",
+         )
+
+     doc = raise404_if_none(
+         mdb["wf_file_staging.globus_tasks"].find_one({"task_id": task_id})
+     )
+     doc_globus_patched = merge(doc, globus_patch.model_dump(exclude_unset=True))
+     mdb["wf_file_staging.globus_tasks"].replace_one(
+         {"task_id": task_id}, doc_globus_patched
+     )
+     return doc_globus_patched
+
+
+ # Note: We use the generic `Doc` class—instead of the `GlobusTask` class—to describe the response
+ # because this endpoint (via `ListRequest`) supports projection, which can be used to omit
+ # fields from the response, even fields the `GlobusTask` class says are required.
+ @router.get(
+     "/wf_file_staging/globus_tasks",
+     response_model=ListResponse[Doc],
+     response_model_exclude_unset=True,
+ )
+ def list_globus_tasks(
+     req: Annotated[ListRequest, Query()],
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Get a list of `GlobusTask`s."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     rv = list_resources(req, mdb, "wf_file_staging.globus_tasks")
+     rv["resources"] = [strip_oid(d) for d in rv["resources"]]
+     return rv
+
+
+ @router.post(
+     "/wf_file_staging/jgi_samples",
+     status_code=status.HTTP_201_CREATED,
+     response_model=JGISample,
+ )
+ def create_jgi_sample(
+     jgi_in: JGISample,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """
+     Create a JGI Sample.
+     """
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     # check if a record with the same jdp_file_id already exists
+     existing = mdb["wf_file_staging.jgi_samples"].find_one(
+         {"jdp_file_id": jgi_in.jdp_file_id}
+     )
+     if existing is not None:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail=f"JGI sample with jdp_file_id {jgi_in.jdp_file_id} already exists.",
+         )
+     # check the status exists in the enum; if not, log a warning
+     if jgi_in.jdp_file_status not in JDPFileStatus.__members__.values():
+         logging.warning(
+             f"JDP file status {jgi_in.jdp_file_status} does not exist in JDPFileStatus enum."
+         )
+     if jgi_in.globus_file_status not in GlobusTaskStatus.__members__.values():
+         logging.warning(
+             f"Globus file status {jgi_in.globus_file_status} does not exist in GlobusTaskStatus enum."
+         )
+
+     sample_dict = jgi_in.model_dump(exclude_unset=True)
+     try:
+         mdb["wf_file_staging.jgi_samples"].insert_one(sample_dict)
+         return sample_dict
+     except Exception as e:
+         logging.error(f"Error during jgi sample insertion: {str(e)}", exc_info=True)
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail=f"Error during insertion: {str(e)}",
+         )
+
+
+ # Note: We use the generic `Doc` class—instead of the `JGISample` class—to describe the response
+ # because this endpoint (via `ListRequest`) supports projection, which can be used to omit
+ # fields from the response, even fields the `JGISample` class says are required.
+ @router.get(
+     "/wf_file_staging/jgi_samples",
+     response_model=ListResponse[Doc],
+     response_model_exclude_unset=True,
+ )
+ def list_jgi_samples(
+     req: Annotated[ListRequest, Query()],
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     r"""
+     Retrieves JGI Sample records that match the specified filter criteria. Uses Mongo-like filters.
+     """
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+
+     rv = list_resources(req, mdb, "wf_file_staging.jgi_samples")
+     rv["resources"] = [strip_oid(d) for d in rv["resources"]]
+     return rv
+
+
+ @router.patch("/wf_file_staging/jgi_samples/{jdp_file_id}", response_model=JGISample)
+ def update_jgi_samples(
+     jdp_file_id: str,
+     jgi_sample_patch: JGISample,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """
+     Update a JGI Sample record by its jdp_file_id.
+     """
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+
+     if jdp_file_id != jgi_sample_patch.jdp_file_id:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail="Cannot modify jdp_file_id (jdp_file_id in path and body must match).",
+         )
+
+     doc_jgi_sample_original = raise404_if_none(
+         mdb["wf_file_staging.jgi_samples"].find_one({"jdp_file_id": jdp_file_id})
+     )
+     doc_jgi_sample_patched = merge(
+         doc_jgi_sample_original, jgi_sample_patch.model_dump(exclude_unset=True)
+     )
+     mdb["wf_file_staging.jgi_samples"].replace_one(
+         {"jdp_file_id": jdp_file_id}, doc_jgi_sample_patched
+     )
+     return doc_jgi_sample_patched
+
+
+ # Note: We use the generic `Doc` class—instead of the `JGISequencingProject` class—to describe the response
+ # because this endpoint (via `ListRequest`) supports projection, which can be used to omit
+ # fields from the response, even fields the `JGISequencingProject` class says are required.
+ @router.get(
+     "/wf_file_staging/jgi_sequencing_projects",
+     response_model=ListResponse[Doc],
+     response_model_exclude_unset=True,
+ )
+ def list_sequencing_project_records(
+     req: Annotated[ListRequest, Query()],
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Get a list of `JGISequencingProject`s."""
+
+     check_can_run_wf_file_staging_endpoints(user)
+     rv = list_resources(req, mdb, CollectionName.JGI_SEQUENCING_PROJECTS.value)
+     rv["resources"] = [strip_oid(d) for d in rv["resources"]]
+     return rv
+
+
+ @router.post(
+     "/wf_file_staging/jgi_sequencing_projects",
+     status_code=status.HTTP_201_CREATED,
+     response_model=JGISequencingProject,
+ )
+ def create_sequencing_record(
+     sequencing_project_in: JGISequencingProject,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Create a `JGISequencingProject`."""
+
+     check_can_run_wf_file_staging_endpoints(user)
+     existing = mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value].find_one(
+         {"sequencing_project_name": sequencing_project_in.sequencing_project_name}
+     )
+     if existing is not None:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail=f"JGISequencingProject with project name {sequencing_project_in.sequencing_project_name} already exists.",
+         )
+     sequencing_project_dict = sequencing_project_in.model_dump()
+     mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value].insert_one(
+         sequencing_project_dict
+     )
+     return sequencing_project_dict
+
+
+ @router.get(
+     "/wf_file_staging/jgi_sequencing_projects/{sequencing_project_name}",
+     response_model=JGISequencingProject,
+ )
+ def get_sequencing_project(
+     sequencing_project_name: str,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Retrieve a `JGISequencingProject`."""
+
+     check_can_run_wf_file_staging_endpoints(user)
+
+     return raise404_if_none(
+         mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value].find_one(
+             {"sequencing_project_name": sequencing_project_name}
+         )
+     )
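
The PATCH handlers above implement partial updates by merging the stored document with only the fields the caller explicitly set. A minimal sketch of that merge semantics, using a hypothetical stand-in model and field values rather than the real `GlobusTask` schema:

    # Sketch of the merge-based partial update used by the PATCH endpoints above.
    # `StagingTaskSketch` and its fields are hypothetical stand-ins.
    from pydantic import BaseModel
    from toolz import merge

    class StagingTaskSketch(BaseModel):
        task_id: str
        task_status: str | None = None
        label: str | None = None

    stored = {"task_id": "t-1", "task_status": "ACTIVE", "label": "initial"}
    patch = StagingTaskSketch(task_id="t-1", task_status="SUCCEEDED")

    # exclude_unset=True keeps only the fields the caller supplied, so `label`
    # survives the merge while `task_status` is overwritten by the patch.
    patched = merge(stored, patch.model_dump(exclude_unset=True))
    assert patched == {"task_id": "t-1", "task_status": "SUCCEEDED", "label": "initial"}

Because `merge` favors the right-hand dict, omitting a field from the request body leaves the stored value untouched; this is why the endpoints validate that the path ID matches the body ID before merging.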
nmdc_runtime/api/endpoints/workflows.py
@@ -0,0 +1,353 @@
+ import logging
+ import os
+ from typing import Any, List, Set, Annotated
+
+ import pymongo
+ from bson import ObjectId
+ from fastapi import APIRouter, Depends, HTTPException, Path
+ from pymongo.database import Database as MongoDatabase
+ from pymongo.errors import BulkWriteError
+ from starlette import status
+
+ from nmdc_runtime.api.core.util import raise404_if_none
+ from nmdc_runtime.api.endpoints.queries import (
+     _run_mdb_cmd,
+     check_can_update_and_delete,
+     _run_delete_nonschema,
+ )
+ from nmdc_runtime.api.db.mongo import get_mongo_db, validate_json
+ from nmdc_runtime.api.models.capability import Capability
+ from nmdc_runtime.api.models.object_type import ObjectType
+ from nmdc_runtime.api.models.query import DeleteCommand, DeleteStatement
+ from nmdc_runtime.api.models.site import Site, get_current_client_site
+ from nmdc_runtime.api.models.user import User, get_current_active_user
+ from nmdc_runtime.api.models.util import DeleteResponse
+ from nmdc_runtime.api.models.workflow import Workflow
+ from nmdc_runtime.site.resources import MongoDB
+ from nmdc_schema.nmdc import (
+     MetagenomeAnnotation,
+     MetaproteomicsAnalysis,
+     MetatranscriptomeAnnotation,
+ )
+
+
+ router = APIRouter()
+
+
+ @router.get("/workflows", response_model=List[Workflow])
+ def list_workflows(
+     mdb: pymongo.database.Database = Depends(get_mongo_db),
+ ):
+     return list(mdb.workflows.find())
+
+
+ @router.get("/workflows/{workflow_id}", response_model=Workflow)
+ def get_workflow(
+     workflow_id: str,
+     mdb: pymongo.database.Database = Depends(get_mongo_db),
+ ):
+     return raise404_if_none(mdb.workflows.find_one({"id": workflow_id}))
+
+
+ @router.get("/workflows/{workflow_id}/object_types", response_model=List[ObjectType])
+ def list_workflow_object_types(
+     workflow_id: str, mdb: pymongo.database.Database = Depends(get_mongo_db)
+ ):
+     object_type_ids = [
+         doc["object_type_id"] for doc in mdb.triggers.find({"workflow_id": workflow_id})
+     ]
+     return list(mdb.object_types.find({"id": {"$in": object_type_ids}}))
+
+
+ @router.get("/workflows/{workflow_id}/capabilities", response_model=List[Capability])
+ def list_workflow_capabilities(
+     workflow_id: str, mdb: pymongo.database.Database = Depends(get_mongo_db)
+ ):
+     doc = raise404_if_none(mdb.workflows.find_one({"id": workflow_id}))
+     return list(mdb.capabilities.find({"id": {"$in": doc.get("capability_ids", [])}}))
+
+
+ @router.post("/workflows/activities", status_code=status.HTTP_410_GONE, deprecated=True)
+ async def post_activity(
+     activity_set: dict[str, Any],
+     site: Site = Depends(get_current_client_site),
+     mdb: MongoDatabase = Depends(get_mongo_db),
+ ):
+     """
+     DEPRECATED: migrate all workflows from this endpoint to `/workflows/workflow_executions`.
+     """
+     return "DEPRECATED: POST your request to `/workflows/workflow_executions` instead."
+
+
+ @router.post("/workflows/workflow_executions")
+ async def post_workflow_execution(
+     workflow_execution_set: dict[str, Any],
+     site: Site = Depends(get_current_client_site),
+     mdb: MongoDatabase = Depends(get_mongo_db),
+ ):
+     """
+     Post workflow execution set to database and claim job.
+
+     Parameters
+     ----------
+     workflow_execution_set: dict[str, Any]
+         Set of workflow executions for specific workflows, in the form of an nmdc:Database.
+         Other collections (such as data_object_set) are allowed, as they may be associated
+         with the workflow executions submitted.
+
+     site: Site
+     mdb: MongoDatabase
+
+     Returns
+     -------
+     dict[str, str]
+     """
+     _ = site  # must be authenticated
+     try:
+         # validate request JSON
+         rv = validate_json(
+             workflow_execution_set, mdb, check_inter_document_references=True
+         )
+         if rv["result"] == "errors":
+             raise HTTPException(
+                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                 detail=str(rv),
+             )
+         # create mongodb instance for dagster
+         mongo_resource = MongoDB(
+             host=os.getenv("MONGO_HOST"),
+             dbname=os.getenv("MONGO_DBNAME"),
+             username=os.getenv("MONGO_USERNAME"),
+             password=os.getenv("MONGO_PASSWORD"),
+         )
+         mongo_resource.add_docs(workflow_execution_set, validate=False, replace=True)
+         return {"message": "jobs accepted"}
+     except BulkWriteError as e:
+         raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e))
+     except ValueError as e:
+         raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e))
+
+
+ @router.delete(
+     "/workflows/workflow_executions/{workflow_execution_id}",
+     response_model=DeleteResponse,
+     description="Delete a workflow execution and its downstream workflow executions, data objects, "
+     "functional annotation aggregation members, and related job records.\n\n"
+     "This endpoint performs recursive deletion of the specified workflow execution, "
+     "all downstream workflow executions that depend on this workflow execution's outputs, "
+     "all functional annotation aggregation members generated by deleted workflow executions, "
+     "all data objects that are outputs of deleted workflow executions, "
+     "and all job records that have the workflow execution ID as their config.activity_id.",
+ )
+ async def delete_workflow_execution(
+     workflow_execution_id: Annotated[
+         str,
+         Path(
+             title="Workflow Execution ID",
+             description="The `id` of the `WorkflowExecution` you want to delete.\n\n_Example_: `nmdc:wfmgan-11-abc123.1`",
+             examples=["nmdc:wfmgan-11-abc123.1"],
+         ),
+     ],
+     user: User = Depends(get_current_active_user),
+     mdb: MongoDatabase = Depends(get_mongo_db),
+ ):
+     """
+     Delete a given workflow execution and its downstream workflow executions, data objects,
+     functional annotation aggregation members, and related job records.
+
+     This endpoint performs recursive deletion of:
+     1. The specified workflow execution
+     2. All downstream workflow executions that depend on this execution's outputs
+     3. All functional annotation aggregation members generated by deleted workflow executions
+     4. All data objects that are outputs of deleted workflow executions
+     5. All job records that have the workflow execution ID as their config.activity_id
+
+     Input data objects (has_input) are preserved, as they may be used by other workflow executions.
+     TODO: Consider deleting input data objects that are _not_ used by other workflow executions
+     (otherwise, they may accumulate in the database as so-called "orphaned documents").
+
+     Parameters
+     ----------
+     workflow_execution_id : str
+         ID of the workflow execution to delete
+     user : User
+         Authenticated user (required)
+     mdb : MongoDatabase
+         MongoDB database connection
+
+     Returns
+     -------
+     dict
+         Catalog of deleted workflow executions, data objects, functional annotation aggregation members, and job records
+     """
+
+     # Check user permissions for delete operations
+     # TODO: Decouple this endpoint's authorization criteria from that of the `/queries:run` endpoint.
+     # Currently, both endpoints rely on the "/queries:run(query_cmd:DeleteCommand)" allowance.
+     check_can_update_and_delete(user)
+
+     try:
+         # Check if workflow execution exists
+         workflow_execution = mdb.workflow_execution_set.find_one(
+             {"id": workflow_execution_id}
+         )
+         if not workflow_execution:
+             raise HTTPException(
+                 status_code=status.HTTP_404_NOT_FOUND,
+                 detail=f"Workflow execution {workflow_execution_id} not found",
+             )
+
+         # Track what we've deleted to avoid cycles and provide a summary
+         deleted_workflow_execution_ids: Set[str] = set()
+         deleted_data_object_ids: Set[str] = set()
+         deleted_functional_annotation_agg_oids: Set[str] = set()
+         deleted_job_ids: Set[str] = set()
+
+         def find_linked_workflow_executions(
+             data_object_ids: List[str],
+         ) -> List[str]:
+             """Find workflow executions that use any of the given data objects as inputs."""
+             if not data_object_ids:
+                 return []
+
+             linked_wfes = list(
+                 mdb.workflow_execution_set.find(
+                     {"has_input": {"$in": data_object_ids}}, {"id": 1}
+                 )
+             )
+             return [wfe["id"] for wfe in linked_wfes]
+
+         def recursive_delete_workflow_execution(wfe_id: str) -> None:
+             """Recursively delete a workflow execution and all its downstream dependencies."""
+             if wfe_id in deleted_workflow_execution_ids:
+                 return  # Already deleted or in progress
+
+             # Get the workflow execution
+             wfe = mdb.workflow_execution_set.find_one({"id": wfe_id})
+             if not wfe:
+                 return  # Already deleted or doesn't exist
+
+             # Mark as being processed to prevent cycles
+             deleted_workflow_execution_ids.add(wfe_id)
+
+             # Get output data objects from this workflow execution
+             output_data_object_ids = wfe.get("has_output", [])
+
+             # Check if this is an AnnotatingWorkflow (e.g., metagenome annotation).
+             # If so, we need to also delete functional_annotation_agg records.
+             wfe_type = wfe.get("type", "")
+             is_annotating_workflow = wfe_type in [
+                 MetagenomeAnnotation.class_class_curie,
+                 MetatranscriptomeAnnotation.class_class_curie,
+                 MetaproteomicsAnalysis.class_class_curie,
+             ]
+
+             # Find linked workflow executions that use these data objects as inputs
+             linked_wfe_ids = find_linked_workflow_executions(output_data_object_ids)
+
+             # Recursively delete linked workflow executions first
+             for linked_wfe_id in linked_wfe_ids:
+                 if linked_wfe_id not in deleted_workflow_execution_ids:
+                     recursive_delete_workflow_execution(linked_wfe_id)
+
+             # Add data objects to deletion set
+             deleted_data_object_ids.update(output_data_object_ids)
+
+             # If this is an AnnotatingWorkflow, mark functional annotation records for deletion
+             if is_annotating_workflow:
+                 func_annotation_records = list(
+                     mdb.functional_annotation_agg.find(
+                         {"was_generated_by": wfe_id}, {"_id": 1}
+                     )
+                 )
+                 if func_annotation_records:
+                     # Store the ObjectIds for deletion from functional_annotation_agg
+                     deleted_functional_annotation_agg_oids.update(
+                         [str(record["_id"]) for record in func_annotation_records]
+                     )
+
+             # Find and mark job records for deletion that have this workflow execution as activity_id
+             job_records = list(mdb.jobs.find({"config.activity_id": wfe_id}, {"id": 1}))
+             if job_records:
+                 deleted_job_ids.update([job["id"] for job in job_records])
+
+         # Start recursive deletion from the target workflow execution
+         recursive_delete_workflow_execution(workflow_execution_id)
+
+         # Prepare deletion payload
+         docs_to_delete = {}
+         if deleted_workflow_execution_ids:
+             docs_to_delete["workflow_execution_set"] = list(
+                 deleted_workflow_execution_ids
+             )
+         if deleted_data_object_ids:
+             docs_to_delete["data_object_set"] = list(deleted_data_object_ids)
+         if deleted_functional_annotation_agg_oids:
+             docs_to_delete["functional_annotation_agg"] = list(
+                 deleted_functional_annotation_agg_oids
+             )
+         if deleted_job_ids:
+             docs_to_delete["jobs"] = list(deleted_job_ids)
+
+         # Perform the actual deletion using `_run_mdb_cmd`, so the operations
+         # undergo schema validation and referential-integrity checking, and
+         # deleted documents are backed up to the `nmdc_deleted` database.
+         deletion_results = {}
+
+         for collection_name, doc_ids in docs_to_delete.items():
+             if not doc_ids:
+                 continue
+
+             # Handle the special case of functional_annotation_agg, which uses _id instead of id
+             if collection_name == "functional_annotation_agg":
+                 # Convert string ObjectIds back to ObjectId instances for the filter
+                 object_ids = [ObjectId(doc_id) for doc_id in doc_ids]
+                 filter_dict = {"_id": {"$in": object_ids}}
+             else:
+                 # Standard case - use the id field
+                 filter_dict = {"id": {"$in": doc_ids}}
+
+             # Create delete command
+             delete_cmd = DeleteCommand(
+                 delete=collection_name,
+                 deletes=[
+                     DeleteStatement(q=filter_dict, limit=0)
+                 ],  # limit=0 means delete all matching
+             )
+
+             logging.warning(
+                 f"Executing cascading delete command for {collection_name} - you may temporarily encounter broken references."
+             )
+             # Execute the delete command
+             if collection_name == "jobs":
+                 response = _run_delete_nonschema(delete_cmd, mdb)
+             else:
+                 response = _run_mdb_cmd(delete_cmd, mdb, allow_broken_refs=True)
+
+             # Store the result
+             deletion_results[collection_name] = {
+                 "deleted_count": response.n,
+                 "doc_ids": doc_ids,
+             }
+
+         return {
+             "message": "Workflow execution and dependencies deleted successfully",
+             "deleted_workflow_execution_ids": list(deleted_workflow_execution_ids),
+             "deleted_data_object_ids": list(deleted_data_object_ids),
+             "deleted_functional_annotation_agg_oids": [
+                 str(oid) for oid in deleted_functional_annotation_agg_oids
+             ],
+             "deleted_job_ids": list(deleted_job_ids),
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logging.error(
+             f"Error during workflow execution deletion: {str(e)}", exc_info=True
+         )
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail=f"Error during deletion: {str(e)}",
+         )
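
The cascade in `delete_workflow_execution` walks the provenance graph from a workflow execution's outputs to the executions that consume them, deleting consumers before marking the outputs themselves. A self-contained sketch of that traversal over plain dicts, with MongoDB lookups replaced by an in-memory map and all identifiers hypothetical:

    # In-memory sketch of the recursive downstream deletion implemented above.
    wfes = {
        "wfe-1": {"has_input": ["do-0"], "has_output": ["do-1"]},
        "wfe-2": {"has_input": ["do-1"], "has_output": ["do-2"]},  # consumes wfe-1's output
    }
    deleted_wfe_ids: set[str] = set()
    deleted_data_object_ids: set[str] = set()

    def cascade_delete(wfe_id: str) -> None:
        if wfe_id in deleted_wfe_ids:
            return  # already visited; this guard is also what breaks cycles
        deleted_wfe_ids.add(wfe_id)
        outputs = wfes[wfe_id]["has_output"]
        # Any workflow execution consuming these outputs is downstream; recurse into it first.
        for other_id, other in wfes.items():
            if other_id not in deleted_wfe_ids and set(other["has_input"]) & set(outputs):
                cascade_delete(other_id)
        # Output data objects are marked for deletion; inputs are deliberately left alone.
        deleted_data_object_ids.update(outputs)

    cascade_delete("wfe-1")
    assert deleted_wfe_ids == {"wfe-1", "wfe-2"}
    assert deleted_data_object_ids == {"do-1", "do-2"}

Marking each execution as deleted before recursing is what makes the walk terminate on cyclic references, mirroring the `deleted_workflow_execution_ids` guard in the endpoint.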
nmdc_runtime/api/entrypoint.sh
@@ -0,0 +1,7 @@
+ #!/bin/bash
+
+ set -euo pipefail
+
+ exec gunicorn --worker-tmp-dir /dev/shm --workers=2 \
+     --threads=4 --worker-class gthread \
+     --log-file=- --bind 0.0.0.0:8000 nmdc_runtime.api.main:app
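
With this entrypoint, gunicorn serves `nmdc_runtime.api.main:app` on port 8000 using two gthread workers with four threads each (up to eight concurrent requests). A minimal smoke test, assuming a locally running instance published on localhost:8000 and that the app serves FastAPI's default Swagger UI at `/docs` (both assumptions, not guaranteed by the diff):

    # Hypothetical smoke test against a locally running instance (stdlib only).
    import urllib.request

    with urllib.request.urlopen("http://localhost:8000/docs") as resp:
        assert resp.status == 200  # the interactive API docs page responded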