nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
nmdc_runtime/api/endpoints/wf_file_staging.py
@@ -0,0 +1,307 @@
+ from fastapi import APIRouter, Depends, Query
+ from pymongo.database import Database
+ from typing import Annotated
+ from toolz import merge
+ import logging
+
+ from nmdc_runtime.api.core.util import raise404_if_none, HTTPException, status
+ from nmdc_runtime.api.db.mongo import get_mongo_db
+ from nmdc_runtime.api.endpoints.util import (
+     check_action_permitted,
+     list_resources,
+     strip_oid,
+ )
+ from nmdc_runtime.api.models.metadata import Doc
+ from nmdc_runtime.api.models.user import User, get_current_active_user
+ from nmdc_runtime.api.models.util import ListRequest, ListResponse
+ from nmdc_runtime.api.models.wfe_file_stages import (
+     GlobusTask,
+     GlobusTaskStatus,
+     JDPFileStatus,
+     JGISample,
+     JGISequencingProject,
+     WorkflowFileStagingCollectionName as CollectionName,
+ )
+
+ router = APIRouter()
+
+
+ def check_can_run_wf_file_staging_endpoints(user: User):
+     """
+     Check if the user is permitted to run the wf_file_staging endpoints in this file.
+     """
+     if not check_action_permitted(user.username, "/wf_file_staging"):
+         raise HTTPException(
+             status_code=status.HTTP_403_FORBIDDEN,
+             detail="Only specific users are allowed to issue wf_file_staging commands.",
+         )
+
+
+ @router.post(
+     "/wf_file_staging/globus_tasks",
+     status_code=status.HTTP_201_CREATED,
+     response_model=GlobusTask,
+ )
+ def create_globus_tasks(
+     globus_in: GlobusTask,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Create a `GlobusTask`."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     # check if a record with the same task_id already exists
+     existing = mdb["wf_file_staging.globus_tasks"].find_one(
+         {"task_id": globus_in.task_id}
+     )
+     if existing is not None:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail=f"Globus task with task_id {globus_in.task_id} already exists.",
+         )
+     # check the status exists in the Enum; if not, log a warning
+     if globus_in.task_status not in GlobusTaskStatus.__members__.values():
+         logging.warning(
+             f"Globus task status {globus_in.task_status} does not exist in GlobusTaskStatus enum."
+         )
+
+     globus_dict = globus_in.model_dump()
+     mdb["wf_file_staging.globus_tasks"].insert_one(globus_dict)
+     return globus_dict
+
+
+ @router.get("/wf_file_staging/globus_tasks/{task_id}", response_model=GlobusTask)
+ def get_globus_tasks(
+     task_id: str,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Retrieve a `GlobusTask`."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     return raise404_if_none(
+         mdb["wf_file_staging.globus_tasks"].find_one({"task_id": task_id})
+     )
+
+
+ @router.patch("/wf_file_staging/globus_tasks/{task_id}", response_model=GlobusTask)
+ def update_globus_tasks(
+     task_id: str,
+     globus_patch: GlobusTask,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Update a `GlobusTask`."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+
+     if task_id != globus_patch.task_id:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail="task_id in path and body must match.",
+         )
+
+     doc = raise404_if_none(
+         mdb["wf_file_staging.globus_tasks"].find_one({"task_id": task_id})
+     )
+     doc_globus_patched = merge(doc, globus_patch.model_dump(exclude_unset=True))
+     mdb["wf_file_staging.globus_tasks"].replace_one(
+         {"task_id": task_id}, doc_globus_patched
+     )
+     return doc_globus_patched
+
+
+ # Note: We use the generic `Doc` class—instead of the `GlobusTask` class—to describe the response
+ # because this endpoint (via `ListRequest`) supports projection, which can be used to omit
+ # fields from the response, even fields the `GlobusTask` class says are required.
+ @router.get(
+     "/wf_file_staging/globus_tasks",
+     response_model=ListResponse[Doc],
+     response_model_exclude_unset=True,
+ )
+ def list_globus_tasks(
+     req: Annotated[ListRequest, Query()],
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Get a list of `GlobusTask`s."""
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     rv = list_resources(req, mdb, "wf_file_staging.globus_tasks")
+     rv["resources"] = [strip_oid(d) for d in rv["resources"]]
+     return rv
+
+
+ @router.post(
+     "/wf_file_staging/jgi_samples",
+     status_code=status.HTTP_201_CREATED,
+     response_model=JGISample,
+ )
+ def create_jgi_sample(
+     jgi_in: JGISample,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """
+     Create a JGI Sample.
+     """
+
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+     # check if a record with the same jdp_file_id already exists
+     existing = mdb["wf_file_staging.jgi_samples"].find_one(
+         {"jdp_file_id": jgi_in.jdp_file_id}
+     )
+     if existing is not None:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail=f"JGI sample with jdp_file_id {jgi_in.jdp_file_id} already exists.",
+         )
+     # check the status exists in the enum; if not, log a warning
+     if jgi_in.jdp_file_status not in JDPFileStatus.__members__.values():
+         logging.warning(
+             f"JDP file status {jgi_in.jdp_file_status} does not exist in JDPFileStatus enum."
+         )
+     if jgi_in.globus_file_status not in GlobusTaskStatus.__members__.values():
+         logging.warning(
+             f"Globus file status {jgi_in.globus_file_status} does not exist in GlobusTaskStatus enum."
+         )
+
+     sample_dict = jgi_in.model_dump(exclude_unset=True)
+     try:
+         mdb["wf_file_staging.jgi_samples"].insert_one(sample_dict)
+         return sample_dict
+     except Exception as e:
+         logging.error(f"Error during jgi sample insertion: {str(e)}", exc_info=True)
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail=f"Error during insertion: {str(e)}",
+         )
+
+
+ # Note: We use the generic `Doc` class—instead of the `JGISample` class—to describe the response
+ # because this endpoint (via `ListRequest`) supports projection, which can be used to omit
+ # fields from the response, even fields the `JGISample` class says are required.
+ @router.get(
+     "/wf_file_staging/jgi_samples",
+     response_model=ListResponse[Doc],
+     response_model_exclude_unset=True,
+ )
+ def list_jgi_samples(
+     req: Annotated[ListRequest, Query()],
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     r"""
+     Retrieves JGI Sample records that match the specified filter criteria. Uses Mongo-like filters.
+     """
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+
+     rv = list_resources(req, mdb, "wf_file_staging.jgi_samples")
+     rv["resources"] = [strip_oid(d) for d in rv["resources"]]
+     return rv
+
+
+ @router.patch("/wf_file_staging/jgi_samples/{jdp_file_id}", response_model=JGISample)
+ def update_jgi_samples(
+     jdp_file_id: str,
+     jgi_sample_patch: JGISample,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """
+     Update a JGI Sample record by its jdp_file_id.
+     """
+     # check for permissions first
+     check_can_run_wf_file_staging_endpoints(user)
+
+     if jdp_file_id != jgi_sample_patch.jdp_file_id:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail="Cannot modify jdp_file_id (jdp_file_id in path and body must match).",
+         )
+
+     doc_jgi_sample_original = raise404_if_none(
+         mdb["wf_file_staging.jgi_samples"].find_one({"jdp_file_id": jdp_file_id})
+     )
+     doc_jgi_sample_patched = merge(
+         doc_jgi_sample_original, jgi_sample_patch.model_dump(exclude_unset=True)
+     )
+     mdb["wf_file_staging.jgi_samples"].replace_one(
+         {"jdp_file_id": jdp_file_id}, doc_jgi_sample_patched
+     )
+     return doc_jgi_sample_patched
+
+
+ # Note: We use the generic `Doc` class—instead of the `JGISequencingProject` class—to describe the response
+ # because this endpoint (via `ListRequest`) supports projection, which can be used to omit
+ # fields from the response, even fields the `JGISequencingProject` class says are required.
+ @router.get(
+     "/wf_file_staging/jgi_sequencing_projects",
+     response_model=ListResponse[Doc],
+     response_model_exclude_unset=True,
+ )
+ def list_sequencing_project_records(
+     req: Annotated[ListRequest, Query()],
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Get a list of `JGISequencingProject`s."""
+
+     check_can_run_wf_file_staging_endpoints(user)
+     rv = list_resources(req, mdb, CollectionName.JGI_SEQUENCING_PROJECTS.value)
+     rv["resources"] = [strip_oid(d) for d in rv["resources"]]
+     return rv
+
+
+ @router.post(
+     "/wf_file_staging/jgi_sequencing_projects",
+     status_code=status.HTTP_201_CREATED,
+     response_model=JGISequencingProject,
+ )
+ def create_sequencing_record(
+     sequencing_project_in: JGISequencingProject,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Create a `JGISequencingProject`."""
+
+     check_can_run_wf_file_staging_endpoints(user)
+     existing = mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value].find_one(
+         {"sequencing_project_name": sequencing_project_in.sequencing_project_name}
+     )
+     if existing is not None:
+         raise HTTPException(
+             status_code=status.HTTP_400_BAD_REQUEST,
+             detail=f"JGISequencingProject with project name {sequencing_project_in.sequencing_project_name} already exists.",
+         )
+     sequencing_project_dict = sequencing_project_in.model_dump()
+     mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value].insert_one(
+         sequencing_project_dict
+     )
+     return sequencing_project_dict
+
+
+ @router.get(
+     "/wf_file_staging/jgi_sequencing_projects/{sequencing_project_name}",
+     response_model=JGISequencingProject,
+ )
+ def get_sequencing_project(
+     sequencing_project_name: str,
+     mdb: Database = Depends(get_mongo_db),
+     user: User = Depends(get_current_active_user),
+ ):
+     """Retrieve a `JGISequencingProject`."""
+
+     check_can_run_wf_file_staging_endpoints(user)
+
+     return raise404_if_none(
+         mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value].find_one(
+             {"sequencing_project_name": sequencing_project_name}
+         )
+     )
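
The PATCH handlers above implement partial updates by merging the stored document with only the fields the caller explicitly set. A minimal sketch of that merge semantics, using a hypothetical stand-in model and field values rather than the real `GlobusTask` schema:

    # Sketch of the merge-based partial update used by the PATCH endpoints above.
    # `StagingTaskSketch` and its fields are hypothetical stand-ins.
    from pydantic import BaseModel
    from toolz import merge

    class StagingTaskSketch(BaseModel):
        task_id: str
        task_status: str | None = None
        label: str | None = None

    stored = {"task_id": "t-1", "task_status": "ACTIVE", "label": "initial"}
    patch = StagingTaskSketch(task_id="t-1", task_status="SUCCEEDED")

    # exclude_unset=True keeps only the fields the caller supplied, so `label`
    # survives the merge while `task_status` is overwritten by the patch.
    patched = merge(stored, patch.model_dump(exclude_unset=True))
    assert patched == {"task_id": "t-1", "task_status": "SUCCEEDED", "label": "initial"}

Because `merge` favors the right-hand dict, omitting a field from the request body leaves the stored value untouched; this is why the endpoints validate that the path ID matches the body ID before merging.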
nmdc_runtime/api/endpoints/workflows.py
@@ -0,0 +1,353 @@
+ import logging
+ import os
+ from typing import Any, List, Set, Annotated
+
+ import pymongo
+ from bson import ObjectId
+ from fastapi import APIRouter, Depends, HTTPException, Path
+ from pymongo.database import Database as MongoDatabase
+ from pymongo.errors import BulkWriteError
+ from starlette import status
+
+ from nmdc_runtime.api.core.util import raise404_if_none
+ from nmdc_runtime.api.endpoints.queries import (
+     _run_mdb_cmd,
+     check_can_update_and_delete,
+     _run_delete_nonschema,
+ )
+ from nmdc_runtime.api.db.mongo import get_mongo_db, validate_json
+ from nmdc_runtime.api.models.capability import Capability
+ from nmdc_runtime.api.models.object_type import ObjectType
+ from nmdc_runtime.api.models.query import DeleteCommand, DeleteStatement
+ from nmdc_runtime.api.models.site import Site, get_current_client_site
+ from nmdc_runtime.api.models.user import User, get_current_active_user
+ from nmdc_runtime.api.models.util import DeleteResponse
+ from nmdc_runtime.api.models.workflow import Workflow
+ from nmdc_runtime.site.resources import MongoDB
+ from nmdc_schema.nmdc import (
+     MetagenomeAnnotation,
+     MetaproteomicsAnalysis,
+     MetatranscriptomeAnnotation,
+ )
+
+
+ router = APIRouter()
+
+
+ @router.get("/workflows", response_model=List[Workflow])
+ def list_workflows(
+     mdb: pymongo.database.Database = Depends(get_mongo_db),
+ ):
+     return list(mdb.workflows.find())
+
+
+ @router.get("/workflows/{workflow_id}", response_model=Workflow)
+ def get_workflow(
+     workflow_id: str,
+     mdb: pymongo.database.Database = Depends(get_mongo_db),
+ ):
+     return raise404_if_none(mdb.workflows.find_one({"id": workflow_id}))
+
+
+ @router.get("/workflows/{workflow_id}/object_types", response_model=List[ObjectType])
+ def list_workflow_object_types(
+     workflow_id: str, mdb: pymongo.database.Database = Depends(get_mongo_db)
+ ):
+     object_type_ids = [
+         doc["object_type_id"] for doc in mdb.triggers.find({"workflow_id": workflow_id})
+     ]
+     return list(mdb.object_types.find({"id": {"$in": object_type_ids}}))
+
+
+ @router.get("/workflows/{workflow_id}/capabilities", response_model=List[Capability])
+ def list_workflow_capabilities(
+     workflow_id: str, mdb: pymongo.database.Database = Depends(get_mongo_db)
+ ):
+     doc = raise404_if_none(mdb.workflows.find_one({"id": workflow_id}))
+     return list(mdb.capabilities.find({"id": {"$in": doc.get("capability_ids", [])}}))
+
+
+ @router.post("/workflows/activities", status_code=status.HTTP_410_GONE, deprecated=True)
+ async def post_activity(
+     activity_set: dict[str, Any],
+     site: Site = Depends(get_current_client_site),
+     mdb: MongoDatabase = Depends(get_mongo_db),
+ ):
+     """
+     DEPRECATED: migrate all workflows from this endpoint to `/workflows/workflow_executions`.
+     """
+     return "DEPRECATED: POST your request to `/workflows/workflow_executions` instead."
+
+
+ @router.post("/workflows/workflow_executions")
+ async def post_workflow_execution(
+     workflow_execution_set: dict[str, Any],
+     site: Site = Depends(get_current_client_site),
+     mdb: MongoDatabase = Depends(get_mongo_db),
+ ):
+     """
+     Post workflow execution set to database and claim job.
+
+     Parameters
+     ----------
+     workflow_execution_set: dict[str, Any]
+         Set of workflow executions for specific workflows, in the form of an nmdc:Database.
+         Other collections (such as data_object_set) are allowed, as they may be associated
+         with the workflow executions submitted.
+
+     site: Site
+     mdb: MongoDatabase
+
+     Returns
+     -------
+     dict[str, str]
+     """
+     _ = site  # must be authenticated
+     try:
+         # validate request JSON
+         rv = validate_json(
+             workflow_execution_set, mdb, check_inter_document_references=True
+         )
+         if rv["result"] == "errors":
+             raise HTTPException(
+                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                 detail=str(rv),
+             )
+         # create mongodb instance for dagster
+         mongo_resource = MongoDB(
+             host=os.getenv("MONGO_HOST"),
+             dbname=os.getenv("MONGO_DBNAME"),
+             username=os.getenv("MONGO_USERNAME"),
+             password=os.getenv("MONGO_PASSWORD"),
+         )
+         mongo_resource.add_docs(workflow_execution_set, validate=False, replace=True)
+         return {"message": "jobs accepted"}
+     except BulkWriteError as e:
+         raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e))
+     except ValueError as e:
+         raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(e))
+
+
+ @router.delete(
+     "/workflows/workflow_executions/{workflow_execution_id}",
+     response_model=DeleteResponse,
+     description="Delete a workflow execution and its downstream workflow executions, data objects, "
+     "functional annotation aggregation members, and related job records.\n\n"
+     "This endpoint performs recursive deletion of the specified workflow execution, "
+     "all downstream workflow executions that depend on this workflow execution's outputs, "
+     "all functional annotation aggregation members generated by deleted workflow executions, "
+     "all data objects that are outputs of deleted workflow executions, "
+     "and all job records that have the workflow execution ID as their config.activity_id.",
+ )
+ async def delete_workflow_execution(
+     workflow_execution_id: Annotated[
+         str,
+         Path(
+             title="Workflow Execution ID",
+             description="The `id` of the `WorkflowExecution` you want to delete.\n\n_Example_: `nmdc:wfmgan-11-abc123.1`",
+             examples=["nmdc:wfmgan-11-abc123.1"],
+         ),
+     ],
+     user: User = Depends(get_current_active_user),
+     mdb: MongoDatabase = Depends(get_mongo_db),
+ ):
+     """
+     Delete a given workflow execution and its downstream workflow executions, data objects,
+     functional annotation aggregation members, and related job records.
+
+     This endpoint performs recursive deletion of:
+     1. The specified workflow execution
+     2. All downstream workflow executions that depend on this execution's outputs
+     3. All functional annotation aggregation members generated by deleted workflow executions
+     4. All data objects that are outputs of deleted workflow executions
+     5. All job records that have the workflow execution ID as their config.activity_id
+
+     Input data objects (has_input) are preserved, as they may be used by other workflow executions.
+     TODO: Consider deleting input data objects that are _not_ used by other workflow executions
+     (otherwise, they may accumulate in the database as so-called "orphaned documents").
+
+     Parameters
+     ----------
+     workflow_execution_id : str
+         ID of the workflow execution to delete
+     user : User
+         Authenticated user (required)
+     mdb : MongoDatabase
+         MongoDB database connection
+
+     Returns
+     -------
+     dict
+         Catalog of deleted workflow executions, data objects, functional annotation aggregation members, and job records
+     """
+
+     # Check user permissions for delete operations
+     # TODO: Decouple this endpoint's authorization criteria from that of the `/queries:run` endpoint.
+     # Currently, both endpoints rely on the "/queries:run(query_cmd:DeleteCommand)" allowance.
+     check_can_update_and_delete(user)
+
+     try:
+         # Check if workflow execution exists
+         workflow_execution = mdb.workflow_execution_set.find_one(
+             {"id": workflow_execution_id}
+         )
+         if not workflow_execution:
+             raise HTTPException(
+                 status_code=status.HTTP_404_NOT_FOUND,
+                 detail=f"Workflow execution {workflow_execution_id} not found",
+             )
+
+         # Track what we've deleted to avoid cycles and provide a summary
+         deleted_workflow_execution_ids: Set[str] = set()
+         deleted_data_object_ids: Set[str] = set()
+         deleted_functional_annotation_agg_oids: Set[str] = set()
+         deleted_job_ids: Set[str] = set()
+
+         def find_linked_workflow_executions(
+             data_object_ids: List[str],
+         ) -> List[str]:
+             """Find workflow executions that use any of the given data objects as inputs."""
+             if not data_object_ids:
+                 return []
+
+             linked_wfes = list(
+                 mdb.workflow_execution_set.find(
+                     {"has_input": {"$in": data_object_ids}}, {"id": 1}
+                 )
+             )
+             return [wfe["id"] for wfe in linked_wfes]
+
+         def recursive_delete_workflow_execution(wfe_id: str) -> None:
+             """Recursively delete a workflow execution and all its downstream dependencies."""
+             if wfe_id in deleted_workflow_execution_ids:
+                 return  # Already deleted or in progress
+
+             # Get the workflow execution
+             wfe = mdb.workflow_execution_set.find_one({"id": wfe_id})
+             if not wfe:
+                 return  # Already deleted or doesn't exist
+
+             # Mark as being processed to prevent cycles
+             deleted_workflow_execution_ids.add(wfe_id)
+
+             # Get output data objects from this workflow execution
+             output_data_object_ids = wfe.get("has_output", [])
+
+             # Check if this is an AnnotatingWorkflow (e.g., metagenome annotation).
+             # If so, we need to also delete functional_annotation_agg records.
+             wfe_type = wfe.get("type", "")
+             is_annotating_workflow = wfe_type in [
+                 MetagenomeAnnotation.class_class_curie,
+                 MetatranscriptomeAnnotation.class_class_curie,
+                 MetaproteomicsAnalysis.class_class_curie,
+             ]
+
+             # Find linked workflow executions that use these data objects as inputs
+             linked_wfe_ids = find_linked_workflow_executions(output_data_object_ids)
+
+             # Recursively delete linked workflow executions first
+             for linked_wfe_id in linked_wfe_ids:
+                 if linked_wfe_id not in deleted_workflow_execution_ids:
+                     recursive_delete_workflow_execution(linked_wfe_id)
+
+             # Add data objects to deletion set
+             deleted_data_object_ids.update(output_data_object_ids)
+
+             # If this is an AnnotatingWorkflow, mark functional annotation records for deletion
+             if is_annotating_workflow:
+                 func_annotation_records = list(
+                     mdb.functional_annotation_agg.find(
+                         {"was_generated_by": wfe_id}, {"_id": 1}
+                     )
+                 )
+                 if func_annotation_records:
+                     # Store the ObjectIds for deletion from functional_annotation_agg
+                     deleted_functional_annotation_agg_oids.update(
+                         [str(record["_id"]) for record in func_annotation_records]
+                     )
+
+             # Find and mark job records for deletion that have this workflow execution as activity_id
+             job_records = list(mdb.jobs.find({"config.activity_id": wfe_id}, {"id": 1}))
+             if job_records:
+                 deleted_job_ids.update([job["id"] for job in job_records])
+
+         # Start recursive deletion from the target workflow execution
+         recursive_delete_workflow_execution(workflow_execution_id)
+
+         # Prepare deletion payload
+         docs_to_delete = {}
+         if deleted_workflow_execution_ids:
+             docs_to_delete["workflow_execution_set"] = list(
+                 deleted_workflow_execution_ids
+             )
+         if deleted_data_object_ids:
+             docs_to_delete["data_object_set"] = list(deleted_data_object_ids)
+         if deleted_functional_annotation_agg_oids:
+             docs_to_delete["functional_annotation_agg"] = list(
+                 deleted_functional_annotation_agg_oids
+             )
+         if deleted_job_ids:
+             docs_to_delete["jobs"] = list(deleted_job_ids)
+
+         # Perform the actual deletion using `_run_mdb_cmd`, so the operations
+         # undergo schema validation and referential-integrity checking, and
+         # deleted documents are backed up to the `nmdc_deleted` database.
+         deletion_results = {}
+
+         for collection_name, doc_ids in docs_to_delete.items():
+             if not doc_ids:
+                 continue
+
+             # Handle the special case of functional_annotation_agg, which uses _id instead of id
+             if collection_name == "functional_annotation_agg":
+                 # Convert string ObjectIds back to ObjectId instances for the filter
+                 object_ids = [ObjectId(doc_id) for doc_id in doc_ids]
+                 filter_dict = {"_id": {"$in": object_ids}}
+             else:
+                 # Standard case - use the id field
+                 filter_dict = {"id": {"$in": doc_ids}}
+
+             # Create delete command
+             delete_cmd = DeleteCommand(
+                 delete=collection_name,
+                 deletes=[
+                     DeleteStatement(q=filter_dict, limit=0)
+                 ],  # limit=0 means delete all matching
+             )
+
+             logging.warning(
+                 f"Executing cascading delete command for {collection_name} - you may temporarily encounter broken references."
+             )
+             # Execute the delete command
+             if collection_name == "jobs":
+                 response = _run_delete_nonschema(delete_cmd, mdb)
+             else:
+                 response = _run_mdb_cmd(delete_cmd, mdb, allow_broken_refs=True)
+
+             # Store the result
+             deletion_results[collection_name] = {
+                 "deleted_count": response.n,
+                 "doc_ids": doc_ids,
+             }
+
+         return {
+             "message": "Workflow execution and dependencies deleted successfully",
+             "deleted_workflow_execution_ids": list(deleted_workflow_execution_ids),
+             "deleted_data_object_ids": list(deleted_data_object_ids),
+             "deleted_functional_annotation_agg_oids": [
+                 str(oid) for oid in deleted_functional_annotation_agg_oids
+             ],
+             "deleted_job_ids": list(deleted_job_ids),
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logging.error(
+             f"Error during workflow execution deletion: {str(e)}", exc_info=True
+         )
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail=f"Error during deletion: {str(e)}",
+         )
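
The cascade in `delete_workflow_execution` walks the provenance graph from a workflow execution's outputs to the executions that consume them, deleting consumers before marking the outputs themselves. A self-contained sketch of that traversal over plain dicts, with MongoDB lookups replaced by an in-memory map and all identifiers hypothetical:

    # In-memory sketch of the recursive downstream deletion implemented above.
    wfes = {
        "wfe-1": {"has_input": ["do-0"], "has_output": ["do-1"]},
        "wfe-2": {"has_input": ["do-1"], "has_output": ["do-2"]},  # consumes wfe-1's output
    }
    deleted_wfe_ids: set[str] = set()
    deleted_data_object_ids: set[str] = set()

    def cascade_delete(wfe_id: str) -> None:
        if wfe_id in deleted_wfe_ids:
            return  # already visited; this guard is also what breaks cycles
        deleted_wfe_ids.add(wfe_id)
        outputs = wfes[wfe_id]["has_output"]
        # Any workflow execution consuming these outputs is downstream; recurse into it first.
        for other_id, other in wfes.items():
            if other_id not in deleted_wfe_ids and set(other["has_input"]) & set(outputs):
                cascade_delete(other_id)
        # Output data objects are marked for deletion; inputs are deliberately left alone.
        deleted_data_object_ids.update(outputs)

    cascade_delete("wfe-1")
    assert deleted_wfe_ids == {"wfe-1", "wfe-2"}
    assert deleted_data_object_ids == {"do-1", "do-2"}

Marking each execution as deleted before recursing is what makes the walk terminate on cyclic references, mirroring the `deleted_workflow_execution_ids` guard in the endpoint.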
nmdc_runtime/api/entrypoint.sh
@@ -0,0 +1,7 @@
+ #!/bin/bash
+
+ set -euo pipefail
+
+ exec gunicorn --worker-tmp-dir /dev/shm --workers=2 \
+     --threads=4 --worker-class gthread \
+     --log-file=- --bind 0.0.0.0:8000 nmdc_runtime.api.main:app
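
With this entrypoint, gunicorn serves `nmdc_runtime.api.main:app` on port 8000 using two gthread workers with four threads each (up to eight concurrent requests). A minimal smoke test, assuming a locally running instance published on localhost:8000 and that the app serves FastAPI's default Swagger UI at `/docs` (both assumptions, not guaranteed by the diff):

    # Hypothetical smoke test against a locally running instance (stdlib only).
    import urllib.request

    with urllib.request.urlopen("http://localhost:8000/docs") as resp:
        assert resp.status == 200  # the interactive API docs page responded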