PyPI - nmdc-runtime - Versions diffs - 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl - Mend

nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (143) hide show

nmdc_runtime/Dockerfile +177 -0
nmdc_runtime/api/analytics.py +90 -0
nmdc_runtime/api/boot/capabilities.py +9 -0
nmdc_runtime/api/boot/object_types.py +126 -0
nmdc_runtime/api/boot/triggers.py +84 -0
nmdc_runtime/api/boot/workflows.py +116 -0
nmdc_runtime/api/core/auth.py +212 -0
nmdc_runtime/api/core/idgen.py +200 -0
nmdc_runtime/api/core/metadata.py +777 -0
nmdc_runtime/api/core/util.py +114 -0
nmdc_runtime/api/db/mongo.py +436 -0
nmdc_runtime/api/db/s3.py +37 -0
nmdc_runtime/api/endpoints/capabilities.py +25 -0
nmdc_runtime/api/endpoints/find.py +634 -0
nmdc_runtime/api/endpoints/jobs.py +206 -0
nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
nmdc_runtime/api/endpoints/metadata.py +260 -0
nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
nmdc_runtime/api/endpoints/object_types.py +38 -0
nmdc_runtime/api/endpoints/objects.py +277 -0
nmdc_runtime/api/endpoints/operations.py +78 -0
nmdc_runtime/api/endpoints/queries.py +701 -0
nmdc_runtime/api/endpoints/runs.py +98 -0
nmdc_runtime/api/endpoints/search.py +38 -0
nmdc_runtime/api/endpoints/sites.py +205 -0
nmdc_runtime/api/endpoints/triggers.py +25 -0
nmdc_runtime/api/endpoints/users.py +214 -0
nmdc_runtime/api/endpoints/util.py +817 -0
nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
nmdc_runtime/api/endpoints/workflows.py +353 -0
nmdc_runtime/api/entrypoint.sh +7 -0
nmdc_runtime/api/main.py +495 -0
nmdc_runtime/api/middleware.py +43 -0
nmdc_runtime/api/models/capability.py +14 -0
nmdc_runtime/api/models/id.py +92 -0
nmdc_runtime/api/models/job.py +57 -0
nmdc_runtime/api/models/lib/helpers.py +78 -0
nmdc_runtime/api/models/metadata.py +11 -0
nmdc_runtime/api/models/nmdc_schema.py +146 -0
nmdc_runtime/api/models/object.py +180 -0
nmdc_runtime/api/models/object_type.py +20 -0
nmdc_runtime/api/models/operation.py +66 -0
nmdc_runtime/api/models/query.py +246 -0
nmdc_runtime/api/models/query_continuation.py +111 -0
nmdc_runtime/api/models/run.py +161 -0
nmdc_runtime/api/models/site.py +87 -0
nmdc_runtime/api/models/trigger.py +13 -0
nmdc_runtime/api/models/user.py +207 -0
nmdc_runtime/api/models/util.py +260 -0
nmdc_runtime/api/models/wfe_file_stages.py +122 -0
nmdc_runtime/api/models/workflow.py +15 -0
nmdc_runtime/api/openapi.py +178 -0
nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
nmdc_runtime/config.py +56 -0
nmdc_runtime/minter/adapters/repository.py +22 -2
nmdc_runtime/minter/config.py +30 -4
nmdc_runtime/minter/domain/model.py +55 -1
nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
nmdc_runtime/mongo_util.py +89 -0
nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
nmdc_runtime/site/dagster.yaml +53 -0
nmdc_runtime/site/entrypoint-daemon.sh +29 -0
nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
nmdc_runtime/site/entrypoint-dagit.sh +29 -0
nmdc_runtime/site/export/ncbi_xml.py +1331 -0
nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
nmdc_runtime/site/export/study_metadata.py +27 -4
nmdc_runtime/site/graphs.py +294 -45
nmdc_runtime/site/ops.py +1008 -230
nmdc_runtime/site/repair/database_updater.py +451 -0
nmdc_runtime/site/repository.py +368 -133
nmdc_runtime/site/resources.py +154 -80
nmdc_runtime/site/translation/gold_translator.py +235 -83
nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
nmdc_runtime/site/translation/neon_utils.py +24 -7
nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
nmdc_runtime/site/translation/translator.py +73 -3
nmdc_runtime/site/util.py +26 -7
nmdc_runtime/site/validation/emsl.py +1 -0
nmdc_runtime/site/validation/gold.py +1 -0
nmdc_runtime/site/validation/util.py +16 -12
nmdc_runtime/site/workspace.yaml +13 -0
nmdc_runtime/static/NMDC_logo.svg +1073 -0
nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
nmdc_runtime/static/README.md +5 -0
nmdc_runtime/static/favicon.ico +0 -0
nmdc_runtime/util.py +236 -192
nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
{nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
{nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
nmdc_runtime/containers.py +0 -14
nmdc_runtime/core/db/Database.py +0 -15
nmdc_runtime/core/exceptions/__init__.py +0 -23
nmdc_runtime/core/exceptions/base.py +0 -47
nmdc_runtime/core/exceptions/token.py +0 -13
nmdc_runtime/domain/users/queriesInterface.py +0 -18
nmdc_runtime/domain/users/userSchema.py +0 -37
nmdc_runtime/domain/users/userService.py +0 -14
nmdc_runtime/infrastructure/database/db.py +0 -3
nmdc_runtime/infrastructure/database/models/user.py +0 -10
nmdc_runtime/lib/__init__.py +0 -1
nmdc_runtime/lib/extract_nmdc_data.py +0 -41
nmdc_runtime/lib/load_nmdc_data.py +0 -121
nmdc_runtime/lib/nmdc_dataframes.py +0 -829
nmdc_runtime/lib/nmdc_etl_class.py +0 -402
nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
nmdc_runtime/site/drsobjects/ingest.py +0 -93
nmdc_runtime/site/drsobjects/registration.py +0 -131
nmdc_runtime/site/terminusdb/generate.py +0 -198
nmdc_runtime/site/terminusdb/ingest.py +0 -44
nmdc_runtime/site/terminusdb/schema.py +0 -1671
nmdc_runtime/site/translation/emsl.py +0 -42
nmdc_runtime/site/translation/gold.py +0 -53
nmdc_runtime/site/translation/jgi.py +0 -31
nmdc_runtime/site/translation/util.py +0 -132
nmdc_runtime/site/validation/jgi.py +0 -42
nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
/nmdc_runtime/{client → api}/__init__.py +0 -0
/nmdc_runtime/{core → api/boot}/__init__.py +0 -0
/nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
/nmdc_runtime/{domain → api/db}/__init__.py +0 -0
/nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
/nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
/nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
/nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
/nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
/nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
{nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0

nmdc_runtime/api/models/user.py ADDED Viewed

@@ -0,0 +1,207 @@
+import logging
+from typing import List, Optional, Union
+import pymongo.database
+from fastapi import Depends, HTTPException, status
+from jose import jwt
+from pydantic import BaseModel
+from jose.exceptions import ExpiredSignatureError, JWTClaimsError, JWTError
+from nmdc_runtime.api.core.auth import (
+    verify_password,
+    SECRET_KEY,
+    ALGORITHM,
+    oauth2_scheme,
+    credentials_exception,
+    TokenData,
+    bearer_scheme,
+)
+from nmdc_runtime.api.models.site import get_site
+from nmdc_runtime.api.db.mongo import get_mongo_db
+class User(BaseModel):
+    r"""
+    Base model describing a user. Only `username` is required.
+    The `UserIn` and `UserInDB` subclasses add a `password` and a `hashed_password`, respectively.
+    """
+    username: str
+    email: Optional[str] = None
+    full_name: Optional[str] = None
+    # NOTE(review): Presumably the `id`s of the sites this user administers — confirm against callers.
+    site_admin: Optional[List[str]] = []
+    # When this is truthy, `get_current_active_user` rejects the user.
+    disabled: Optional[bool] = False
+class UserIn(User):
+    r"""
+    A `User` that also has a required `password` field.
+    NOTE(review): Presumably the plaintext password submitted when creating a user — confirm.
+    """
+    password: str
+class UserInDB(User):
+    r"""
+    A `User` that also has a required `hashed_password` field.
+    The `get_user` function builds instances of this class from documents in the `users` collection.
+    """
+    hashed_password: str
+def get_user(mdb, username: str) -> Optional[UserInDB]:
+    r"""
+    Returns the user having the specified username.
+    """
+    user = mdb.users.find_one({"username": username})
+    if user is not None:
+        return UserInDB(**user)
+def authenticate_user(mdb, username: str, password: str) -> Union[UserInDB, bool]:
+    r"""
+    Returns the user, if any, having the specified username/password combination.
+    """
+    user = get_user(mdb, username)
+    if not user:
+        return False
+    if not verify_password(password, user.hashed_password):
+        return False
+    return user
+async def get_current_user(
+    token: str = Depends(oauth2_scheme),
+    bearer_credentials: str = Depends(bearer_scheme),
+    mdb: pymongo.database.Database = Depends(get_mongo_db),
+) -> UserInDB:
+    r"""
+    Returns a user based upon the provided token.
+    If the token belongs to a site client, the returned user is an ephemeral "user"
+    whose username is the site client's `client_id`.
+    Raises an exception if the token is invalid.
+    Every exception defined in this function is an HTTP 401 response having a
+    `WWW-Authenticate: Bearer` header; only the `detail` message varies among them.
+    Note: The `bearer_credentials` parameter is not referenced in the function body.
+          Presumably it is declared so that the `bearer_scheme` dependency runs — TODO confirm.
+    Reference: The following web page contains information about JWT claims:
+               https://auth0.com/docs/secure/tokens/json-web-tokens/json-web-token-claims
+    """
+    # Define some exceptions, which contain actionable—but not sensitive—information.
+    # Note: The first three have identical `detail` messages; the distinct names document which check failed.
+    invalid_subject_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Access token is invalid. Please log in again.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    invalid_claims_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Access token is invalid. Please log in again.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    invalid_token_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Access token is invalid. Please log in again.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    invalidated_token_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Access token has been invalidated. Please log in again.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    expired_token_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Access token has expired. Please log in again.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    invalid_or_missing_token_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Access token is invalid or missing. Please log in again.",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    # Check whether there is a token, and whether it has been invalidated.
+    # Note: The `invalidated_tokens` collection is searched using the raw token string as the `_id`,
+    #       and this lookup happens before the token's signature is validated.
+    if token is None:
+        raise invalid_or_missing_token_exception
+    elif mdb.invalidated_tokens.find_one({"_id": token}):
+        raise invalidated_token_exception
+    # Validate the signature of the JWT and extract its payload.
+    try:
+        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+    except ExpiredSignatureError as e:
+        logging.exception(e)
+        raise expired_token_exception
+    except JWTClaimsError as e:
+        logging.exception(e)
+        raise invalid_claims_exception
+    except (JWTError, AttributeError) as e:
+        logging.exception(e)
+        raise invalid_token_exception
+    # Extract the prefix and the username from the subject.
+    # Note: A valid subject is a string of the form "user:<username>" or "client:<client_id>".
+    subject: Optional[str] = payload.get("sub", None)
+    if isinstance(subject, str):
+        if subject.startswith("user:"):
+            subject_prefix = "user:"
+        elif subject.startswith("client:"):
+            subject_prefix = "client:"
+        else:
+            logging.warning("The subject contains an invalid prefix.")
+            raise invalid_subject_exception
+        username = subject.removeprefix(subject_prefix)
+        if username == "":
+            logging.warning("The subject contains nothing after the prefix.")
+            raise invalid_subject_exception
+    else:
+        logging.warning("The subject is not a string.")
+        raise invalid_subject_exception
+    # Note: `token_data.subject` holds the subject with its prefix removed.
+    token_data = TokenData(subject=username)
+    # Coerce a "client" into a "user"
+    # TODO: consolidate the client/user distinction.
+    if not isinstance(token_data.subject, str):
+        logging.warning("The subject is not a string.")
+        raise invalid_subject_exception
+    elif subject_prefix == "user:":
+        user = get_user(mdb, username=token_data.subject)
+    elif subject_prefix == "client:":
+        # construct a user from the client_id
+        user = get_client_user(mdb, client_id=token_data.subject)
+    else:
+        # Note: We already validate the subject's prefix above, so we expect this case to never occur.
+        logging.warning("The subject prefix is not something we recognize.")
+        user = None
+    # Note: `get_user` returns `None` when no user has the specified username.
+    if user is None:
+        logging.warning(
+            f"Failed to resolve token subject '{token_data.subject}' to a user."
+        )
+        raise invalid_subject_exception
+    return user
+def get_client_user(mdb, client_id: str) -> UserInDB:
+    r"""
+    Returns an ephemeral "user" whose username is the specified `client_id`
+    and whose password is the hashed secret of the client; provided that the
+    specified `client_id` is associated with a site in the database.
+    TODO: Clarify the above summary of the function.
+    """
+    # Get the site associated with the identified client.
+    site = get_site(mdb, client_id)
+    if site is None:
+        raise credentials_exception
+    # Get the client, itself, via the site.
+    client = next(client for client in site.clients if client.id == client_id)
+    if client is None:
+        raise credentials_exception
+    # Make an ephemeral "user" whose username matches the client's `id`.
+    user = UserInDB(username=client.id, hashed_password=client.hashed_secret)
+    return user
+async def get_current_active_user(
+    current_user: UserInDB = Depends(get_current_user),
+) -> UserInDB:
+    r"""
+    Returns the current user, provided their user account is not disabled.
+    """
+    if current_user.disabled:
+        raise HTTPException(status_code=400, detail="Inactive user")
+    return current_user

nmdc_runtime/api/models/util.py ADDED Viewed

@@ -0,0 +1,260 @@
+from typing import TypeVar, List, Optional, Generic, Annotated
+from pydantic import model_validator, Field, BaseModel
+ResultT = TypeVar("ResultT")
+class ListResponse(BaseModel, Generic[ResultT]):
+    r"""
+    A list of resources of type `ResultT`, plus an optional token for requesting the next page.
+    """
+    resources: List[ResultT]
+    # NOTE(review): Defaults to `None`; presumably `None` means there are no further pages — confirm against the endpoints.
+    next_page_token: Optional[str] = None
+class ListRequest(BaseModel):
+    r"""
+    An encapsulation of a set of parameters accepted by API endpoints related to listing things.
+    Every field has a default value, so a request may omit any of them.
+    Note: This class was documented after the `FindRequest` class was documented. You can refer to the documentation of
+          the latter class for additional context about the usage of Pydantic's `Field` constructor in this class.
+    """
+    filter: Optional[str] = Field(
+        default=None,
+        title="Filter",
+        description="""The criteria by which you want to filter the resources, in the same format as the [`query`
+                    parameter](https://www.mongodb.com/docs/manual/reference/method/db.collection.find/#std-label-method-find-query)
+                    of MongoDB's `db.collection.find()` method.\n\n_Example:_
+                    `{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}`""",
+        examples=[
+            r'{"ecosystem_type": "Freshwater"}',
+            r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
+        ],
+    )
+    # TODO: Document the following things about this type hint and `Field` definition:
+    #       (a) why the type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter),
+    #       (b) why the default value here is 20 as opposed to 25 (the default value in `FindRequest`), and
+    #       (c) why there is no upper limit on the value (the `PerPageRange` type has an upper limit of 2000).
+    #
+    # Note: If the HTTP request lacks a value for this parameter, Pydantic will fall back to the default value specified here.
+    # Note: The description says "positive integer", but the `int` type hint does not enforce a lower bound.
+    max_page_size: int = Field(
+        default=20,
+        title="Resources per page",
+        description="How many resources you want _each page_ to contain, formatted as a positive integer.",
+        examples=[20],
+    )
+    page_token: Optional[str] = Field(
+        default=None,
+        title="Next page token",
+        description="""A bookmark you can use to fetch the _next_ page of resources. You can get this from the
+                    `next_page_token` field in a previous response from this endpoint.\n\n_Example_:
+                    `nmdc:sys0zr0fbt71`""",
+        examples=[
+            "nmdc:sys0zr0fbt71",
+        ],
+    )
+    # TODO: Document the endpoint's behavior when a projection includes a _nested_ field identifier (i.e. `foo.bar`),
+    #       and ensure the endpoint doesn't break when the projection includes field descriptors that contain commas.
+    projection: Optional[str] = Field(
+        default=None,
+        title="Projection",
+        description="""Comma-delimited list of the names of the fields you want the resources in the response to
+                    include. Note: In addition to those fields, the response will also include the `id`
+                    field.\n\n_Example_: `name, ecosystem_type`""",
+        examples=[
+            "name, ecosystem_type",
+        ],
+    )
+# An integer that must be greater than 0 and less than or equal to 2000.
+PerPageRange = Annotated[int, Field(gt=0, le=2_000)]
+class FindRequest(BaseModel):
+    r"""
+    An encapsulation of a set of parameters accepted by API endpoints related to finding things.
+    Notes:
+    - The "Query Parameter Models" section of the FastAPI docs says that this way of encapsulating
+      a set of query parameter definitions in a Pydantic model — so that Swagger UI displays a given
+      parameter's _description_ — was introduced in FastAPI 0.115.0.
+      Reference: https://fastapi.tiangolo.com/tutorial/query-param-models/
+    - While Swagger UI does show the parameter's _description_, specifically, it does not currently show the
+      parameter's _title_ or example value(s). The approach shown in the "Classes as Dependencies" section
+      of the FastAPI docs (i.e. https://fastapi.tiangolo.com/tutorial/dependencies/classes-as-dependencies/)
+      does result in Swagger UI showing those additional things, but the approach involves not inheriting
+      from Pydantic's `BaseModel` class and involves defining an `__init__` method for the class. That is
+      further than I want to take these classes from their existing selves at this point. To compensate
+      for that, I have included examples _within_ some of the descriptions.
+      Reference: https://github.com/fastapi/fastapi/issues/318#issuecomment-507043221
+    - The "Fields" section of the Pydantic docs says:
+      > "The `Field` function is used to customize and add metadata to fields of models."
+      References: https://docs.pydantic.dev/latest/concepts/fields/
+    """
+    filter: Optional[str] = Field(
+        default=None,
+        title="Filter",
+        description="""The criteria by which you want to filter the resources, formatted as a comma-separated list of
+                    `attribute:value` pairs. The `value` can include a comparison operator (e.g. `>=`). If the attribute
+                    is of type _string_ and you append `.search` to its name, the server will perform a full-text
+                    search.\n\n_Example:_ `ecosystem_category:Plants, lat_lon.latitude:>35.0`""",
+        examples=[
+            "ecosystem_category:Plants",
+            "ecosystem_category:Plants, lat_lon.latitude:>35.0",
+        ],
+    )
+    search: Optional[str] = Field(
+        default=None,
+        title="Search",
+        description="N/A _(not implemented yet)_",
+    )
+    sort: Optional[str] = Field(
+        default=None,
+        title="Sort",
+        description="""How you want the resources to be ordered in the response, formatted as a comma-separated list of
+                    `attribute:value` pairs. Each `attribute` is the name of a field you want the resources to be
+                    ordered by, and each `value` is the direction you want the values in that field to be ordered
+                    (i.e. `asc` or no value for _ascending_ order, and `desc` for _descending_ order).\n\n_Example:_
+                    `depth.has_numeric_value:desc, ecosystem_type`""",
+        examples=[
+            "depth.has_numeric_value:desc",
+            "depth.has_numeric_value:desc, ecosystem_type",
+        ],
+    )
+    page: Optional[int] = Field(
+        default=None,
+        title="Page number",
+        description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
+                    This is the page number formatted as an integer ≥ 1.
+                    **Limitation:** When using _page number_-based pagination, only the first 10,000 resources
+                    are accessible. You can access resources beyond that by using _cursor_-based pagination.""",
+        examples=[1],
+    )
+    per_page: PerPageRange = Field(
+        default=25,
+        title="Resources per page",
+        description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
+        examples=[25],
+    )
+    cursor: Optional[str] = Field(
+        default=None,
+        title="Cursor",
+        description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
+                    To begin using cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
+                    include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
+                    request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
+        examples=[
+            "*",
+            "nmdc:sys0zr0fbt71",
+        ],
+    )
+    group_by: Optional[str] = Field(
+        default=None,
+        title="Group by",
+        description="N/A _(not implemented yet)_",
+    )
+    fields: Optional[str] = Field(
+        default=None,
+        title="Fields",
+        description="""The fields you want the resources to include in the response, formatted as a comma-separated list
+                    of field names. This can be used to reduce the size and complexity of the response.\n\n_Example:_
+                    `name, ess_dive_datasets`""",
+        examples=[
+            "name",
+            "name, ess_dive_datasets",
+        ],
+    )
+    # Reference: https://docs.pydantic.dev/latest/concepts/validators/#model-validators
+    @model_validator(mode="before")
+    def set_page_if_cursor_unset(cls, values):
+        page, cursor = values.get("page"), values.get("cursor")
+        if page is not None and cursor is not None:
+            raise ValueError("cannot use cursor- and page-based pagination together")
+        if page is None and cursor is None:
+            values["page"] = 1
+        return values
+class FindResponse(BaseModel):
+    r"""
+    A response model having three required fields: `meta`, `results`, and `group_by`.
+    NOTE(review): Presumably the response body of the endpoints that accept a `FindRequest` — confirm.
+    """
+    meta: dict
+    results: List[dict]
+    group_by: List[dict]
+class DeleteResponse(BaseModel):
+    r"""
+    Response model for "delete" operations. It summarizes the result of the
+    operation and it lists identifiers of the documents that were deleted.
+    Only `message` is required; each list of identifiers defaults to an empty list.
+    """
+    message: str = Field(
+        description="Success message describing the deletion operation"
+    )
+    deleted_workflow_execution_ids: List[str] = Field(
+        # Note: `default_factory=list` sets this to an empty list by default.
+        default_factory=list,
+        description="The `id`s of the `WorkflowExecution`s that were deleted",
+    )
+    deleted_data_object_ids: List[str] = Field(
+        default_factory=list,
+        description="The `id`s of the `DataObject`s that were deleted",
+    )
+    # Note: Unlike the other lists, this one is described as holding MongoDB `ObjectId`s (as strings), not `id` values.
+    deleted_functional_annotation_agg_oids: List[str] = Field(
+        default_factory=list,
+        description="The internal MongoDB `ObjectId`s of the `FunctionalAnnotationAggMember`s that were deleted",
+    )
+    deleted_job_ids: List[str] = Field(
+        default_factory=list,
+        description="The `id`s of the `jobs` documents that were deleted",
+    )
+# A dictionary that maps a collection name to a set of field names, using dot notation for nested fields.
+# NOTE(review): Presumably these are the fields for which indexes are ensured to exist — confirm against
+#               the code that consumes this dictionary.
+# Note: For MongoDB, a single collection can have no more than 64 indexes
+# Note: Each collection has a unique index set on "id" elsewhere.
+entity_attributes_to_index = {
+    "biosample_set": {
+        "alternative_identifiers",
+        "env_broad_scale.has_raw_value",
+        "env_local_scale.has_raw_value",
+        "env_medium.has_raw_value",
+        "collection_date.has_raw_value",
+        "ecosystem",
+        "ecosystem_category",
+        "ecosystem_type",
+        "ecosystem_subtype",
+        "specific_ecosystem",
+        # Note: if `lat_lon` was GeoJSON, i.e. {type,coordinates}, MongoDB has a "2dsphere" index
+        "lat_lon.latitude",
+        "lat_lon.longitude",
+    },
+    "study_set": {
+        "has_credit_associations.applied_roles",
+        "has_credit_associations.applies_to_person.name",
+        "has_credit_associations.applies_to_person.orcid",
+    },
+    "data_object_set": {
+        "data_object_type",
+        "file_size_bytes",
+        "md5_checksum",
+        "url",
+    },
+    # TODO: Refrain from ensuring indexes exist in the `omics_processing_set` collection,
+    #       since that collection was deleted as part of the "Berkeley schema" refactor.
+    #       Reference: https://microbiomedata.github.io/nmdc-schema/v10-vs-v11-retrospective/#slots-removed-from-database
+    "omics_processing_set": {
+        "has_input",
+        "has_output",
+        "instrument_name",
+        "alternative_identifiers",
+    },
+    "functional_annotation_agg": {"was_generated_by"},
+    "workflow_execution_set": {
+        "has_input",
+        "has_output",
+    },
+    # Note: The `jobs` collection is not described by the NMDC schema.
+    "jobs": {
+        "config.activity_id",
+    },
+}

nmdc_runtime/api/models/wfe_file_stages.py ADDED Viewed

@@ -0,0 +1,122 @@
+from pydantic import BaseModel, Field
+from typing import Optional
+from enum import Enum
+import datetime
+class WorkflowFileStagingCollectionName(str, Enum):
+    """The name of a MongoDB collection related to workflow file staging."""
+    # Note: Because this class also inherits from `str`, each member is itself a string
+    #       that compares equal to its value.
+    JGI_SEQUENCING_PROJECTS = "wf_file_staging.jgi_sequencing_projects"
+class GlobusTaskStatus(str, Enum):
+    """
+    An enumeration of status strings, each member's value being identical to its name.
+    NOTE(review): Presumably the statuses a Globus task can have — confirm against the Globus API.
+    """
+    ACTIVE = "ACTIVE"
+    INACTIVE = "INACTIVE"
+    SUCCEEDED = "SUCCEEDED"
+    FAILED = "FAILED"
+    PENDING = "PENDING"
+    IN_PROGRESS = "IN_PROGRESS"
+    COMPLETED = "COMPLETED"
+class JDPFileStatus(str, Enum):
+    """
+    An enumeration of status strings, each member's value being identical to its name.
+    NOTE(review): Presumably the statuses the JGI Data Portal (JDP) reports for a file — confirm against the JDP API.
+    """
+    RESTORED = "RESTORED"
+    PURGED = "PURGED"
+    READY = "READY"
+    EXPIRED = "EXPIRED"
+class GlobusTask(BaseModel):
+    """
+    Represents a Globus file transfer configuration.
+    Both fields are required.
+    """
+    task_id: str = Field(
+        ..., description="ID from Globus of the task", examples=["Some task id"]
+    )
+    # NOTE(review): Typed as a plain `str`, so values are not restricted to the members of the
+    #               `GlobusTaskStatus` enum defined in this module — confirm that is intended.
+    task_status: str = Field(
+        ..., description="Status of the Globus task.", examples=["Some status"]
+    )
+class JGISample(BaseModel):
+    """
+    Represents a JGI Sample for workflow file staging. Information from JDP, Gold, and Globus is gathered on these records.
+    """
+    jdp_file_id: str = Field(
+        ...,
+        description="JGI Data Portal File ID",
+        examples=["6011bc6e117e5d4b9d2b2073"],
+    )
+    ap_gold_id: str = Field(
+        ..., description="Gold Analysis Project ID", examples=["Ga0307276"]
+    )
+    gold_study_id: str = Field(..., description="Gold Study ID", examples=["Gs0135149"])
+    its_ap_id: str = Field(
+        ..., description="ITS Analysis Project ID from the JDP", examples=["1196479.0"]
+    )
+    sequencing_project_name: str = Field(
+        ...,
+        description="Sequencing project name. This relates to a record in the `/wf_staging_file/sequencing_project` endpoints.",
+        examples=["Some Project Name"],
+    )
+    gold_biosample_id: str = Field(
+        ..., description="Gold Biosample ID", examples=["Gb0191643"]
+    )
+    gold_seq_id: str = Field(..., description="Gold Sequence ID", examples=["1196479"])
+    file_name: str = Field(..., description="File Name", examples=["filename.tar.gz"])
+    jdp_file_status: str = Field(
+        ...,
+        description="File staging status. Grabbed from the JDP file restoration endpoint.",
+        examples=["RESTORED"],
+    )
+    globus_file_status: str = Field(
+        ...,
+        description="File staging status. Recieved from Globus when the file state is queried.",
+        examples=["ACTIVE"],
+    )
+    jdp_file_size: int = Field(
+        ..., description="File size in bytes from JDP.", examples=[123456]
+    )
+    md5sum: Optional[str] = Field(
+        None, description="MD5 Sum", examples=["D43F2404CA13E22594E5C8B04D3BBB81"]
+    )
+    jgi_ap_id: str = Field(
+        ..., description="JGI Analysis Project ID", examples=["1196479"]
+    )
+    create_date: datetime.datetime = Field(
+        ..., description="Creation Date", examples=["2023-01-01T00:00:00Z"]
+    )
+    update_date: Optional[datetime.datetime] = Field(
+        None, description="Update Date", examples=["2023-01-01T00:00:00Z"]
+    )
+    request_id: int = Field(
+        ...,
+        description="Request ID from the JGI data portal after a request to have the files restored from tape is submitted.",
+        examples=[1],
+    )
+class JGISequencingProject(BaseModel):
+    """
+    A representation of a JGI sequencing project and its associated metadata.
+    All four fields are required strings.
+    """
+    sequencing_project_name: str = Field(
+        ...,
+        description="Name of the sequencing project that we can refer to while staging files.",
+        examples=["Human Genome Project"],
+    )
+    sequencing_project_description: str = Field(
+        ...,
+        description="Detailed description of the sequencing project",
+        examples=["A project to sequence the human genome."],
+    )
+    # Note: The identifier is a string, even though the example value consists only of digits.
+    jgi_proposal_id: str = Field(
+        ..., description="JGI proposal ID", examples=["503568"]
+    )
+    nmdc_study_id: str = Field(
+        ..., description="NMDC study ID", examples=["nmdc:sty-11-28tm5d36"]
+    )

nmdc_runtime/api/models/workflow.py ADDED Viewed

@@ -0,0 +1,15 @@
+import datetime
+from typing import Optional, List
+from pydantic import BaseModel
+class WorkflowBase(BaseModel):
+    r"""
+    The optional attributes of a workflow. Each field defaults to `None`.
+    """
+    name: Optional[str] = None
+    description: Optional[str] = None
+    # NOTE(review): Presumably the `id`s of the capabilities the workflow requires — confirm against callers.
+    capability_ids: Optional[List[str]] = None
+class Workflow(WorkflowBase):
+    r"""
+    A `WorkflowBase` that also has a required `id` and an optional creation timestamp.
+    """
+    id: str
+    created_at: Optional[datetime.datetime] = None

nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl