nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,177 @@
1
# Note: Most of the steps for the `base` image were copied verbatim from either `fastapi.Dockerfile`,
# `dagster.Dockerfile`, or `test.Dockerfile` (indeed, most of the steps were present in all three files).
# Reference: https://docs.docker.com/get-started/docker-concepts/building-images/multi-stage-builds/
#
# Stage map: `base` (system packages + production Python deps only) is shared by the
# three final stages: `fastapi` (API server), `dagster` (Dagit/daemon), `test` (pytest).
#
# Base this image upon a variant of the official Python 3.10 image that is, in turn,
# based upon a minimal (slim) variant of the Debian 11 (bullseye) image.
# Reference: https://hub.docker.com/_/python
# ────────────────────────────────────────────────────────────────────────────┐
FROM python:3.10-slim-bullseye AS base
# ────────────────────────────────────────────────────────────────────────────┘

# Install and upgrade system-level software in a non-interactive way, then delete temporary files.
# Note: Setting `DEBIAN_FRONTEND=noninteractive` and passing `-y` to `apt-get` makes things non-interactive.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get -y upgrade && \
    apt-get install -y --no-install-recommends \
        tini \
        procps \
        net-tools \
        build-essential \
        git \
        make \
        zip \
        curl \
        wget \
        gnupg && \
    apt-get -y clean && \
    rm -rf /var/lib/apt/lists/*

# Enable Python's "fault handler" feature, so, when low-level errors occur (e.g. segfaults), Python prints lots of info.
# Reference: https://docs.python.org/3/using/cmdline.html#envvar-PYTHONFAULTHANDLER
ENV PYTHONFAULTHANDLER=1

# Configure Git to consider the `/code` directory to be "safe", so that, when a Git repository
# created outside of the container gets mounted at that path within the container, the
# `uv-dynamic-versioning` tool running within the container does not fail with the error:
# > "Detected Git repository, but failed because of dubious ownership"
# Reference: https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
RUN git config --global --add safe.directory /code

# Install `uv`.
# Reference: https://docs.astral.sh/uv/guides/integration/docker/#installing-uv
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && \
    rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"

# Install Python dependencies (production dependencies only).
#
# Note: We copy only the files that `uv` needs in order to install dependencies. That way,
#       we minimize the number of files whose changes would invalidate cached image layers
#
# Note: We use the `VIRTUAL_ENV` environment variable to specify the path to the Python virtual
#       environment that we want the `uv` program inside the container to create and use.
#
# Q: Why don't we use `./.venv` in the repository file tree?
# A: If we were to do that, then, whenever a developer would mount (via our Docker Compose file)
#    the repository file tree from their host machine (which may include a `.venv/` directory
#    created by their host machine) into the container, it would overwrite the Python virtual
#    environment that the `uv` program inside the container is using.
#
# Q: What is special about the `VIRTUAL_ENV` environment variable?
# A: When using `uv`'s `--active` option (as we do in later stages of this Dockerfile),
#    `uv` determines which virtual environment is active by looking at `VIRTUAL_ENV'. This
#    is the case, even though the documentation of the `venv` module (in Python's standard
#    library) specifically says: "`VIRTUAL_ENV` cannot be relied upon to determine whether
#    a virtual environment is being used."
#
# References:
# - https://docs.astral.sh/uv/pip/environments/#using-arbitrary-python-environments (RE: `VIRTUAL_ENV`)
# - https://docs.astral.sh/uv/reference/environment/#virtual_env (RE: `VIRTUAL_ENV`, from uv's perspective)
# - https://docs.python.org/3/library/venv.html#how-venvs-work (RE: `VIRTUAL_ENV`, from venv's perspective)
# - https://docs.astral.sh/uv/concepts/projects/sync/#partial-installations (RE: `--no-install-project`)
#
# Note: In the `RUN` command, we use a "cache mount" (a feature of Docker) to cache production dependencies
#       across builds. This is a performance optimization technique shown in the `uv` docs.
#       Reference:
#       - https://docs.astral.sh/uv/guides/integration/docker/#caching (RE: the technique)
#       - https://docs.docker.com/build/cache/optimize/#use-cache-mounts (RE: the feature)
#       - https://docs.astral.sh/uv/reference/settings/#link-mode (RE: `UV_LINK_MODE`)
#       - https://docs.astral.sh/uv/reference/cli/#uv-sync--no-install-project (RE: `--no-install-project`)
#
# Note: We use `--compile-bytecode` so that Python compiles `.py` files to `.pyc` files now,
#       instead of when the container is running. By default, `uv` defers this compilation
#       to "import time," whereas `pip` (by default) performs it at "install time" (like this).
#
# Note: We use `--locked` so that `uv sync` exits with an error if the `uv.lock` file isn't _already_
#       up to date. By default, `uv sync` would automatically update the lock file if necessary.
#       Reference: https://docs.astral.sh/uv/reference/cli/#uv-sync--locked
#
ENV VIRTUAL_ENV="/venv"
RUN mkdir -p "${VIRTUAL_ENV}"
COPY ./pyproject.toml /code/pyproject.toml
COPY ./uv.lock /code/uv.lock
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    UV_LINK_MODE=copy uv sync --active --no-dev --no-install-project --compile-bytecode --locked

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS fastapi
# ────────────────────────────────────────────────────────────────────────────┘

# Copy repository contents into image.
COPY . /code

# Install the project in editable mode.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    uv sync --active --no-dev --compile-bytecode --locked

# Use Uvicorn to serve the FastAPI app on port 8000.
#
# Note: We include the `--no-sync` option to prevent `uv run` from automatically syncing dependencies.
#       If it were to sync dependencies at this point, it would install development dependencies, since
#       we exclude them above, but they are listed in uv's `default-groups` configuration by default.
#       This is explained at: https://github.com/astral-sh/uv/issues/12558#issuecomment-2764611918
#
EXPOSE 8000
WORKDIR /code
CMD ["uv", "run", "--active", "--no-sync", "uvicorn", "nmdc_runtime.api.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS dagster
# ────────────────────────────────────────────────────────────────────────────┘

# Copy repository contents into image.
#
# Note: This path (i.e. "/opt/dagster/lib/") is hard-coded in a few places in `nmdc_runtime/site/ops.py`. That's why
#       this image does not store the repository contents in `/code`, unlike the other images in this Dockerfile.
#
COPY . /opt/dagster/lib

# Install the project in editable mode.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /opt/dagster/lib && \
    uv sync --active --no-dev --compile-bytecode --locked

# Move Dagster configuration files to the place Dagster expects.
ENV DAGSTER_HOME="/opt/dagster/dagster_home/"
RUN mkdir -p "${DAGSTER_HOME}" && \
    cp /opt/dagster/lib/nmdc_runtime/site/dagster.yaml "${DAGSTER_HOME}" && \
    cp /opt/dagster/lib/nmdc_runtime/site/workspace.yaml "${DAGSTER_HOME}"

# Use Tini to run Dagit.
#
# Notes:
# - The port number (i.e. "3000") is hard-coded in `nmdc_runtime/site/entrypoint-dagit.sh`.
# - Dagster daemon (versus Dagit) can be launched by overriding the `ENTRYPOINT` defined here.
#
# Reference: https://github.com/krallin/tini
#
EXPOSE 3000
WORKDIR /opt/dagster/dagster_home/
ENTRYPOINT ["tini", "--", "../lib/nmdc_runtime/site/entrypoint-dagit.sh"]

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS test
# ────────────────────────────────────────────────────────────────────────────┘

# Copy all repository contents into image.
COPY . /code

# Install the project in editable mode, and install development dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    uv sync --active --compile-bytecode --locked

# Make `wait-for-it.sh` executable.
RUN chmod +x /code/.docker/wait-for-it.sh

WORKDIR /code

# Ensure started container does not exit, so that a subsequent `docker exec` command can run tests.
# For an example `docker exec` command, see `Makefile`'s `run-test` target.
# Such a command should use `wait-for-it.sh` to run `pytest` no earlier than when the FastAPI server is accessible.
ENTRYPOINT ["tail", "-f", "/dev/null"]
@@ -0,0 +1,90 @@
1
+ """
2
+ Based on <https://github.com/tom-draper/api-analytics/tree/main/analytics/python/fastapi>
3
+ under MIT License <https://github.com/tom-draper/api-analytics/blob/main/analytics/python/fastapi/LICENSE>
4
+ """
5
+
6
+ from datetime import datetime
7
+ import threading
8
+ from time import time
9
+ from typing import Dict, List
10
+
11
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
12
+ from starlette.requests import Request
13
+ from starlette.responses import Response
14
+ from starlette.types import ASGIApp
15
+ from toolz import merge
16
+
17
+ from nmdc_runtime.api.db.mongo import get_mongo_db
18
+
19
# Queue of "request descriptors" awaiting insertion into the database, plus the
# time of the most recent flush. Both are shared mutable state and are guarded
# by `_queue_lock`, since the middleware may be invoked concurrently.
_requests = []
_last_posted = datetime.now()
_queue_lock = threading.Lock()


def _post_requests(collection: str, requests_data: List[Dict], source: str):
    """Insert the specified request descriptors into the specified MongoDB collection.

    Each descriptor is tagged with a `source` field before insertion.
    """
    mdb = get_mongo_db()
    mdb[collection].insert_many([merge(d, {"source": source}) for d in requests_data])


def log_request(collection: str, request_data: Dict, source: str = "FastAPI"):
    """Queue a request descriptor, flushing the queue to the database at most once per minute.

    The queue is snapshotted and emptied while holding `_queue_lock`, so two
    near-simultaneous callers can never hand the same (or overlapping) batches
    to `_post_requests` — this closes the race condition noted in the previous
    implementation's TODO. The insertion itself runs on a separate thread so the
    calling (request-handling) thread is not blocked by database I/O.

    Note: If the background insertion fails, the snapshotted descriptors are
    lost; that is acceptable for best-effort analytics.
    """
    global _requests, _last_posted
    with _queue_lock:
        _requests.append(request_data)
        now = datetime.now()
        # Flush the queue every minute at most.
        if (now - _last_posted).total_seconds() <= 60.0:
            return
        # Atomically take ownership of the pending batch and reset the queue.
        batch, _requests = _requests, []
        _last_posted = now
    # Perform the insertion outside the lock so other requests are not blocked.
    threading.Thread(target=_post_requests, args=(collection, batch, source)).start()
56
+
57
+
58
class Analytics(BaseHTTPMiddleware):
    """Starlette middleware that records a descriptor of every request/response pair.

    Descriptors are queued via `log_request` and eventually written to the
    MongoDB collection named by `collection`.
    """

    def __init__(self, app: ASGIApp, collection: str = "_runtime.analytics"):
        super().__init__(app)
        self.collection = collection

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        started_at = time()
        response = await call_next(request)

        # Fall back to an empty string when the client address cannot be
        # derived from the request.
        client = request.client
        ip_address: str = client.host if client is not None else ""

        # Wall-clock handling time, in whole milliseconds.
        elapsed_ms = int((time() - started_at) * 1000)

        # Describe the request/response pair.
        #
        # Note: `request.headers` is an instance of `MultiDict`. References:
        # - https://www.starlette.io/requests/#headers
        # - https://multidict.aio-libs.org/en/stable/multidict/
        #
        descriptor = {
            "hostname": request.url.hostname,
            "ip_address": ip_address,
            "path": request.url.path,
            "user_agent": request.headers.get("user-agent"),
            "method": request.method,
            "status": response.status_code,
            "response_time": elapsed_ms,
            "created_at": datetime.now().isoformat(),
        }

        log_request(self.collection, descriptor, "FastAPI")
        return response
@@ -0,0 +1,9 @@
1
+ from nmdc_runtime.api.models.capability import Capability
2
+ import nmdc_runtime.api.boot.workflows as workflows_boot
3
+
4
# Include 1-to-1 "I can run this workflow" capabilities.
_raw = list(workflows_boot._raw)


def construct():
    """Return a `Capability` model for each raw workflow-derived descriptor."""
    return [Capability(**spec) for spec in _raw]
@@ -0,0 +1,126 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from toolz import get_in
4
+
5
+ from nmdc_runtime.api.models.object_type import ObjectType
6
+ from nmdc_runtime.util import nmdc_jsonschema
7
+
8
# Seed data for boot-time `ObjectType` records.
#
# NOTE(review): The first six `*_activity_set` entries below (read QC, metagenome
# sequencing, MAGs, annotation, assembly, read-based taxonomy) all share the name
# "metaP analysis activity" — this looks like a copy-paste artifact from the
# metaproteomics entry; confirm the intended display names.
_raw = [
    {
        "id": "read_qc_analysis_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for readqc analysis activity",
    },
    {
        "id": "metagenome_sequencing_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome sequencing activity",
    },
    {
        "id": "mags_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for mags activity",
    },
    {
        "id": "metagenome_annotation_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome annotation activity",
    },
    {
        "id": "metagenome_assembly_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome assembly activity",
    },
    {
        "id": "read_based_taxonomy_analysis_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for read based analysis activity",
    },
    {
        "id": "metadata-in",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "metadata submission",
        "description": "Input to the portal ETL process",
    },
    {
        "id": "metaproteomics_analysis_activity_set",
        "created_at": datetime(2021, 8, 23, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        "description": "JSON documents satisfying schema for metaproteomics analysis activity",
    },
    {
        "id": "metagenome_raw_paired_end_reads",
        "created_at": datetime(2021, 8, 24, tzinfo=timezone.utc),
        "name": "Metagenome Raw Paired-End Reads Workflow Input",
        "description": "workflow input",
    },
    {
        "id": "metatranscriptome_raw_paired_end_reads",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Metatranscriptome Raw Paired-End Reads Workflow Input",
        "description": "workflow input 2",
    },
    {
        "id": "gcms-metab-input",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw GCMS MetaB Input",
        "description": "",
    },
    {
        "id": "gcms-metab-calibration",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw GCMS MetaB Calibration",
        "description": "",
    },
    {
        "id": "nom-input",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw FTMS MetaB Input",
        "description": "",
    },
    {
        "id": "test",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "A test object type",
        "description": "For use in unit and integration tests",
    },
    {
        "id": "metadata-changesheet",
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "name": "metadata changesheet",
        "description": "Specification for changes to existing metadata",
    },
]

# Also register an object type for every `*_set` collection declared in the
# NMDC JSON Schema's top-level `properties`.
_raw.extend(
    [
        {
            "id": key,
            "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
            "name": key,
            # "description": spec["description"],
        }
        for key, spec in nmdc_jsonschema["properties"].items()
        if key.endswith("_set")
    ]
)
# Register a type for a whole `nmdc:Database` bundle, with its description
# pulled from the schema itself.
_raw.append(
    {
        "id": "schema#/definitions/Database",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "Bundle of one or more metadata `*_set`s.",
        "description": get_in(
            ["definitions", "Database", "description"], nmdc_jsonschema
        ),
    }
)
123
+
124
+
125
def construct():
    """Instantiate an `ObjectType` model from each raw descriptor in `_raw`."""
    return [ObjectType(**spec) for spec in _raw]
@@ -0,0 +1,84 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from nmdc_runtime.api.models.trigger import Trigger
4
+
5
# Boot-time trigger seed data. Each entry links an object type
# (`object_type_id`, i.e. the type of a newly-registered object) to the
# workflow (`workflow_id`) that should run in response to such an object.
# `construct()` derives each Trigger's `id` from these two fields.
_raw = [
    {
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "object_type_id": "metadata-in",
        "workflow_id": "metadata-in-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 1, tzinfo=timezone.utc),
        "object_type_id": "metaproteomics_analysis_activity_set",
        "workflow_id": "metap-metadata-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 1, tzinfo=timezone.utc),
        "object_type_id": "metagenome_raw_paired_end_reads",
        "workflow_id": "metag-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "object_type_id": "metatranscriptome_raw_paired_end_reads",
        "workflow_id": "metat-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 9, tzinfo=timezone.utc),
        "object_type_id": "test",
        "workflow_id": "test",
    },
    {
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "object_type_id": "nom-input",
        "workflow_id": "nom-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "object_type_id": "gcms-metab-input",
        "workflow_id": "gcms-metab-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "object_type_id": "metadata-changesheet",
        "workflow_id": "apply-changesheet-1.0.0",
    },
    # The entries below chain pipeline stages: the appearance of one stage's
    # output type triggers the next stage's workflow.
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_sequencing_activity_set",
        "workflow_id": "mgrc-1.0.6",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_sequencing_activity_set",
        "workflow_id": "metag-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_annotation_activity_set",
        "workflow_id": "mags-1.0.4",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_assembly_set",
        "workflow_id": "mgann-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "read_qc_analysis_activity_set",
        "workflow_id": "mgasm-1.0.3",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "read_qc_analysis_activity_set",
        "workflow_id": "mgrba-1.0.2",
    },
]
77
+
78
+
79
def construct():
    """Build a `Trigger` model for each raw descriptor in `_raw`.

    Each trigger's `id` is derived as `<object_type_id>--<workflow_id>`.

    Unlike the previous implementation (which wrote `kwargs["id"]` into the
    shared dicts), this version does not mutate the module-level `_raw`
    entries, so repeated calls have no side effects.
    """
    return [
        Trigger(**{**spec, "id": f'{spec["object_type_id"]}--{spec["workflow_id"]}'})
        for spec in _raw
    ]
@@ -0,0 +1,116 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from nmdc_runtime.api.models.workflow import Workflow
4
+
5
# Boot-time workflow seed data, used to register the workflows known to the system.
#
# NOTE(review): Several `id`/`name` pairs disagree on version — e.g. `readqc-1.0.6`
# is named "(v1.0.1)", `mgasm-1.0.3` is named "(v1.0.1)", and `mgasmbgen-1.0.1`
# is named "(v1.0.2)" — and `mags-1.0.4` / `mgrba-1.0.2` share the name
# "Read-based Analysis (v1.0.1)". These look like copy-paste artifacts; confirm
# the intended display names.
_raw = [
    {
        "id": "metag-1.0.0",
        "created_at": datetime(2021, 8, 24, tzinfo=timezone.utc),
        "name": "Metagenome Analysis Workflow (v1.0.0)",
    },
    {
        "id": "readqc-1.0.6",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Reads QC Workflow (v1.0.1)",
    },
    {
        "id": "mags-1.0.4",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Read-based Analysis (v1.0.1)",
    },
    {
        "id": "mgrba-1.0.2",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Read-based Analysis (v1.0.1)",
    },
    {
        "id": "mgasm-1.0.3",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Assembly (v1.0.1)",
    },
    {
        "id": "mgann-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Annotation (v1.0.0)",
    },
    {
        "id": "mgasmbgen-1.0.1",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Assembled Genomes (v1.0.2)",
    },
    {
        "id": "metat-0.0.2",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metatranscriptome (v0.0.2)",
    },
    {
        "id": "metap-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metaproteomic (v1.0.0)",
    },
    {
        "id": "metab-2.1.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metabolomics (v2.1.0)",
    },
    {
        "id": "gold-translation-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "GOLD db dump translation",
        "description": "Transform metadata obtained from the JGI GOLD database.",
    },
    {
        "id": "metap-metadata-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "metaP metadata ETL",
        "description": "Ingest and validate metaP metadata",
    },
    {
        "id": "metadata-in-1.0.0",
        "created_at": datetime(2021, 10, 12, tzinfo=timezone.utc),
        "name": "general metadata ETL",
        "description": "Validate and ingest metadata from JSON files",
    },
    {
        "id": "test",
        "created_at": datetime(2021, 9, 9, tzinfo=timezone.utc),
        "name": "A test workflow",
        "description": "For use in unit and integration tests",
    },
    {
        "id": "gcms-metab-1.0.0",
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "name": "GCMS-based metabolomics",
    },
    {
        "id": "nom-1.0.0",
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "name": "Natural Organic Matter characterization",
    },
    {
        "id": "apply-changesheet-1.0.0",
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "name": "apply metadata changesheet",
        "description": "Validate and apply metadata changes from TSV/CSV files",
    },
    {
        "id": "export-study-biosamples-as-csv-1.0.0",
        "created_at": datetime(2022, 6, 8, tzinfo=timezone.utc),
        "name": "export study biosamples metadata as CSV",
        "description": "Export study biosamples metadata as CSV",
    },
    {
        "id": "gold_study_to_database",
        "created_at": datetime(2023, 2, 17, tzinfo=timezone.utc),
        "name": "Get nmdc:Database for GOLD study",
        "description": "For a given GOLD study ID, produce an nmdc:Database representing that study and related entities",
    },
]
109
+
110
+
111
def construct():
    """Build a `Workflow` model for each raw descriptor in `_raw`.

    Every workflow starts with an empty `capability_ids` list.

    Unlike the previous implementation (which wrote `capability_ids` into the
    shared dicts), this version does not mutate the module-level `_raw`
    entries, so repeated calls have no side effects.
    """
    return [Workflow(**{**spec, "capability_ids": []}) for spec in _raw]