nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,177 @@
1
# Note: Most of the steps for the `base` image were copied verbatim from either `fastapi.Dockerfile`,
# `dagster.Dockerfile`, or `test.Dockerfile` (indeed, most of the steps were present in all three files).
# Reference: https://docs.docker.com/get-started/docker-concepts/building-images/multi-stage-builds/
#
# Stage map: `base` (system packages + production Python deps only) is shared by the
# three final stages: `fastapi` (API server), `dagster` (Dagit/daemon), `test` (pytest).
#
# Base this image upon a variant of the official Python 3.10 image that is, in turn,
# based upon a minimal (slim) variant of the Debian 11 (bullseye) image.
# Reference: https://hub.docker.com/_/python
# ────────────────────────────────────────────────────────────────────────────┐
FROM python:3.10-slim-bullseye AS base
# ────────────────────────────────────────────────────────────────────────────┘

# Install and upgrade system-level software in a non-interactive way, then delete temporary files.
# Note: Setting `DEBIAN_FRONTEND=noninteractive` and passing `-y` to `apt-get` makes things non-interactive.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get -y upgrade && \
    apt-get install -y --no-install-recommends \
        tini \
        procps \
        net-tools \
        build-essential \
        git \
        make \
        zip \
        curl \
        wget \
        gnupg && \
    apt-get -y clean && \
    rm -rf /var/lib/apt/lists/*

# Enable Python's "fault handler" feature, so, when low-level errors occur (e.g. segfaults), Python prints lots of info.
# Reference: https://docs.python.org/3/using/cmdline.html#envvar-PYTHONFAULTHANDLER
ENV PYTHONFAULTHANDLER=1

# Configure Git to consider the `/code` directory to be "safe", so that, when a Git repository
# created outside of the container gets mounted at that path within the container, the
# `uv-dynamic-versioning` tool running within the container does not fail with the error:
# > "Detected Git repository, but failed because of dubious ownership"
# Reference: https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
RUN git config --global --add safe.directory /code

# Install `uv`.
# Reference: https://docs.astral.sh/uv/guides/integration/docker/#installing-uv
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && \
    rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"

# Install Python dependencies (production dependencies only).
#
# Note: We copy only the files that `uv` needs in order to install dependencies. That way,
#       we minimize the number of files whose changes would invalidate cached image layers
#
# Note: We use the `VIRTUAL_ENV` environment variable to specify the path to the Python virtual
#       environment that we want the `uv` program inside the container to create and use.
#
# Q: Why don't we use `./.venv` in the repository file tree?
# A: If we were to do that, then, whenever a developer would mount (via our Docker Compose file)
#    the repository file tree from their host machine (which may include a `.venv/` directory
#    created by their host machine) into the container, it would overwrite the Python virtual
#    environment that the `uv` program inside the container is using.
#
# Q: What is special about the `VIRTUAL_ENV` environment variable?
# A: When using `uv`'s `--active` option (as we do in later stages of this Dockerfile),
#    `uv` determines which virtual environment is active by looking at `VIRTUAL_ENV'. This
#    is the case, even though the documentation of the `venv` module (in Python's standard
#    library) specifically says: "`VIRTUAL_ENV` cannot be relied upon to determine whether
#    a virtual environment is being used."
#
# References:
# - https://docs.astral.sh/uv/pip/environments/#using-arbitrary-python-environments (RE: `VIRTUAL_ENV`)
# - https://docs.astral.sh/uv/reference/environment/#virtual_env (RE: `VIRTUAL_ENV`, from uv's perspective)
# - https://docs.python.org/3/library/venv.html#how-venvs-work (RE: `VIRTUAL_ENV`, from venv's perspective)
# - https://docs.astral.sh/uv/concepts/projects/sync/#partial-installations (RE: `--no-install-project`)
#
# Note: In the `RUN` command, we use a "cache mount" (a feature of Docker) to cache production dependencies
#       across builds. This is a performance optimization technique shown in the `uv` docs.
#       Reference:
#       - https://docs.astral.sh/uv/guides/integration/docker/#caching (RE: the technique)
#       - https://docs.docker.com/build/cache/optimize/#use-cache-mounts (RE: the feature)
#       - https://docs.astral.sh/uv/reference/settings/#link-mode (RE: `UV_LINK_MODE`)
#       - https://docs.astral.sh/uv/reference/cli/#uv-sync--no-install-project (RE: `--no-install-project`)
#
# Note: We use `--compile-bytecode` so that Python compiles `.py` files to `.pyc` files now,
#       instead of when the container is running. By default, `uv` defers this compilation
#       to "import time," whereas `pip` (by default) performs it at "install time" (like this).
#
# Note: We use `--locked` so that `uv sync` exits with an error if the `uv.lock` file isn't _already_
#       up to date. By default, `uv sync` would automatically update the lock file if necessary.
#       Reference: https://docs.astral.sh/uv/reference/cli/#uv-sync--locked
#
ENV VIRTUAL_ENV="/venv"
RUN mkdir -p "${VIRTUAL_ENV}"
COPY ./pyproject.toml /code/pyproject.toml
COPY ./uv.lock /code/uv.lock
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    UV_LINK_MODE=copy uv sync --active --no-dev --no-install-project --compile-bytecode --locked

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS fastapi
# ────────────────────────────────────────────────────────────────────────────┘

# Copy repository contents into image.
COPY . /code

# Install the project in editable mode.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    uv sync --active --no-dev --compile-bytecode --locked

# Use Uvicorn to serve the FastAPI app on port 8000.
#
# Note: We include the `--no-sync` option to prevent `uv run` from automatically syncing dependencies.
#       If it were to sync dependencies at this point, it would install development dependencies, since
#       we exclude them above, but they are listed in uv's `default-groups` configuration by default.
#       This is explained at: https://github.com/astral-sh/uv/issues/12558#issuecomment-2764611918
#
EXPOSE 8000
WORKDIR /code
CMD ["uv", "run", "--active", "--no-sync", "uvicorn", "nmdc_runtime.api.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS dagster
# ────────────────────────────────────────────────────────────────────────────┘

# Copy repository contents into image.
#
# Note: This path (i.e. "/opt/dagster/lib/") is hard-coded in a few places in `nmdc_runtime/site/ops.py`. That's why
#       this image does not store the repository contents in `/code`, unlike the other images in this Dockerfile.
#
COPY . /opt/dagster/lib

# Install the project in editable mode.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /opt/dagster/lib && \
    uv sync --active --no-dev --compile-bytecode --locked

# Move Dagster configuration files to the place Dagster expects.
ENV DAGSTER_HOME="/opt/dagster/dagster_home/"
RUN mkdir -p "${DAGSTER_HOME}" && \
    cp /opt/dagster/lib/nmdc_runtime/site/dagster.yaml "${DAGSTER_HOME}" && \
    cp /opt/dagster/lib/nmdc_runtime/site/workspace.yaml "${DAGSTER_HOME}"

# Use Tini to run Dagit.
#
# Notes:
# - The port number (i.e. "3000") is hard-coded in `nmdc_runtime/site/entrypoint-dagit.sh`.
# - Dagster daemon (versus Dagit) can be launched by overriding the `ENTRYPOINT` defined here.
#
# Reference: https://github.com/krallin/tini
#
EXPOSE 3000
WORKDIR /opt/dagster/dagster_home/
ENTRYPOINT ["tini", "--", "../lib/nmdc_runtime/site/entrypoint-dagit.sh"]

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS test
# ────────────────────────────────────────────────────────────────────────────┘

# Copy all repository contents into image.
COPY . /code

# Install the project in editable mode, and install development dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    uv sync --active --compile-bytecode --locked

# Make `wait-for-it.sh` executable.
RUN chmod +x /code/.docker/wait-for-it.sh

WORKDIR /code

# Ensure started container does not exit, so that a subsequent `docker exec` command can run tests.
# For an example `docker exec` command, see `Makefile`'s `run-test` target.
# Such a command should use `wait-for-it.sh` to run `pytest` no earlier than when the FastAPI server is accessible.
ENTRYPOINT ["tail", "-f", "/dev/null"]
@@ -0,0 +1,90 @@
1
+ """
2
+ Based on <https://github.com/tom-draper/api-analytics/tree/main/analytics/python/fastapi>
3
+ under MIT License <https://github.com/tom-draper/api-analytics/blob/main/analytics/python/fastapi/LICENSE>
4
+ """
5
+
6
+ from datetime import datetime
7
+ import threading
8
+ from time import time
9
+ from typing import Dict, List
10
+
11
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
12
+ from starlette.requests import Request
13
+ from starlette.responses import Response
14
+ from starlette.types import ASGIApp
15
+ from toolz import merge
16
+
17
+ from nmdc_runtime.api.db.mongo import get_mongo_db
18
+
19
# Queue of "request descriptors" awaiting insertion into the database, plus the
# time of the most recent flush. Both are shared mutable state and are guarded
# by `_queue_lock`, since the middleware may be invoked concurrently.
_requests = []
_last_posted = datetime.now()
_queue_lock = threading.Lock()


def _post_requests(collection: str, requests_data: List[Dict], source: str):
    """Insert the specified request descriptors into the specified MongoDB collection.

    Each descriptor is tagged with a `source` field before insertion.
    """
    mdb = get_mongo_db()
    mdb[collection].insert_many([merge(d, {"source": source}) for d in requests_data])


def log_request(collection: str, request_data: Dict, source: str = "FastAPI"):
    """Queue a request descriptor, flushing the queue to the database at most once per minute.

    The queue is snapshotted and emptied while holding `_queue_lock`, so two
    near-simultaneous callers can never hand the same (or overlapping) batches
    to `_post_requests` — this closes the race condition noted in the previous
    implementation's TODO. The insertion itself runs on a separate thread so the
    calling (request-handling) thread is not blocked by database I/O.

    Note: If the background insertion fails, the snapshotted descriptors are
    lost; that is acceptable for best-effort analytics.
    """
    global _requests, _last_posted
    with _queue_lock:
        _requests.append(request_data)
        now = datetime.now()
        # Flush the queue every minute at most.
        if (now - _last_posted).total_seconds() <= 60.0:
            return
        # Atomically take ownership of the pending batch and reset the queue.
        batch, _requests = _requests, []
        _last_posted = now
    # Perform the insertion outside the lock so other requests are not blocked.
    threading.Thread(target=_post_requests, args=(collection, batch, source)).start()
56
+
57
+
58
class Analytics(BaseHTTPMiddleware):
    """Starlette middleware that records a descriptor of every request/response pair.

    Descriptors are queued via `log_request` and eventually written to the
    MongoDB collection named by `collection`.
    """

    def __init__(self, app: ASGIApp, collection: str = "_runtime.analytics"):
        super().__init__(app)
        self.collection = collection

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        started_at = time()
        response = await call_next(request)

        # Fall back to an empty string when the client address cannot be
        # derived from the request.
        client = request.client
        ip_address: str = client.host if client is not None else ""

        # Wall-clock handling time, in whole milliseconds.
        elapsed_ms = int((time() - started_at) * 1000)

        # Describe the request/response pair.
        #
        # Note: `request.headers` is an instance of `MultiDict`. References:
        # - https://www.starlette.io/requests/#headers
        # - https://multidict.aio-libs.org/en/stable/multidict/
        #
        descriptor = {
            "hostname": request.url.hostname,
            "ip_address": ip_address,
            "path": request.url.path,
            "user_agent": request.headers.get("user-agent"),
            "method": request.method,
            "status": response.status_code,
            "response_time": elapsed_ms,
            "created_at": datetime.now().isoformat(),
        }

        log_request(self.collection, descriptor, "FastAPI")
        return response
@@ -0,0 +1,9 @@
1
+ from nmdc_runtime.api.models.capability import Capability
2
+ import nmdc_runtime.api.boot.workflows as workflows_boot
3
+
4
# Include 1-to-1 "I can run this workflow" capabilities.
_raw = list(workflows_boot._raw)


def construct():
    """Return a `Capability` model for each raw workflow-derived descriptor."""
    return [Capability(**spec) for spec in _raw]
@@ -0,0 +1,126 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from toolz import get_in
4
+
5
+ from nmdc_runtime.api.models.object_type import ObjectType
6
+ from nmdc_runtime.util import nmdc_jsonschema
7
+
8
# Seed data for boot-time `ObjectType` records.
#
# NOTE(review): The first six `*_activity_set` entries below (read QC, metagenome
# sequencing, MAGs, annotation, assembly, read-based taxonomy) all share the name
# "metaP analysis activity" — this looks like a copy-paste artifact from the
# metaproteomics entry; confirm the intended display names.
_raw = [
    {
        "id": "read_qc_analysis_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for readqc analysis activity",
    },
    {
        "id": "metagenome_sequencing_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome sequencing activity",
    },
    {
        "id": "mags_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for mags activity",
    },
    {
        "id": "metagenome_annotation_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome annotation activity",
    },
    {
        "id": "metagenome_assembly_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome assembly activity",
    },
    {
        "id": "read_based_taxonomy_analysis_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for read based analysis activity",
    },
    {
        "id": "metadata-in",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "metadata submission",
        "description": "Input to the portal ETL process",
    },
    {
        "id": "metaproteomics_analysis_activity_set",
        "created_at": datetime(2021, 8, 23, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        "description": "JSON documents satisfying schema for metaproteomics analysis activity",
    },
    {
        "id": "metagenome_raw_paired_end_reads",
        "created_at": datetime(2021, 8, 24, tzinfo=timezone.utc),
        "name": "Metagenome Raw Paired-End Reads Workflow Input",
        "description": "workflow input",
    },
    {
        "id": "metatranscriptome_raw_paired_end_reads",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Metatranscriptome Raw Paired-End Reads Workflow Input",
        "description": "workflow input 2",
    },
    {
        "id": "gcms-metab-input",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw GCMS MetaB Input",
        "description": "",
    },
    {
        "id": "gcms-metab-calibration",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw GCMS MetaB Calibration",
        "description": "",
    },
    {
        "id": "nom-input",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw FTMS MetaB Input",
        "description": "",
    },
    {
        "id": "test",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "A test object type",
        "description": "For use in unit and integration tests",
    },
    {
        "id": "metadata-changesheet",
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "name": "metadata changesheet",
        "description": "Specification for changes to existing metadata",
    },
]

# Also register an object type for every `*_set` collection declared in the
# NMDC JSON Schema's top-level `properties`.
_raw.extend(
    [
        {
            "id": key,
            "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
            "name": key,
            # "description": spec["description"],
        }
        for key, spec in nmdc_jsonschema["properties"].items()
        if key.endswith("_set")
    ]
)
# Register a type for a whole `nmdc:Database` bundle, with its description
# pulled from the schema itself.
_raw.append(
    {
        "id": "schema#/definitions/Database",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "Bundle of one or more metadata `*_set`s.",
        "description": get_in(
            ["definitions", "Database", "description"], nmdc_jsonschema
        ),
    }
)
123
+
124
+
125
def construct():
    """Instantiate an `ObjectType` model from each raw descriptor in `_raw`."""
    return [ObjectType(**spec) for spec in _raw]
@@ -0,0 +1,84 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from nmdc_runtime.api.models.trigger import Trigger
4
+
5
# Boot-time trigger seed data. Each entry links an object type
# (`object_type_id`, i.e. the type of a newly-registered object) to the
# workflow (`workflow_id`) that should run in response to such an object.
# `construct()` derives each Trigger's `id` from these two fields.
_raw = [
    {
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "object_type_id": "metadata-in",
        "workflow_id": "metadata-in-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 1, tzinfo=timezone.utc),
        "object_type_id": "metaproteomics_analysis_activity_set",
        "workflow_id": "metap-metadata-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 1, tzinfo=timezone.utc),
        "object_type_id": "metagenome_raw_paired_end_reads",
        "workflow_id": "metag-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "object_type_id": "metatranscriptome_raw_paired_end_reads",
        "workflow_id": "metat-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 9, tzinfo=timezone.utc),
        "object_type_id": "test",
        "workflow_id": "test",
    },
    {
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "object_type_id": "nom-input",
        "workflow_id": "nom-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "object_type_id": "gcms-metab-input",
        "workflow_id": "gcms-metab-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "object_type_id": "metadata-changesheet",
        "workflow_id": "apply-changesheet-1.0.0",
    },
    # The entries below chain pipeline stages: the appearance of one stage's
    # output type triggers the next stage's workflow.
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_sequencing_activity_set",
        "workflow_id": "mgrc-1.0.6",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_sequencing_activity_set",
        "workflow_id": "metag-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_annotation_activity_set",
        "workflow_id": "mags-1.0.4",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_assembly_set",
        "workflow_id": "mgann-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "read_qc_analysis_activity_set",
        "workflow_id": "mgasm-1.0.3",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "read_qc_analysis_activity_set",
        "workflow_id": "mgrba-1.0.2",
    },
]
77
+
78
+
79
def construct():
    """Build a `Trigger` model for each raw descriptor in `_raw`.

    Each trigger's `id` is derived as `<object_type_id>--<workflow_id>`.

    Unlike the previous implementation (which wrote `kwargs["id"]` into the
    shared dicts), this version does not mutate the module-level `_raw`
    entries, so repeated calls have no side effects.
    """
    return [
        Trigger(**{**spec, "id": f'{spec["object_type_id"]}--{spec["workflow_id"]}'})
        for spec in _raw
    ]
@@ -0,0 +1,116 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from nmdc_runtime.api.models.workflow import Workflow
4
+
5
# Boot-time workflow seed data, used to register the workflows known to the system.
#
# NOTE(review): Several `id`/`name` pairs disagree on version — e.g. `readqc-1.0.6`
# is named "(v1.0.1)", `mgasm-1.0.3` is named "(v1.0.1)", and `mgasmbgen-1.0.1`
# is named "(v1.0.2)" — and `mags-1.0.4` / `mgrba-1.0.2` share the name
# "Read-based Analysis (v1.0.1)". These look like copy-paste artifacts; confirm
# the intended display names.
_raw = [
    {
        "id": "metag-1.0.0",
        "created_at": datetime(2021, 8, 24, tzinfo=timezone.utc),
        "name": "Metagenome Analysis Workflow (v1.0.0)",
    },
    {
        "id": "readqc-1.0.6",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Reads QC Workflow (v1.0.1)",
    },
    {
        "id": "mags-1.0.4",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Read-based Analysis (v1.0.1)",
    },
    {
        "id": "mgrba-1.0.2",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Read-based Analysis (v1.0.1)",
    },
    {
        "id": "mgasm-1.0.3",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Assembly (v1.0.1)",
    },
    {
        "id": "mgann-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Annotation (v1.0.0)",
    },
    {
        "id": "mgasmbgen-1.0.1",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Assembled Genomes (v1.0.2)",
    },
    {
        "id": "metat-0.0.2",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metatranscriptome (v0.0.2)",
    },
    {
        "id": "metap-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metaproteomic (v1.0.0)",
    },
    {
        "id": "metab-2.1.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metabolomics (v2.1.0)",
    },
    {
        "id": "gold-translation-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "GOLD db dump translation",
        "description": "Transform metadata obtained from the JGI GOLD database.",
    },
    {
        "id": "metap-metadata-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "metaP metadata ETL",
        "description": "Ingest and validate metaP metadata",
    },
    {
        "id": "metadata-in-1.0.0",
        "created_at": datetime(2021, 10, 12, tzinfo=timezone.utc),
        "name": "general metadata ETL",
        "description": "Validate and ingest metadata from JSON files",
    },
    {
        "id": "test",
        "created_at": datetime(2021, 9, 9, tzinfo=timezone.utc),
        "name": "A test workflow",
        "description": "For use in unit and integration tests",
    },
    {
        "id": "gcms-metab-1.0.0",
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "name": "GCMS-based metabolomics",
    },
    {
        "id": "nom-1.0.0",
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "name": "Natural Organic Matter characterization",
    },
    {
        "id": "apply-changesheet-1.0.0",
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "name": "apply metadata changesheet",
        "description": "Validate and apply metadata changes from TSV/CSV files",
    },
    {
        "id": "export-study-biosamples-as-csv-1.0.0",
        "created_at": datetime(2022, 6, 8, tzinfo=timezone.utc),
        "name": "export study biosamples metadata as CSV",
        "description": "Export study biosamples metadata as CSV",
    },
    {
        "id": "gold_study_to_database",
        "created_at": datetime(2023, 2, 17, tzinfo=timezone.utc),
        "name": "Get nmdc:Database for GOLD study",
        "description": "For a given GOLD study ID, produce an nmdc:Database representing that study and related entities",
    },
]
109
+
110
+
111
def construct():
    """Build a `Workflow` model for each raw descriptor in `_raw`.

    Every workflow starts with an empty `capability_ids` list.

    Unlike the previous implementation (which wrote `capability_ids` into the
    shared dicts), this version does not mutate the module-level `_raw`
    entries, so repeated calls have no side effects.
    """
    return [Workflow(**{**spec, "capability_ids": []}) for spec in _raw]