nmdc-runtime 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic; see the release's security-review details for more information.

Files changed (131):
  1. nmdc_runtime/Dockerfile +167 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +208 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +788 -0
  10. nmdc_runtime/api/core/util.py +109 -0
  11. nmdc_runtime/api/db/mongo.py +435 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +143 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +502 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +270 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +796 -0
  31. nmdc_runtime/api/endpoints/workflows.py +353 -0
  32. nmdc_runtime/api/entrypoint.sh +7 -0
  33. nmdc_runtime/api/main.py +425 -0
  34. nmdc_runtime/api/middleware.py +43 -0
  35. nmdc_runtime/api/models/capability.py +14 -0
  36. nmdc_runtime/api/models/id.py +92 -0
  37. nmdc_runtime/api/models/job.py +37 -0
  38. nmdc_runtime/api/models/lib/helpers.py +78 -0
  39. nmdc_runtime/api/models/metadata.py +11 -0
  40. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  41. nmdc_runtime/api/models/object.py +180 -0
  42. nmdc_runtime/api/models/object_type.py +20 -0
  43. nmdc_runtime/api/models/operation.py +66 -0
  44. nmdc_runtime/api/models/query.py +246 -0
  45. nmdc_runtime/api/models/query_continuation.py +111 -0
  46. nmdc_runtime/api/models/run.py +161 -0
  47. nmdc_runtime/api/models/site.py +87 -0
  48. nmdc_runtime/api/models/trigger.py +13 -0
  49. nmdc_runtime/api/models/user.py +140 -0
  50. nmdc_runtime/api/models/util.py +260 -0
  51. nmdc_runtime/api/models/workflow.py +15 -0
  52. nmdc_runtime/api/openapi.py +178 -0
  53. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  54. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  55. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  56. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  57. nmdc_runtime/config.py +7 -8
  58. nmdc_runtime/minter/adapters/repository.py +22 -2
  59. nmdc_runtime/minter/config.py +2 -0
  60. nmdc_runtime/minter/domain/model.py +55 -1
  61. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  62. nmdc_runtime/mongo_util.py +1 -2
  63. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  64. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  65. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  66. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  67. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  68. nmdc_runtime/site/dagster.yaml +53 -0
  69. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  70. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  71. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  72. nmdc_runtime/site/export/ncbi_xml.py +633 -13
  73. nmdc_runtime/site/export/ncbi_xml_utils.py +115 -1
  74. nmdc_runtime/site/graphs.py +8 -22
  75. nmdc_runtime/site/ops.py +147 -181
  76. nmdc_runtime/site/repository.py +2 -112
  77. nmdc_runtime/site/resources.py +16 -3
  78. nmdc_runtime/site/translation/gold_translator.py +4 -12
  79. nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
  80. nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
  81. nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
  82. nmdc_runtime/site/translation/submission_portal_translator.py +84 -68
  83. nmdc_runtime/site/translation/translator.py +63 -1
  84. nmdc_runtime/site/util.py +8 -3
  85. nmdc_runtime/site/validation/util.py +10 -5
  86. nmdc_runtime/site/workspace.yaml +13 -0
  87. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  88. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  89. nmdc_runtime/static/README.md +5 -0
  90. nmdc_runtime/static/favicon.ico +0 -0
  91. nmdc_runtime/util.py +90 -48
  92. nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
  93. nmdc_runtime-2.11.0.dist-info/RECORD +128 -0
  94. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
  95. nmdc_runtime/containers.py +0 -14
  96. nmdc_runtime/core/db/Database.py +0 -15
  97. nmdc_runtime/core/exceptions/__init__.py +0 -23
  98. nmdc_runtime/core/exceptions/base.py +0 -47
  99. nmdc_runtime/core/exceptions/token.py +0 -13
  100. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  101. nmdc_runtime/domain/users/userSchema.py +0 -37
  102. nmdc_runtime/domain/users/userService.py +0 -14
  103. nmdc_runtime/infrastructure/database/db.py +0 -3
  104. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  105. nmdc_runtime/lib/__init__.py +0 -1
  106. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  107. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  108. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  109. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  110. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  111. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  112. nmdc_runtime/site/drsobjects/registration.py +0 -131
  113. nmdc_runtime/site/translation/emsl.py +0 -43
  114. nmdc_runtime/site/translation/gold.py +0 -53
  115. nmdc_runtime/site/translation/jgi.py +0 -32
  116. nmdc_runtime/site/translation/util.py +0 -132
  117. nmdc_runtime/site/validation/jgi.py +0 -43
  118. nmdc_runtime-2.9.0.dist-info/METADATA +0 -214
  119. nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
  120. nmdc_runtime-2.9.0.dist-info/top_level.txt +0 -1
  121. /nmdc_runtime/{client → api}/__init__.py +0 -0
  122. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  123. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  124. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  125. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  126. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  127. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  128. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  129. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  130. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
  131. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,167 @@
1
# Note: Most of the steps for the `base` image were copied verbatim from either `fastapi.Dockerfile`,
# `dagster.Dockerfile`, or `test.Dockerfile` (indeed, most of the steps were present in all three files).
# Reference: https://docs.docker.com/get-started/docker-concepts/building-images/multi-stage-builds/
#
# Base this image upon a variant of the official Python 3.10 image that is, in turn,
# based upon a minimal (slim) variant of the Debian 11 (bullseye) image.
# Reference: https://hub.docker.com/_/python
# ────────────────────────────────────────────────────────────────────────────┐
FROM python:3.10-slim-bullseye AS base
# ────────────────────────────────────────────────────────────────────────────┘

# Install and upgrade system-level software in a non-interactive way, then delete temporary files.
# Note: Setting `DEBIAN_FRONTEND=noninteractive` and passing `-y` to `apt-get` makes things non-interactive.
# Note: `tini` is installed here so the `dagster` stage below can use it as its PID-1 init process.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get -y upgrade && \
    apt-get install -y --no-install-recommends \
        tini \
        procps \
        net-tools \
        build-essential \
        git \
        make \
        zip \
        curl \
        wget \
        gnupg && \
    apt-get -y clean && \
    rm -rf /var/lib/apt/lists/*

# Enable Python's "fault handler" feature, so, when low-level errors occur (e.g. segfaults), Python prints lots of info.
# Reference: https://docs.python.org/3/using/cmdline.html#envvar-PYTHONFAULTHANDLER
ENV PYTHONFAULTHANDLER=1

# Configure Git to consider the `/code` directory to be "safe", so that, when a Git repository
# created outside of the container gets mounted at that path within the container, the
# `uv-dynamic-versioning` tool running within the container does not fail with the error:
# > "Detected Git repository, but failed because of dubious ownership"
# Reference: https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
RUN git config --global --add safe.directory /code

# Install `uv`.
# Reference: https://docs.astral.sh/uv/guides/integration/docker/#installing-uv
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && \
    rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"

# Install Python dependencies (production dependencies only).
#
# Note: We copy only the files that `uv` needs in order to install dependencies. That way,
#       we minimize the number of files whose changes would invalidate cached image layers
#
# Note: We use the `VIRTUAL_ENV` environment variable to specify the path to the Python virtual
#       environment that we want the `uv` program inside the container to create and use.
#
# Q: Why don't we use `./.venv` in the repository file tree?
# A: If we were to do that, then, whenever a developer would mount (via our Docker Compose file)
#    the repository file tree from their host machine (which may include a `.venv/` directory
#    created by their host machine) into the container, it would overwrite the Python virtual
#    environment that the `uv` program inside the container is using.
#
# Q: What is special about the `VIRTUAL_ENV` environment variable?
# A: When using `uv`'s `--active` option (as we do in later stages of this Dockerfile),
#    `uv` determines which virtual environment is active by looking at `VIRTUAL_ENV'. This
#    is the case, even though the documentation of the `venv` module (in Python's standard
#    library) specifically says: "`VIRTUAL_ENV` cannot be relied upon to determine whether
#    a virtual environment is being used."
#
# References:
# - https://docs.astral.sh/uv/pip/environments/#using-arbitrary-python-environments (RE: `VIRTUAL_ENV`)
# - https://docs.astral.sh/uv/reference/environment/#virtual_env (RE: `VIRTUAL_ENV`, from uv's perspective)
# - https://docs.python.org/3/library/venv.html#how-venvs-work (RE: `VIRTUAL_ENV`, from venv's perspective)
# - https://docs.astral.sh/uv/concepts/projects/sync/#partial-installations (RE: `--no-install-project`)
#
# Note: In the `RUN` command, we use a "cache mount" (a feature of Docker) to cache production dependencies
#       across builds. This is a performance optimization technique shown in the `uv` docs.
#       Reference:
#       - https://docs.astral.sh/uv/guides/integration/docker/#caching (RE: the technique)
#       - https://docs.docker.com/build/cache/optimize/#use-cache-mounts (RE: the feature)
#       - https://docs.astral.sh/uv/reference/settings/#link-mode (RE: `UV_LINK_MODE`)
#       - https://docs.astral.sh/uv/reference/cli/#uv-sync--no-install-project (RE: `--no-install-project`)
#
# Note: We use `--compile-bytecode` so that Python compiles `.py` files to `.pyc` files now,
#       instead of when the container is running. By default, `uv` defers this compilation
#       to "import time," whereas `pip` (by default) performs it at "install time" (like this).
#
ENV VIRTUAL_ENV="/venv"
RUN mkdir -p "${VIRTUAL_ENV}"
COPY ./pyproject.toml /code/pyproject.toml
COPY ./uv.lock /code/uv.lock
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    UV_LINK_MODE=copy uv sync --active --no-dev --no-install-project --compile-bytecode

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS fastapi
# ────────────────────────────────────────────────────────────────────────────┘

# Copy repository contents into image.
COPY . /code

# Install the project in editable mode.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    uv sync --active --no-dev

# Use Uvicorn to serve the FastAPI app on port 8000.
EXPOSE 8000
WORKDIR /code
CMD ["uv", "run", "--active", "uvicorn", "nmdc_runtime.api.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS dagster
# ────────────────────────────────────────────────────────────────────────────┘

# Copy repository contents into image.
#
# Note: This path (i.e. "/opt/dagster/lib/") is hard-coded in a few places in `nmdc_runtime/site/ops.py`. That's why
#       this image does not store the repository contents in `/code`, unlike the other images in this Dockerfile.
#
COPY . /opt/dagster/lib

# Install the project in editable mode.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /opt/dagster/lib && \
    uv sync --active --no-dev

# Move Dagster configuration files to the place Dagster expects.
ENV DAGSTER_HOME="/opt/dagster/dagster_home/"
RUN mkdir -p "${DAGSTER_HOME}" && \
    cp /opt/dagster/lib/nmdc_runtime/site/dagster.yaml "${DAGSTER_HOME}" && \
    cp /opt/dagster/lib/nmdc_runtime/site/workspace.yaml "${DAGSTER_HOME}"

# Use Tini to run Dagit.
#
# Notes:
# - The port number (i.e. "3000") is hard-coded in `nmdc_runtime/site/entrypoint-dagit.sh`.
# - Dagster daemon (versus Dagit) can be launched by overriding the `ENTRYPOINT` defined here.
#
# Reference: https://github.com/krallin/tini
#
EXPOSE 3000
WORKDIR /opt/dagster/dagster_home/
ENTRYPOINT ["tini", "--", "../lib/nmdc_runtime/site/entrypoint-dagit.sh"]

# ────────────────────────────────────────────────────────────────────────────┐
FROM base AS test
# ────────────────────────────────────────────────────────────────────────────┘

# Copy all repository contents into image.
COPY . /code

# Install the project in editable mode, and install development dependencies.
# Note: Unlike the other stages, this `uv sync` omits `--no-dev`, so dev dependencies (e.g. pytest) are included.
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /code && \
    uv sync --active

# Make `wait-for-it.sh` executable.
RUN chmod +x /code/.docker/wait-for-it.sh

WORKDIR /code

# Ensure started container does not exit, so that a subsequent `docker exec` command can run tests.
# For an example `docker exec` command, see `Makefile`'s `run-test` target.
# Such a command should use `wait-for-it.sh` to run `pytest` no earlier than when the FastAPI server is accessible.
ENTRYPOINT ["tail", "-f", "/dev/null"]
@@ -0,0 +1,90 @@
1
+ """
2
+ Based on <https://github.com/tom-draper/api-analytics/tree/main/analytics/python/fastapi>
3
+ under MIT License <https://github.com/tom-draper/api-analytics/blob/main/analytics/python/fastapi/LICENSE>
4
+ """
5
+
6
+ from datetime import datetime
7
+ import threading
8
+ from time import time
9
+ from typing import Dict, List
10
+
11
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
12
+ from starlette.requests import Request
13
+ from starlette.responses import Response
14
+ from starlette.types import ASGIApp
15
+ from toolz import merge
16
+
17
+ from nmdc_runtime.api.db.mongo import get_mongo_db
18
+
19
# This is a queue of the "request descriptors" that we will eventually insert into the database,
# together with the time of the most recent flush. Both are guarded by `_lock` so that
# near-simultaneous callers cannot flush (and therefore insert) the same batch twice —
# the race the previous, lock-free implementation was susceptible to.
_lock = threading.Lock()
_requests = []
_last_posted = datetime.now()


def _post_requests(collection: str, requests_data: List[Dict], source: str):
    """Inserts the specified request descriptors into the specified MongoDB collection."""
    mdb = get_mongo_db()
    mdb[collection].insert_many([merge(d, {"source": source}) for d in requests_data])


def log_request(collection: str, request_data: Dict, source: str = "FastAPI"):
    """Queues a request descriptor, flushing the queue to the database at most once per minute.

    The descriptor is appended to the module-level queue. If more than 60 seconds have
    elapsed since the previous flush, the queued descriptors are handed off — atomically,
    under `_lock` — to a background thread that inserts them into MongoDB.
    """
    global _requests, _last_posted
    batch = None
    with _lock:
        _requests.append(request_data)
        now = datetime.now()
        # Flush the queue every minute at most.
        if (now - _last_posted).total_seconds() > 60.0:
            # Swap the current batch out and reset the queue while still holding
            # the lock, so exactly one caller owns any given batch.
            batch, _requests = _requests, []
            _last_posted = now
    if batch:
        # Insert outside the lock, on a separate thread, so neither the lock hold
        # time nor the current request is blocked by the database write.
        # TODO: If the insertion fails, the batched descriptors are lost.
        threading.Thread(target=_post_requests, args=(collection, batch, source)).start()
56
+
57
+
58
class Analytics(BaseHTTPMiddleware):
    """Starlette middleware that records a descriptor of every request/response pair."""

    def __init__(self, app: ASGIApp, collection: str = "_runtime.analytics"):
        super().__init__(app)
        # Name of the MongoDB collection the descriptors are logged to.
        self.collection = collection

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        started_at = time()
        response = await call_next(request)

        # Use a fallback IP address value (currently an empty string) if we can't derive one from the request.
        client = request.client
        ip_of_client: str = client.host if client is not None else ""

        # Build a dictionary that describes the incoming request.
        #
        # Note: `request.headers` is an instance of `MultiDict`. References:
        #       - https://www.starlette.io/requests/#headers
        #       - https://multidict.aio-libs.org/en/stable/multidict/
        #
        descriptor = {
            "hostname": request.url.hostname,
            "ip_address": ip_of_client,
            "path": request.url.path,
            "user_agent": request.headers.get("user-agent"),
            "method": request.method,
            "status": response.status_code,
            "response_time": int((time() - started_at) * 1000),
            "created_at": datetime.now().isoformat(),
        }

        log_request(self.collection, descriptor, "FastAPI")
        return response
@@ -0,0 +1,9 @@
1
+ from nmdc_runtime.api.models.capability import Capability
2
+ import nmdc_runtime.api.boot.workflows as workflows_boot
3
+
4
# Include 1-to-1 "I can run this workflow" capabilities: each raw workflow
# definition doubles as a capability definition. Copy the list (rather than
# aliasing it) so later mutations here cannot affect the workflows module.
_raw = list(workflows_boot._raw)
6
+
7
+
8
def construct():
    """Return a `Capability` model for each raw capability definition."""
    models = []
    for spec in _raw:
        models.append(Capability(**spec))
    return models
@@ -0,0 +1,126 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from toolz import get_in
4
+
5
+ from nmdc_runtime.api.models.object_type import ObjectType
6
+ from nmdc_runtime.util import nmdc_jsonschema
7
+
8
# Raw object-type definitions seeded at boot. Each dict supplies the keyword
# arguments for an `ObjectType` model (see `construct()` below).
#
# NOTE(review): the first six `*_activity_set` entries all carry the name
# "metaP analysis activity" even though their ids refer to other activity
# types (read QC, sequencing, MAGs, annotation, assembly, read-based
# taxonomy) — this looks copy-pasted; confirm the intended display names.
_raw = [
    {
        "id": "read_qc_analysis_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for readqc analysis activity",
    },
    {
        "id": "metagenome_sequencing_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome sequencing activity",
    },
    {
        "id": "mags_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for mags activity",
    },
    {
        "id": "metagenome_annotation_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome annotation activity",
    },
    {
        "id": "metagenome_assembly_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for metagenome assembly activity",
    },
    {
        "id": "read_based_taxonomy_analysis_activity_set",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        # "description": "JSON documents satisfying schema for read based analysis activity",
    },
    {
        "id": "metadata-in",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "metadata submission",
        "description": "Input to the portal ETL process",
    },
    {
        "id": "metaproteomics_analysis_activity_set",
        "created_at": datetime(2021, 8, 23, tzinfo=timezone.utc),
        "name": "metaP analysis activity",
        "description": "JSON documents satisfying schema for metaproteomics analysis activity",
    },
    {
        "id": "metagenome_raw_paired_end_reads",
        "created_at": datetime(2021, 8, 24, tzinfo=timezone.utc),
        "name": "Metagenome Raw Paired-End Reads Workflow Input",
        "description": "workflow input",
    },
    {
        "id": "metatranscriptome_raw_paired_end_reads",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Metatranscriptome Raw Paired-End Reads Workflow Input",
        "description": "workflow input 2",
    },
    {
        "id": "gcms-metab-input",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw GCMS MetaB Input",
        "description": "",
    },
    {
        "id": "gcms-metab-calibration",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw GCMS MetaB Calibration",
        "description": "",
    },
    {
        "id": "nom-input",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "Raw FTMS MetaB Input",
        "description": "",
    },
    {
        "id": "test",
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "name": "A test object type",
        "description": "For use in unit and integration tests",
    },
    {
        "id": "metadata-changesheet",
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "name": "metadata changesheet",
        "description": "Specification for changes to existing metadata",
    },
]

# Additionally register one object type per `*_set` collection declared in the
# NMDC JSON schema, using the collection key as both id and name.
_raw.extend(
    [
        {
            "id": key,
            "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
            "name": key,
            # "description": spec["description"],
        }
        for key, spec in nmdc_jsonschema["properties"].items()
        if key.endswith("_set")
    ]
)
# Finally, register the schema's `Database` definition itself, pulling its
# description straight out of the JSON schema.
_raw.append(
    {
        "id": "schema#/definitions/Database",
        "created_at": datetime(2021, 9, 14, tzinfo=timezone.utc),
        "name": "Bundle of one or more metadata `*_set`s.",
        "description": get_in(
            ["definitions", "Database", "description"], nmdc_jsonschema
        ),
    }
)
123
+
124
+
125
def construct():
    """Instantiate an `ObjectType` model for every raw definition."""
    models = []
    for spec in _raw:
        models.append(ObjectType(**spec))
    return models
@@ -0,0 +1,84 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from nmdc_runtime.api.models.trigger import Trigger
4
+
5
# Raw trigger definitions seeded at boot. Each entry pairs an `object_type_id`
# with the `workflow_id` to run when an object of that type appears.
# Note: several object types map to a workflow for a *different* stage
# (e.g. `metagenome_assembly_set` → `mgann-1.0.0`), presumably because the
# output of one stage triggers the next stage — confirm the intended pairings.
_raw = [
    {
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "object_type_id": "metadata-in",
        "workflow_id": "metadata-in-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 1, tzinfo=timezone.utc),
        "object_type_id": "metaproteomics_analysis_activity_set",
        "workflow_id": "metap-metadata-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 1, tzinfo=timezone.utc),
        "object_type_id": "metagenome_raw_paired_end_reads",
        "workflow_id": "metag-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 7, tzinfo=timezone.utc),
        "object_type_id": "metatranscriptome_raw_paired_end_reads",
        "workflow_id": "metat-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 9, tzinfo=timezone.utc),
        "object_type_id": "test",
        "workflow_id": "test",
    },
    {
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "object_type_id": "nom-input",
        "workflow_id": "nom-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "object_type_id": "gcms-metab-input",
        "workflow_id": "gcms-metab-1.0.0",
    },
    {
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "object_type_id": "metadata-changesheet",
        "workflow_id": "apply-changesheet-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_sequencing_activity_set",
        "workflow_id": "mgrc-1.0.6",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_sequencing_activity_set",
        "workflow_id": "metag-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_annotation_activity_set",
        "workflow_id": "mags-1.0.4",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "metagenome_assembly_set",
        "workflow_id": "mgann-1.0.0",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "read_qc_analysis_activity_set",
        "workflow_id": "mgasm-1.0.3",
    },
    {
        "created_at": datetime(2022, 1, 20, tzinfo=timezone.utc),
        "object_type_id": "read_qc_analysis_activity_set",
        "workflow_id": "mgrba-1.0.2",
    },
]
77
+
78
+
79
def construct():
    """Return a `Trigger` model for each raw definition.

    Each trigger's id is derived as "<object_type_id>--<workflow_id>".
    Unlike the previous implementation, this does not mutate the module-level
    `_raw` dicts as a side effect of being called.
    """
    models = []
    for spec in _raw:
        trigger_id = f'{spec["object_type_id"]}--{spec["workflow_id"]}'
        models.append(Trigger(id=trigger_id, **spec))
    return models
@@ -0,0 +1,116 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from nmdc_runtime.api.models.workflow import Workflow
4
+
5
# Raw workflow definitions seeded at boot. Each dict supplies keyword
# arguments for a `Workflow` model (see `construct()` below).
#
# NOTE(review): several display names embed version strings that disagree with
# the id (e.g. id "readqc-1.0.6" named "(v1.0.1)", id "mgasmbgen-1.0.1" named
# "(v1.0.2)"), and "mags-1.0.4" / "mgrba-1.0.2" share the same name — these
# look stale or copy-pasted; confirm the intended names.
_raw = [
    {
        "id": "metag-1.0.0",
        "created_at": datetime(2021, 8, 24, tzinfo=timezone.utc),
        "name": "Metagenome Analysis Workflow (v1.0.0)",
    },
    {
        "id": "readqc-1.0.6",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Reads QC Workflow (v1.0.1)",
    },
    {
        "id": "mags-1.0.4",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Read-based Analysis (v1.0.1)",
    },
    {
        "id": "mgrba-1.0.2",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Read-based Analysis (v1.0.1)",
    },
    {
        "id": "mgasm-1.0.3",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Assembly (v1.0.1)",
    },
    {
        "id": "mgann-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Annotation (v1.0.0)",
    },
    {
        "id": "mgasmbgen-1.0.1",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metagenome Assembled Genomes (v1.0.2)",
    },
    {
        "id": "metat-0.0.2",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metatranscriptome (v0.0.2)",
    },
    {
        "id": "metap-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metaproteomic (v1.0.0)",
    },
    {
        "id": "metab-2.1.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "Metabolomics (v2.1.0)",
    },
    {
        "id": "gold-translation-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "GOLD db dump translation",
        "description": "Transform metadata obtained from the JGI GOLD database.",
    },
    {
        "id": "metap-metadata-1.0.0",
        "created_at": datetime(2021, 6, 1, tzinfo=timezone.utc),
        "name": "metaP metadata ETL",
        "description": "Ingest and validate metaP metadata",
    },
    {
        "id": "metadata-in-1.0.0",
        "created_at": datetime(2021, 10, 12, tzinfo=timezone.utc),
        "name": "general metadata ETL",
        "description": "Validate and ingest metadata from JSON files",
    },
    {
        "id": "test",
        "created_at": datetime(2021, 9, 9, tzinfo=timezone.utc),
        "name": "A test workflow",
        "description": "For use in unit and integration tests",
    },
    {
        "id": "gcms-metab-1.0.0",
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "name": "GCMS-based metabolomics",
    },
    {
        "id": "nom-1.0.0",
        "created_at": datetime(2021, 9, 20, tzinfo=timezone.utc),
        "name": "Natural Organic Matter characterization",
    },
    {
        "id": "apply-changesheet-1.0.0",
        "created_at": datetime(2021, 9, 30, tzinfo=timezone.utc),
        "name": "apply metadata changesheet",
        "description": "Validate and apply metadata changes from TSV/CSV files",
    },
    {
        "id": "export-study-biosamples-as-csv-1.0.0",
        "created_at": datetime(2022, 6, 8, tzinfo=timezone.utc),
        "name": "export study biosamples metadata as CSV",
        "description": "Export study biosamples metadata as CSV",
    },
    {
        "id": "gold_study_to_database",
        "created_at": datetime(2023, 2, 17, tzinfo=timezone.utc),
        "name": "Get nmdc:Database for GOLD study",
        "description": "For a given GOLD study ID, produce an nmdc:Database representing that study and related entities",
    },
]
109
+
110
+
111
def construct():
    """Return a `Workflow` model for each raw definition.

    Every workflow starts with an empty `capability_ids` list. Unlike the
    previous implementation, this does not mutate the module-level `_raw`
    dicts as a side effect of being called.
    """
    models = []
    for spec in _raw:
        models.append(Workflow(capability_ids=[], **spec))
    return models