nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +22 -2
- nmdc_runtime/api/core/idgen.py +36 -6
- nmdc_runtime/api/db/mongo.py +0 -12
- nmdc_runtime/api/endpoints/find.py +65 -225
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
- nmdc_runtime/api/endpoints/objects.py +4 -11
- nmdc_runtime/api/endpoints/operations.py +0 -27
- nmdc_runtime/api/endpoints/queries.py +22 -0
- nmdc_runtime/api/endpoints/sites.py +0 -24
- nmdc_runtime/api/endpoints/util.py +57 -35
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +84 -60
- nmdc_runtime/api/models/util.py +12 -5
- nmdc_runtime/api/openapi.py +116 -180
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/minter/adapters/repository.py +21 -0
- nmdc_runtime/minter/domain/model.py +20 -0
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +632 -11
- nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
- nmdc_runtime/site/graphs.py +7 -0
- nmdc_runtime/site/ops.py +92 -34
- nmdc_runtime/site/repository.py +2 -0
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +87 -1
- nmdc_runtime-2.11.1.dist-info/METADATA +46 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/RECORD +47 -57
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/WHEEL +1 -2
- nmdc_runtime/api/endpoints/ids.py +0 -192
- nmdc_runtime/client/__init__.py +0 -0
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/__init__.py +0 -0
- nmdc_runtime/core/db/Database.py +0 -13
- nmdc_runtime/core/db/__init__.py +0 -0
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/__init__.py +0 -0
- nmdc_runtime/domain/users/__init__.py +0 -0
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/models/user.py +0 -1
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -33
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -825
- nmdc_runtime/lib/nmdc_etl_class.py +0 -396
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/__init__.py +0 -0
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
- nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/licenses/LICENSE +0 -0
nmdc_runtime/Dockerfile
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# Note: Most of the steps for the `base` image were copied verbatim from either `fastapi.Dockerfile`,
|
|
2
|
+
# `dagster.Dockerfile`, or `test.Dockerfile` (indeed, most of the steps were present in all three files).
|
|
3
|
+
# Reference: https://docs.docker.com/get-started/docker-concepts/building-images/multi-stage-builds/
|
|
4
|
+
#
|
|
5
|
+
# Base this image upon a variant of the official Python 3.10 image that is, in turn,
|
|
6
|
+
# based upon a minimal (slim) variant of the Debian 11 (bullseye) image.
|
|
7
|
+
# Reference: https://hub.docker.com/_/python
|
|
8
|
+
# ────────────────────────────────────────────────────────────────────────────┐
|
|
9
|
+
FROM python:3.10-slim-bullseye AS base
|
|
10
|
+
# ────────────────────────────────────────────────────────────────────────────┘
|
|
11
|
+
|
|
12
|
+
# Install and upgrade system-level software in a non-interactive way, then delete temporary files.
|
|
13
|
+
# Note: Setting `DEBIAN_FRONTEND=noninteractive` and passing `-y` to `apt-get` makes things non-interactive.
|
|
14
|
+
RUN export DEBIAN_FRONTEND=noninteractive && \
|
|
15
|
+
apt-get update && \
|
|
16
|
+
apt-get -y upgrade && \
|
|
17
|
+
apt-get install -y --no-install-recommends \
|
|
18
|
+
tini \
|
|
19
|
+
procps \
|
|
20
|
+
net-tools \
|
|
21
|
+
build-essential \
|
|
22
|
+
git \
|
|
23
|
+
make \
|
|
24
|
+
zip \
|
|
25
|
+
curl \
|
|
26
|
+
wget \
|
|
27
|
+
gnupg && \
|
|
28
|
+
apt-get -y clean && \
|
|
29
|
+
rm -rf /var/lib/apt/lists/*
|
|
30
|
+
|
|
31
|
+
# Enable Python's "fault handler" feature, so, when low-level errors occur (e.g. segfaults), Python prints lots of info.
|
|
32
|
+
# Reference: https://docs.python.org/3/using/cmdline.html#envvar-PYTHONFAULTHANDLER
|
|
33
|
+
ENV PYTHONFAULTHANDLER=1
|
|
34
|
+
|
|
35
|
+
# Configure Git to consider the `/code` directory to be "safe", so that, when a Git repository
|
|
36
|
+
# created outside of the container gets mounted at that path within the container, the
|
|
37
|
+
# `uv-dynamic-versioning` tool running within the container does not fail with the error:
|
|
38
|
+
# > "Detected Git repository, but failed because of dubious ownership"
|
|
39
|
+
# Reference: https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
|
|
40
|
+
RUN git config --global --add safe.directory /code
|
|
41
|
+
|
|
42
|
+
# Install `uv`.
|
|
43
|
+
# Reference: https://docs.astral.sh/uv/guides/integration/docker/#installing-uv
|
|
44
|
+
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
|
45
|
+
RUN sh /uv-installer.sh && \
|
|
46
|
+
rm /uv-installer.sh
|
|
47
|
+
ENV PATH="/root/.local/bin/:$PATH"
|
|
48
|
+
|
|
49
|
+
# Install Python dependencies (production dependencies only).
|
|
50
|
+
#
|
|
51
|
+
# Note: We copy only the files that `uv` needs in order to install dependencies. That way,
|
|
52
|
+
# we minimize the number of files whose changes would invalidate cached image layers.
|
|
53
|
+
#
|
|
54
|
+
# Note: We use the `VIRTUAL_ENV` environment variable to specify the path to the Python virtual
|
|
55
|
+
# environment that we want the `uv` program inside the container to create and use.
|
|
56
|
+
#
|
|
57
|
+
# Q: Why don't we use `./.venv` in the repository file tree?
|
|
58
|
+
# A: If we were to do that, then, whenever a developer would mount (via our Docker Compose file)
|
|
59
|
+
# the repository file tree from their host machine (which may include a `.venv/` directory
|
|
60
|
+
# created by their host machine) into the container, it would overwrite the Python virtual
|
|
61
|
+
# environment that the `uv` program inside the container is using.
|
|
62
|
+
#
|
|
63
|
+
# Q: What is special about the `VIRTUAL_ENV` environment variable?
|
|
64
|
+
# A: When using `uv`'s `--active` option (as we do in later stages of this Dockerfile),
|
|
65
|
+
# `uv` determines which virtual environment is active by looking at `VIRTUAL_ENV`. This
|
|
66
|
+
# is the case, even though the documentation of the `venv` module (in Python's standard
|
|
67
|
+
# library) specifically says: "`VIRTUAL_ENV` cannot be relied upon to determine whether
|
|
68
|
+
# a virtual environment is being used."
|
|
69
|
+
#
|
|
70
|
+
# References:
|
|
71
|
+
# - https://docs.astral.sh/uv/pip/environments/#using-arbitrary-python-environments (RE: `VIRTUAL_ENV`)
|
|
72
|
+
# - https://docs.astral.sh/uv/reference/environment/#virtual_env (RE: `VIRTUAL_ENV`, from uv's perspective)
|
|
73
|
+
# - https://docs.python.org/3/library/venv.html#how-venvs-work (RE: `VIRTUAL_ENV`, from venv's perspective)
|
|
74
|
+
# - https://docs.astral.sh/uv/concepts/projects/sync/#partial-installations (RE: `--no-install-project`)
|
|
75
|
+
#
|
|
76
|
+
# Note: In the `RUN` command, we use a "cache mount" (a feature of Docker) to cache production dependencies
|
|
77
|
+
# across builds. This is a performance optimization technique shown in the `uv` docs.
|
|
78
|
+
# Reference:
|
|
79
|
+
# - https://docs.astral.sh/uv/guides/integration/docker/#caching (RE: the technique)
|
|
80
|
+
# - https://docs.docker.com/build/cache/optimize/#use-cache-mounts (RE: the feature)
|
|
81
|
+
# - https://docs.astral.sh/uv/reference/settings/#link-mode (RE: `UV_LINK_MODE`)
|
|
82
|
+
# - https://docs.astral.sh/uv/reference/cli/#uv-sync--no-install-project (RE: `--no-install-project`)
|
|
83
|
+
#
|
|
84
|
+
# Note: We use `--compile-bytecode` so that Python compiles `.py` files to `.pyc` files now,
|
|
85
|
+
# instead of when the container is running. By default, `uv` defers this compilation
|
|
86
|
+
# to "import time," whereas `pip` (by default) performs it at "install time" (like this).
|
|
87
|
+
#
|
|
88
|
+
# Note: We use `--locked` so that `uv sync` exits with an error if the `uv.lock` file isn't _already_
|
|
89
|
+
# up to date. By default, `uv sync` would automatically update the lock file if necessary.
|
|
90
|
+
# Reference: https://docs.astral.sh/uv/reference/cli/#uv-sync--locked
|
|
91
|
+
#
|
|
92
|
+
ENV VIRTUAL_ENV="/venv"
|
|
93
|
+
RUN mkdir -p "${VIRTUAL_ENV}"
|
|
94
|
+
COPY ./pyproject.toml /code/pyproject.toml
|
|
95
|
+
COPY ./uv.lock /code/uv.lock
|
|
96
|
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
97
|
+
cd /code && \
|
|
98
|
+
UV_LINK_MODE=copy uv sync --active --no-dev --no-install-project --compile-bytecode --locked
|
|
99
|
+
|
|
100
|
+
# ────────────────────────────────────────────────────────────────────────────┐
|
|
101
|
+
FROM base AS fastapi
|
|
102
|
+
# ────────────────────────────────────────────────────────────────────────────┘
|
|
103
|
+
|
|
104
|
+
# Copy repository contents into image.
|
|
105
|
+
COPY . /code
|
|
106
|
+
|
|
107
|
+
# Install the project in editable mode.
|
|
108
|
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
109
|
+
cd /code && \
|
|
110
|
+
uv sync --active --no-dev --compile-bytecode --locked
|
|
111
|
+
|
|
112
|
+
# Use Uvicorn to serve the FastAPI app on port 8000.
|
|
113
|
+
#
|
|
114
|
+
# Note: We include the `--no-sync` option to prevent `uv run` from automatically syncing dependencies.
|
|
115
|
+
# If it were to sync dependencies at this point, it would install development dependencies, since
|
|
116
|
+
# we exclude them above, but they are listed in uv's `default-groups` configuration by default.
|
|
117
|
+
# This is explained at: https://github.com/astral-sh/uv/issues/12558#issuecomment-2764611918
|
|
118
|
+
#
|
|
119
|
+
EXPOSE 8000
|
|
120
|
+
WORKDIR /code
|
|
121
|
+
CMD ["uv", "run", "--active", "--no-sync", "uvicorn", "nmdc_runtime.api.main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
|
|
122
|
+
|
|
123
|
+
# ────────────────────────────────────────────────────────────────────────────┐
|
|
124
|
+
FROM base AS dagster
|
|
125
|
+
# ────────────────────────────────────────────────────────────────────────────┘
|
|
126
|
+
|
|
127
|
+
# Copy repository contents into image.
|
|
128
|
+
#
|
|
129
|
+
# Note: This path (i.e. "/opt/dagster/lib/") is hard-coded in a few places in `nmdc_runtime/site/ops.py`. That's why
|
|
130
|
+
# this image does not store the repository contents in `/code`, unlike the other images in this Dockerfile.
|
|
131
|
+
#
|
|
132
|
+
COPY . /opt/dagster/lib
|
|
133
|
+
|
|
134
|
+
# Install the project in editable mode.
|
|
135
|
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
136
|
+
cd /opt/dagster/lib && \
|
|
137
|
+
uv sync --active --no-dev --compile-bytecode --locked
|
|
138
|
+
|
|
139
|
+
# Move Dagster configuration files to the place Dagster expects.
|
|
140
|
+
ENV DAGSTER_HOME="/opt/dagster/dagster_home/"
|
|
141
|
+
RUN mkdir -p "${DAGSTER_HOME}" && \
|
|
142
|
+
cp /opt/dagster/lib/nmdc_runtime/site/dagster.yaml "${DAGSTER_HOME}" && \
|
|
143
|
+
cp /opt/dagster/lib/nmdc_runtime/site/workspace.yaml "${DAGSTER_HOME}"
|
|
144
|
+
|
|
145
|
+
# Use Tini to run Dagit.
|
|
146
|
+
#
|
|
147
|
+
# Notes:
|
|
148
|
+
# - The port number (i.e. "3000") is hard-coded in `nmdc_runtime/site/entrypoint-dagit.sh`.
|
|
149
|
+
# - Dagster daemon (versus Dagit) can be launched by overriding the `ENTRYPOINT` defined here.
|
|
150
|
+
#
|
|
151
|
+
# Reference: https://github.com/krallin/tini
|
|
152
|
+
#
|
|
153
|
+
EXPOSE 3000
|
|
154
|
+
WORKDIR /opt/dagster/dagster_home/
|
|
155
|
+
ENTRYPOINT ["tini", "--", "../lib/nmdc_runtime/site/entrypoint-dagit.sh"]
|
|
156
|
+
|
|
157
|
+
# ────────────────────────────────────────────────────────────────────────────┐
|
|
158
|
+
FROM base AS test
|
|
159
|
+
# ────────────────────────────────────────────────────────────────────────────┘
|
|
160
|
+
|
|
161
|
+
# Copy all repository contents into image.
|
|
162
|
+
COPY . /code
|
|
163
|
+
|
|
164
|
+
# Install the project in editable mode, and install development dependencies.
|
|
165
|
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
166
|
+
cd /code && \
|
|
167
|
+
uv sync --active --compile-bytecode --locked
|
|
168
|
+
|
|
169
|
+
# Make `wait-for-it.sh` executable.
|
|
170
|
+
RUN chmod +x /code/.docker/wait-for-it.sh
|
|
171
|
+
|
|
172
|
+
WORKDIR /code
|
|
173
|
+
|
|
174
|
+
# Ensure started container does not exit, so that a subsequent `docker exec` command can run tests.
|
|
175
|
+
# For an example `docker exec` command, see `Makefile`'s `run-test` target.
|
|
176
|
+
# Such a command should use `wait-for-it.sh` to run `pytest` no earlier than when the FastAPI server is accessible.
|
|
177
|
+
ENTRYPOINT ["tail", "-f", "/dev/null"]
|
nmdc_runtime/api/analytics.py
CHANGED
|
@@ -16,25 +16,42 @@ from toolz import merge
|
|
|
16
16
|
|
|
17
17
|
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
18
18
|
|
|
19
|
+
# This is a queue of the "request descriptors" that we will eventually insert into the database.
|
|
19
20
|
_requests = []
|
|
20
21
|
_last_posted = datetime.now()
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
def _post_requests(collection: str, requests_data: List[Dict], source: str):
|
|
25
|
+
"""Inserts the specified request descriptors into the specified MongoDB collection."""
|
|
24
26
|
mdb = get_mongo_db()
|
|
25
27
|
mdb[collection].insert_many([merge(d, {"source": source}) for d in requests_data])
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
def log_request(collection: str, request_data: Dict, source: str = "FastAPI"):
|
|
31
|
+
"""Appends the request descriptor to the queue, then flushes the queue to the database if more than 60 seconds have passed since the previous flush."""
|
|
29
32
|
global _requests, _last_posted
|
|
30
33
|
_requests.append(request_data)
|
|
31
34
|
now = datetime.now()
|
|
32
35
|
# flush queue every minute at most
|
|
33
36
|
if (now - _last_posted).total_seconds() > 60.0:
|
|
37
|
+
# Note: This use of threading is an attempt to avoid blocking the current thread
|
|
38
|
+
# while performing the insertion(s).
|
|
39
|
+
#
|
|
40
|
+
# TODO: Is there a race condition here? If multiple requests arrive at approximately
|
|
41
|
+
# the same time, is it possible that each one causes a different thread to be
|
|
42
|
+
# started, each with a different (and possibly overlapping) set of requests to
|
|
43
|
+
# insert?
|
|
44
|
+
#
|
|
45
|
+
# TODO: If the insertion fails, will the requests be lost?
|
|
46
|
+
#
|
|
47
|
+
# Note: The author of this function said it may have been a "standard" solution copied
|
|
48
|
+
# from some documentation. Indeed, the comment at the top of this module contains
|
|
49
|
+
# a link to code on which it was based.
|
|
50
|
+
#
|
|
34
51
|
threading.Thread(
|
|
35
52
|
target=_post_requests, args=(collection, _requests, source)
|
|
36
53
|
).start()
|
|
37
|
-
_requests = []
|
|
54
|
+
_requests = [] # empties the queue
|
|
38
55
|
_last_posted = now
|
|
39
56
|
|
|
40
57
|
|
|
@@ -49,6 +66,9 @@ class Analytics(BaseHTTPMiddleware):
|
|
|
49
66
|
start = time()
|
|
50
67
|
response = await call_next(request)
|
|
51
68
|
|
|
69
|
+
# Use a fallback IP address value (currently an empty string) if we can't derive one from the request.
|
|
70
|
+
ip_address: str = "" if request.client is None else request.client.host
|
|
71
|
+
|
|
52
72
|
# Build a dictionary that describes the incoming request.
|
|
53
73
|
#
|
|
54
74
|
# Note: `request.headers` is an instance of `MultiDict`. References:
|
|
@@ -57,7 +77,7 @@ class Analytics(BaseHTTPMiddleware):
|
|
|
57
77
|
#
|
|
58
78
|
request_data = {
|
|
59
79
|
"hostname": request.url.hostname,
|
|
60
|
-
"ip_address":
|
|
80
|
+
"ip_address": ip_address,
|
|
61
81
|
"path": request.url.path,
|
|
62
82
|
"user_agent": request.headers.get("user-agent"),
|
|
63
83
|
"method": request.method,
|
nmdc_runtime/api/core/idgen.py
CHANGED
|
@@ -89,7 +89,35 @@ def generate_ids(
|
|
|
89
89
|
shoulder: str = "fk4",
|
|
90
90
|
) -> List[str]:
|
|
91
91
|
r"""
|
|
92
|
-
|
|
92
|
+
Generate the specified number of identifiers, storing them in a MongoDB collection
|
|
93
|
+
whose name is derived from the specified Name-Assigning Authority (NAA) and Shoulder.
|
|
94
|
+
|
|
95
|
+
:param mdb: Handle to a MongoDB database
|
|
96
|
+
:param owner: String that will go in the "__ao" field of the identifier record.
|
|
97
|
+
Callers will oftentimes set this to the name of a Runtime "site"
|
|
98
|
+
(as in, a "site client" site, not a "Dagster" site).
|
|
99
|
+
:param populator: String that will go in the "who" field of the identifier record.
|
|
100
|
+
Indicates "who generated this ID." Callers will oftentimes set
|
|
101
|
+
this to the name of a Runtime "site" (as in, a "site client" site,
|
|
102
|
+
not a "Dagster" site).
|
|
103
|
+
:param ns: Namespace (see Minter docs); e.g. "changesheets"
|
|
104
|
+
:param naa: Name-Assigning Authority (see Minter docs); e.g. "nmdc"
|
|
105
|
+
:param shoulder: String that will go in the "how" field (see Minter docs); e.g. "sys0"
|
|
106
|
+
|
|
107
|
+
This function was written the way it was in an attempt to mirror the ARK spec:
|
|
108
|
+
https://www.ietf.org/archive/id/draft-kunze-ark-41.html (found via: https://arks.org/specs/)
|
|
109
|
+
|
|
110
|
+
Deviations from the ARK spec include:
|
|
111
|
+
1. The inclusion of a typecode.
|
|
112
|
+
The inclusion of a typecode came out of discussions with team members,
|
|
113
|
+
who wanted identifiers to include some non-opaque substring that could be used
|
|
114
|
+
to determine what type of resource a given identifier refers to.
|
|
115
|
+
2. Making hyphens mandatory.
|
|
116
|
+
We decided to make the hyphens mandatory, whereas the spec says they are optional.
|
|
117
|
+
> "Hyphens are considered to be insignificant and are always ignored in ARKs."
|
|
118
|
+
> Reference: https://www.ietf.org/archive/id/draft-kunze-ark-41.html#name-character-repertoires
|
|
119
|
+
In our case, we require that users include an identifier's hyphens whenever
|
|
120
|
+
they are using that identifier.
|
|
93
121
|
"""
|
|
94
122
|
collection = mdb.get_collection(collection_name(naa, shoulder))
|
|
95
123
|
estimated_document_count = collection.estimated_document_count()
|
|
@@ -119,7 +147,9 @@ def generate_ids(
|
|
|
119
147
|
if not_taken:
|
|
120
148
|
# All attribute names beginning with "__a" are reserved...
|
|
121
149
|
# https://github.com/jkunze/n2t-eggnog/blob/0f0f4c490e6dece507dba710d3557e29b8f6627e/egg#L1882
|
|
122
|
-
#
|
|
150
|
+
# The author of this function opted to refrain from using property names beginning with "_.e",
|
|
151
|
+
# because he thought it would complicate MongoDB queries involving those properties, given that
|
|
152
|
+
# the "." is used as a field delimiter in MongoDB syntax (e.g. "foo.bar.baz").
|
|
123
153
|
docs = [
|
|
124
154
|
{
|
|
125
155
|
"@context": "https://n2t.net/e/n2t_apidoc.html#identifier-metadata",
|
|
@@ -145,9 +175,9 @@ def generate_ids(
|
|
|
145
175
|
|
|
146
176
|
|
|
147
177
|
def generate_one_id(
|
|
148
|
-
mdb: MongoDatabase
|
|
178
|
+
mdb: MongoDatabase,
|
|
149
179
|
ns: str = "",
|
|
150
|
-
shoulder: str = "sys0",
|
|
180
|
+
shoulder: str = "sys0", # "sys0" represents the Runtime
|
|
151
181
|
) -> str:
|
|
152
182
|
"""Generate unique Crockford Base32-encoded ID for mdb repository.
|
|
153
183
|
|
|
@@ -156,8 +186,8 @@ def generate_one_id(
|
|
|
156
186
|
"""
|
|
157
187
|
return generate_ids(
|
|
158
188
|
mdb,
|
|
159
|
-
owner="_system",
|
|
160
|
-
populator="_system",
|
|
189
|
+
owner="_system", # "_system" represents the Runtime
|
|
190
|
+
populator="_system", # "_system" represents the Runtime
|
|
161
191
|
number=1,
|
|
162
192
|
ns=ns,
|
|
163
193
|
naa="nmdc",
|
nmdc_runtime/api/db/mongo.py
CHANGED
|
@@ -10,7 +10,6 @@ import bson
|
|
|
10
10
|
from jsonschema import Draft7Validator
|
|
11
11
|
from nmdc_schema.nmdc import Database as NMDCDatabase
|
|
12
12
|
from pymongo.errors import AutoReconnect, OperationFailure
|
|
13
|
-
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
|
|
14
13
|
from refscan.lib.Finder import Finder
|
|
15
14
|
from refscan.scanner import scan_outgoing_references
|
|
16
15
|
from tenacity import wait_random_exponential, retry, retry_if_exception_type
|
|
@@ -83,17 +82,6 @@ def get_session_bound_mongo_db(session=None) -> MongoDatabase:
|
|
|
83
82
|
return SessionBoundDatabase(mdb, session) if session is not None else mdb
|
|
84
83
|
|
|
85
84
|
|
|
86
|
-
@lru_cache
|
|
87
|
-
def get_async_mongo_db() -> AsyncIOMotorDatabase:
|
|
88
|
-
_client = AsyncIOMotorClient(
|
|
89
|
-
host=os.getenv("MONGO_HOST"),
|
|
90
|
-
username=os.getenv("MONGO_USERNAME"),
|
|
91
|
-
password=os.getenv("MONGO_PASSWORD"),
|
|
92
|
-
directConnection=True,
|
|
93
|
-
)
|
|
94
|
-
return _client[os.getenv("MONGO_DBNAME")]
|
|
95
|
-
|
|
96
|
-
|
|
97
85
|
def get_nonempty_nmdc_schema_collection_names(mdb: MongoDatabase) -> Set[str]:
|
|
98
86
|
"""
|
|
99
87
|
Returns the names of the collections that (a) exist in the database,
|