nmdc-runtime 2.10.0-py3-none-any.whl → 2.11.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nmdc-runtime has been flagged as possibly problematic.

Files changed (77)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +22 -2
  3. nmdc_runtime/api/core/idgen.py +36 -6
  4. nmdc_runtime/api/db/mongo.py +0 -12
  5. nmdc_runtime/api/endpoints/find.py +65 -225
  6. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  7. nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
  8. nmdc_runtime/api/endpoints/objects.py +4 -11
  9. nmdc_runtime/api/endpoints/operations.py +0 -27
  10. nmdc_runtime/api/endpoints/queries.py +22 -0
  11. nmdc_runtime/api/endpoints/sites.py +0 -24
  12. nmdc_runtime/api/endpoints/util.py +57 -35
  13. nmdc_runtime/api/entrypoint.sh +7 -0
  14. nmdc_runtime/api/main.py +84 -60
  15. nmdc_runtime/api/models/util.py +12 -5
  16. nmdc_runtime/api/openapi.py +116 -180
  17. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  18. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  19. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  20. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  21. nmdc_runtime/minter/adapters/repository.py +21 -0
  22. nmdc_runtime/minter/domain/model.py +20 -0
  23. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  24. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  25. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  26. nmdc_runtime/site/dagster.yaml +53 -0
  27. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  28. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  29. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  30. nmdc_runtime/site/export/ncbi_xml.py +632 -11
  31. nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
  32. nmdc_runtime/site/graphs.py +7 -0
  33. nmdc_runtime/site/ops.py +92 -34
  34. nmdc_runtime/site/repository.py +2 -0
  35. nmdc_runtime/site/resources.py +16 -3
  36. nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
  37. nmdc_runtime/site/workspace.yaml +13 -0
  38. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  39. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  40. nmdc_runtime/static/README.md +5 -0
  41. nmdc_runtime/static/favicon.ico +0 -0
  42. nmdc_runtime/util.py +87 -1
  43. nmdc_runtime-2.11.1.dist-info/METADATA +46 -0
  44. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/RECORD +47 -57
  45. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/WHEEL +1 -2
  46. nmdc_runtime/api/endpoints/ids.py +0 -192
  47. nmdc_runtime/client/__init__.py +0 -0
  48. nmdc_runtime/containers.py +0 -14
  49. nmdc_runtime/core/__init__.py +0 -0
  50. nmdc_runtime/core/db/Database.py +0 -13
  51. nmdc_runtime/core/db/__init__.py +0 -0
  52. nmdc_runtime/core/exceptions/__init__.py +0 -23
  53. nmdc_runtime/core/exceptions/base.py +0 -47
  54. nmdc_runtime/core/exceptions/token.py +0 -13
  55. nmdc_runtime/domain/__init__.py +0 -0
  56. nmdc_runtime/domain/users/__init__.py +0 -0
  57. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  58. nmdc_runtime/domain/users/userSchema.py +0 -37
  59. nmdc_runtime/domain/users/userService.py +0 -14
  60. nmdc_runtime/infrastructure/__init__.py +0 -0
  61. nmdc_runtime/infrastructure/database/__init__.py +0 -0
  62. nmdc_runtime/infrastructure/database/db.py +0 -3
  63. nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
  64. nmdc_runtime/infrastructure/database/models/user.py +0 -1
  65. nmdc_runtime/lib/__init__.py +0 -1
  66. nmdc_runtime/lib/extract_nmdc_data.py +0 -33
  67. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  68. nmdc_runtime/lib/nmdc_dataframes.py +0 -825
  69. nmdc_runtime/lib/nmdc_etl_class.py +0 -396
  70. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  71. nmdc_runtime/site/drsobjects/__init__.py +0 -0
  72. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  73. nmdc_runtime/site/drsobjects/registration.py +0 -131
  74. nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
  75. nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
  76. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/entry_points.txt +0 -0
  77. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/licenses/LICENSE +0 -0

nmdc_runtime/site/drsobjects/ingest.py
@@ -1,93 +0,0 @@
- import json
- from datetime import datetime, timezone
-
- from toolz import dissoc
-
- from nmdc_runtime.api.models.job import JobOperationMetadata
- from nmdc_runtime.api.models.operation import Operation
- from nmdc_runtime.api.models.operation import UpdateOperationRequest
- from nmdc_runtime.api.models.util import ListRequest
- from nmdc_runtime.api.models.util import ResultT
-
-
- def load_local_json(url, prefixes_url_to_local=None):
-     """Useful for large files cached on local filesystem.
-
-     You may, for example, `cp --parents ` many files on a remote filesystem to a staging
-     folder on that remote filesystem, gzip that folder, scp it to your local machine, and then
-     extract to your local machine.
-
-     Example:
-     prefixes_url_to_local = {
-         "https://data.microbiomedata.org/data/": "/Users/dwinston/nmdc_files/2021-09-scanon-meta/ficus/pipeline_products/",
-         "https://portal.nersc.gov/project/m3408/": "/Users/dwinston/nmdc_files/2021-09-scanon-meta/www/",
-     }
-     """
-     path = url
-     for before, after in prefixes_url_to_local.items():
-         path = path.replace(before, after)
-     with open(path) as f:
-         return json.load(f)
-
-
- def claim_metadata_ingest_jobs(
-     client, drs_object_ids_to_ingest, wf_id, max_page_size=1000
- ):
-     lr = ListRequest(
-         filter=json.dumps(
-             {
-                 "workflow.id": wf_id,
-                 "config.object_id": {"$in": drs_object_ids_to_ingest},
-             }
-         ),
-         max_page_size=max_page_size,
-     )
-     jobs = []
-     while True:
-         rv = client.list_jobs(lr.model_dump()).json()
-         jobs.extend(rv["resources"])
-         if "next_page_token" not in rv:
-             break
-         else:
-             lr.page_token = rv["next_page_token"]
-
-         # safety escape
-         if len(jobs) == len(drs_object_ids_to_ingest):
-             break
-
-     job_claim_responses = [client.claim_job(j["id"]) for j in jobs]
-
-     return job_claim_responses
-
-
- def mongo_add_docs_result_as_dict(rv):
-     return {
-         collection_name: dissoc(bulk_write_result.bulk_api_result, "upserted")
-         for collection_name, bulk_write_result in rv.items()
-     }
-
-
- def get_metadata_ingest_job_ops(mongo, wf_id, drs_object_ids_to_ingest):
-     return list(
-         mongo.db.operations.find(
-             {
-                 "metadata.job.workflow.id": wf_id,
-                 "metadata.job.config.object_id": {"$in": drs_object_ids_to_ingest},
-                 "done": False,
-             }
-         )
-     )
-
-
- def do_metadata_ingest_job(client, mongo, job_op_doc):
-     op = Operation[ResultT, JobOperationMetadata](**job_op_doc)
-     object_info = client.get_object_info(op.metadata.job.config["object_id"]).json()
-     url = object_info["access_methods"][0]["access_url"]["url"]
-     docs = load_local_json(url)
-     op_result = mongo.add_docs(docs, validate=False, replace=False)
-     op_patch = UpdateOperationRequest(
-         done=True,
-         result=mongo_add_docs_result_as_dict(op_result),
-         metadata={"done_at": datetime.now(timezone.utc).isoformat(timespec="seconds")},
-     )
-     return client.update_operation(op.id, op_patch)
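
The removed `claim_metadata_ingest_jobs` above illustrates the Runtime API's cursor-based pagination: fetch a page, collect `resources`, and re-request with `next_page_token` until the server stops returning one. Below is a minimal standalone sketch of that pattern; the base URL and the `/jobs` endpoint's `filter`/`max_page_size`/`page_token` query parameters are assumptions carried over from the removed code and its `ListRequest` model, not a documented contract.

```python
import json

import requests

# Assumed base URL (the production Runtime API); adjust for a local stack.
BASE_URL = "https://api.microbiomedata.org"


def list_all_jobs(wf_id, object_ids, max_page_size=1000):
    """Collect every job matching the filter, following next_page_token cursors."""
    params = {
        "filter": json.dumps(
            {"workflow.id": wf_id, "config.object_id": {"$in": object_ids}}
        ),
        "max_page_size": max_page_size,
    }
    jobs = []
    while True:
        rv = requests.get(f"{BASE_URL}/jobs", params=params).json()
        jobs.extend(rv.get("resources", []))
        if "next_page_token" not in rv:
            break
        # Resume where the previous page left off.
        params["page_token"] = rv["next_page_token"]
    return jobs
```

Note that the removed code also broke out of the loop once it had collected one job per DRS object ID (its "safety escape"), a guard against paging forever if the server keeps returning a token.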

nmdc_runtime/site/drsobjects/registration.py
@@ -1,131 +0,0 @@
- import json
- import os
- import re
- from datetime import datetime, timezone, timedelta
- from pathlib import Path
- from tempfile import TemporaryDirectory
-
- import requests
- from bs4 import BeautifulSoup
-
- from nmdc_runtime.api.models.object import DrsObjectIn
- from nmdc_runtime.util import (
-     drs_metadata_for,
-     nmdc_jsonschema_validator,
-     specialize_activity_set_docs,
- )
-
- pattern = re.compile(r"https?://(?P<domain>[^/]+)/(?P<path>.+)")
-
-
- def url_to_name(url):
-     m = pattern.match(url)
-     return (
-         f"{'.'.join(reversed(m.group('domain').split('.')))}"
-         f"__{m.group('path').replace('/', '.')}"
-     )
-
-
- def fetch_url(url, timeout=30):
-     return requests.get(url, timeout=timeout)
-
-
- class HttpResponseNotOk(Exception):
-     pass
-
-
- class HttpResponseNotJson(Exception):
-     pass
-
-
- def response_to_json(response):
-     if response.status_code != 200:
-         raise HttpResponseNotOk()
-     try:
-         json_data = response.json()
-     except ValueError:
-         raise HttpResponseNotJson()
-     return json_data
-
-
- def json_data_from_url_to_file(json_data, url, save_dir):
-     filepath = os.path.join(save_dir, url_to_name(url))
-     with open(filepath, "w") as f:
-         json.dump(json_data, f)
-     return filepath
-
-
- def json_clean(d, model, exclude_unset=False):
-     return json.loads(model(**d).json(exclude_unset=exclude_unset))
-
-
- def drs_object_in_for(url):
-     with TemporaryDirectory() as save_dir:
-         response = fetch_url(url)
-         try:
-             json_data = response_to_json(response)
-         except HttpResponseNotOk:
-             return {"error": "HttpResponseNotOk"}
-
-         except HttpResponseNotJson:
-             return {"error": "HttpResponseNotJson"}
-
-         filepath = json_data_from_url_to_file(json_data, url, save_dir)
-         drs_object_in = DrsObjectIn(
-             **drs_metadata_for(
-                 filepath,
-                 {
-                     "access_methods": [{"access_url": {"url": url}}],
-                     "name": Path(filepath).name.replace(":", "-"),
-                 },
-             )
-         )
-     return {"result": drs_object_in}
-
-
- def create_drs_object_for(url, drs_object_in, client):
-     rv = client.create_object(json.loads(drs_object_in.json(exclude_unset=True)))
-     return {"url": url, "response": rv}
-
-
- def validate_as_metadata_and_ensure_tags_for(
-     drs_id, client, tags=("schema#/definitions/Database", "metadata-in")
- ):
-     docs = client.get_object_bytes(drs_id).json()
-     docs, _ = specialize_activity_set_docs(docs)
-     _ = nmdc_jsonschema_validator(docs)
-     return {tag: client.ensure_object_tag(drs_id, tag) for tag in tags}
-
-
- def recent_metadata_urls(
-     urlpath="https://portal.nersc.gov/project/m3408/meta/anno2/",
-     urlpath_extra="?C=M;O=D",
-     since="2021-09",
- ):
-     """Scrapes recent URLs from Apache/2.4.38 (Debian) Server listing.
-
-     Designed with urlpath.startswith("https://portal.nersc.gov/project/m3408/") in mind.
-     """
-     if since is None:
-         now = datetime.now(timezone.utc)
-         recent_enuf = now - timedelta(days=30)
-         since = f"{recent_enuf.year}-{recent_enuf.month}"
-
-     rv = requests.get(f"{urlpath}{urlpath_extra}")
-
-     soup = BeautifulSoup(rv.text, "html.parser")
-
-     urls = []
-
-     for tr in soup.find_all("tr"):
-         tds = tr.find_all("td")
-         if len(tds) != 5:
-             continue
-
-         _, td_name, td_last_modified, td_size, _ = tds
-         if td_last_modified.text.startswith(since):
-             name = td_name.a.text
-             if name.endswith(".json"):
-                 urls.append(f"{urlpath}{name}")
-
-     return urls
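
One detail of the removed module worth spelling out: `url_to_name` maps a URL to a filesystem-safe cache filename by reversing the domain labels and flattening the path, so cached files sort by origin. A self-contained rerun of that transform on a sample URL (the URL itself is just an illustration):

```python
import re

# Same regex and transform as the removed url_to_name().
pattern = re.compile(r"https?://(?P<domain>[^/]+)/(?P<path>.+)")


def url_to_name(url):
    m = pattern.match(url)
    return (
        f"{'.'.join(reversed(m.group('domain').split('.')))}"
        f"__{m.group('path').replace('/', '.')}"
    )


print(url_to_name("https://portal.nersc.gov/project/m3408/meta/anno2/x.json"))
# -> gov.nersc.portal__project.m3408.meta.anno2.x.json
```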

nmdc_runtime-2.10.0.dist-info/METADATA
@@ -1,265 +0,0 @@
- Metadata-Version: 2.4
- Name: nmdc_runtime
- Version: 2.10.0
- Summary: A runtime system for NMDC data management and orchestration
- Home-page: https://github.com/microbiomedata/nmdc-runtime
- Author: Donny Winston
- Author-email: donny@polyneme.xyz
- Classifier: Development Status :: 3 - Alpha
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: Apache Software License
- Requires-Python: >=3.10
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: license-file
- Dynamic: requires-python
- Dynamic: summary
-
- A runtime system for NMDC data management and orchestration.
-
- ## Service Status
-
- http://nmdcstatus.polyneme.xyz/
-
- ## How It Fits In
-
- * [issues](https://github.com/microbiomedata/issues)
-   tracks issues related to NMDC, which may necessitate work across multiple repos.
-
- * [nmdc-schema](https://github.com/microbiomedata/nmdc-schema/)
-   houses the LinkML schema specification, as well as generated artifacts (e.g. JSON Schema).
-
- * [nmdc-server](https://github.com/microbiomedata/nmdc-server)
-   houses code specific to the data portal -- its database, back-end API, and front-end application.
-
- * Workflows — documented in the [workflows](https://docs.microbiomedata.org/workflows/) section of the NMDC documentation website — take source data and produce computed data.
-
- * This repo (nmdc-runtime)
-   * houses code that takes source data and computed data, and transforms it
-     to broadly accommodate downstream applications such as the data portal
-   * manages execution of the above (i.e., lightweight data transformations) and also
-     of computationally- and data-intensive workflows performed at other sites,
-     ensuring that claimed jobs have access to needed configuration and data resources.
-
- ## Data exports
-
- The NMDC metadata as of 2021-10 is available here:
-
- https://drs.microbiomedata.org/ga4gh/drs/v1/objects/sys086d541
-
- The link returns a [GA4GH DRS API bundle object record](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/#_drs_datatypes), with the NMDC metadata collections (study_set, biosample_set, etc.) as contents, each a DRS API blob object.
-
- For example, the blob for the study_set collection export, named "study_set.jsonl.gz", is listed with DRS API ID "sys0xsry70". Thus, it is retrievable via
-
- https://drs.microbiomedata.org/ga4gh/drs/v1/objects/sys0xsry70
-
- The returned blob object record lists https://nmdc-runtime.files.polyneme.xyz/nmdcdb-mongoexport/2021-10-14/study_set.jsonl.gz as the url for an access method.
-
- The 2021-10 exports are currently all accessible at `https://nmdc-runtime.files.polyneme.xyz/nmdcdb-mongoexport/2021-10-14/${COLLECTION_NAME}.jsonl.gz`, but the DRS API indirection allows these links to change in the future, for mirroring via other URLs, etc. So, the DRS API links should be the links you share.
-
- ## Overview
-
- The runtime features:
-
- 1. [Dagster](https://docs.dagster.io/concepts) orchestration:
-     - dagit - a web UI to monitor and manage the running system.
-     - dagster-daemon - a service that triggers pipeline runs based on time or external state.
-     - PostgreSQL database - for storing run history, event logs, and scheduler state.
-     - workspace code
-       - Code to run is loaded into a Dagster `workspace`. This code is loaded from
-         one or more dagster `repositories`. Each Dagster `repository` may be run with a different
-         Python virtual environment if need be, and may be loaded from a local Python file or
-         `pip install`ed from an external source. In our case, each Dagster `repository` is simply
-         loaded from a Python file local to the nmdc-runtime GitHub repository, and all code is
-         run in the same Python environment.
-       - A Dagster repository consists of `solids` and `pipelines`,
-         and optionally `schedules` and `sensors`.
-         - `solids` represent individual units of computation
-         - `pipelines` are built up from solids
-         - `schedules` trigger recurring pipeline runs based on time
-         - `sensors` trigger pipeline runs based on external state
-       - Each `pipeline` can declare dependencies on any runtime `resources` or additional
-         configuration. There are MongoDB `resources` defined, as well as `preset`
-         configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
-         look to a set of known environment variables to load resources configurations, depending on
-         the `mode`.
-
- 2. A MongoDB database supporting write-once, high-throughput internal
-    data storage by the nmdc-runtime FastAPI instance.
-
- 3. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
-    database, as a hub for data management and workflow automation.
-
- ## Local Development
-
- Ensure Docker (and Docker Compose) are installed, and the Docker engine is running.
-
- ```shell
- docker --version
- docker compose version
- docker info
- ```
-
- Ensure the permissions of `./.docker/mongoKeyFile` are such that only the file's owner can read or write the file.
-
- ```shell
- chmod 600 ./.docker/mongoKeyFile
- ```
-
- Ensure you have a `.env` file for the Docker services to source from. You may copy `.env.example` to
- `.env` (which is gitignore'd) to get started.
-
- ```shell
- cp .env.example .env
- ```
-
- Create environment variables in your shell session, based upon the contents of the `.env` file.
-
- ```shell
- set -a # automatically export all variables
- source .env
- set +a
- ```
-
- If you are connecting to resources that require an SSH tunnel—for example, a MongoDB server that is only accessible on
- the NERSC network—set up the SSH tunnel.
-
- The following command could be useful to you, either directly or as a template (see `Makefile`).
-
- ```shell
- make nersc-mongo-tunnels
- ```
-
- Finally, spin up the Docker Compose stack.
-
- ```bash
- make up-dev
- ```
-
- Docker Compose is used to start local MongoDB and PostgreSQL (used by Dagster) instances, as well
- as a Dagster web server (dagit) and daemon (dagster-daemon).
-
- The Dagit web server is viewable at http://127.0.0.1:3000/.
-
- The FastAPI service is viewable at http://127.0.0.1:8000/ -- e.g., rendered documentation at
- http://127.0.0.1:8000/redoc/.
-
-
- * NOTE: Any time you add or change requirements in requirements/main.in or requirements/dev.in, you must run:
-   ```bash
-   pip-compile --build-isolation --allow-unsafe --resolver=backtracking --strip-extras --output-file requirements/[main|dev].txt requirements/[main|dev].in
-   ```
-   to generate main.txt and dev.txt, respectively. main.in is analogous to a Poetry dependencies stanza, and dev.in
-   to a Poetry dev-dependencies stanza; main.txt and dev.txt are analogous to poetry.lock files, pinning the exact
-   versions of dependencies to use. main.txt and dev.txt are combined in the docker compose build process to create the
-   final requirements.txt file and install the dependencies into the Docker image.
-
- ## Local Testing
-
- Tests can be found in `tests` and are run with the following commands:
-
- ```bash
- make up-test
- make test
-
- # Run a specific test file, e.g. tests/test_api/test_endpoints.py
- make test ARGS="tests/test_api/test_endpoints.py"
-
- docker compose --file docker-compose.test.yml run test
- ```
-
- As you create Dagster solids and pipelines, add tests in `tests/` to check that your code behaves as
- desired and does not break over time.
-
- [For hints on how to write tests for solids and pipelines in Dagster, see their documentation
- tutorial on Testing](https://docs.dagster.io/guides/test/unit-testing-assets-and-ops).
-
- ### Performance profiling
-
- We use a tool called [Pyinstrument](https://pyinstrument.readthedocs.io) to profile the performance of the Runtime API while processing an individual HTTP request.
-
- Here's how you can do that:
-
- 1. In your `.env` file, set `IS_PROFILING_ENABLED` to `true`
- 2. Start/restart your development stack: `$ make up-dev`
- 3. Ensure the endpoint function whose performance you want to profile is defined using `async def` (as opposed to just `def`) ([reference](https://github.com/joerick/pyinstrument/issues/257))
-
- Then—with all of that done—submit an HTTP request that includes the URL query parameter: `profile=true`. Instructions for doing that are in the sections below.
-
- <details>
- <summary>Show/hide instructions for <code>GET</code> requests only (involves web browser)</summary>
-
- 1. In your web browser, visit the endpoint's URL, but add the `profile=true` query parameter to the URL. Examples:
-    ```diff
-    A. If the URL doesn't already have query parameters, append `?profile=true`.
-    - http://127.0.0.1:8000/nmdcschema/biosample_set
-    + http://127.0.0.1:8000/nmdcschema/biosample_set?profile=true
-
-    B. If the URL already has query parameters, append `&profile=true`.
-    - http://127.0.0.1:8000/nmdcschema/biosample_set?filter={}
-    + http://127.0.0.1:8000/nmdcschema/biosample_set?filter={}&profile=true
-    ```
- 2. Your web browser will display a performance profiling report.
-    > Note: The Runtime API will have responded with a performance profiling report web page, instead of its normal response (which the Runtime discards).
-
- That'll only work for `GET` requests, though, since you're limited to specifying the request via the address bar.
-
- </details>
-
- <details>
- <summary>Show/hide instructions for <strong>all</strong> kinds of requests (involves <code>curl</code> + web browser)</summary>
-
- 1. At your terminal, type or paste the `curl` command you want to run (you can copy/paste one from Swagger UI).
- 2. Append the `profile=true` query parameter to the URL in the command, and use the `-o` option to save the response to a file whose name ends with `.html`. For example:
-    ```diff
-      curl -X 'POST' \
-    -   'http://127.0.0.1:8000/metadata/json:validate' \
-    +   'http://127.0.0.1:8000/metadata/json:validate?profile=true' \
-    +   -o /tmp/profile.html \
-      -H 'accept: application/json' \
-      -H 'Content-Type: application/json' \
-      -d '{"biosample_set": []}'
-    ```
- 3. Run the command.
-    > Note: The Runtime API will respond with a performance profiling report web page, instead of its normal response (which the Runtime discards). The performance profiling report web page will be saved to the `.html` file to which you redirected the command output.
- 4. Double-click on the `.html` file to view it in your web browser.
-    1. Alternatively, open your web browser and navigate to the `.html` file; e.g., enter `file:///tmp/profile.html` into the address bar.
-
- </details>
-
- ### RAM usage
-
- The `dagster-daemon` and `dagster-dagit` containers can consume a lot of RAM. If tests are failing and the console of
- the `test` container shows "Error 137," here is something you can try as a workaround: In Docker Desktop, go to
- "Settings > Resources > Advanced," and increase the memory limit. One of our team members has
- found **12 GB** to be sufficient for running the tests.
-
- > Dedicating 12 GB of RAM to Docker may be prohibitive for some prospective developers.
- > There is an open [issue](https://github.com/microbiomedata/nmdc-runtime/issues/928) about the memory requirement.
-
- ## Publish to PyPI
-
- This repository contains a GitHub Actions workflow that publishes a Python package to [PyPI](https://pypi.org/project/nmdc-runtime/).
-
- You can also _manually_ publish the Python package to PyPI by issuing the following commands in the root directory of the repository:
-
- ```
- rm -rf dist
- python -m build
- twine upload dist/*
- ```
-
- ## Links
-
- Here are links related to this repository:
-
- - Production API server: https://api.microbiomedata.org
- - PyPI package: https://pypi.org/project/nmdc-runtime
- - DockerHub image (API server): https://hub.docker.com/r/microbiomedata/nmdc-runtime-fastapi
- - DockerHub image (Dagster): https://hub.docker.com/r/microbiomedata/nmdc-runtime-dagster
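
The "Data exports" section of the removed README describes a two-step GA4GH DRS lookup: resolve the bundle record, then resolve a member blob and follow its access URL. Here is a hedged sketch of that flow, using the DRS IDs quoted above; the `contents` and `access_methods[].access_url.url` fields follow the DRS 1.0.0 object model.

```python
import requests

DRS_BASE = "https://drs.microbiomedata.org/ga4gh/drs/v1/objects"

# The 2021-10 bundle: one contents entry per exported collection.
bundle = requests.get(f"{DRS_BASE}/sys086d541").json()
print([item["name"] for item in bundle.get("contents", [])])

# The study_set blob; follow its access URL to download study_set.jsonl.gz.
blob = requests.get(f"{DRS_BASE}/sys0xsry70").json()
download_url = blob["access_methods"][0]["access_url"]["url"]
print(download_url)  # share the DRS link instead, since this URL may change
```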

nmdc_runtime-2.10.0.dist-info/top_level.txt
@@ -1 +0,0 @@
- nmdc_runtime