nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (77)
  1. nmdc_runtime/Dockerfile +167 -0
  2. nmdc_runtime/api/analytics.py +22 -2
  3. nmdc_runtime/api/core/idgen.py +36 -6
  4. nmdc_runtime/api/db/mongo.py +0 -12
  5. nmdc_runtime/api/endpoints/find.py +65 -225
  6. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  7. nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
  8. nmdc_runtime/api/endpoints/objects.py +4 -11
  9. nmdc_runtime/api/endpoints/operations.py +0 -27
  10. nmdc_runtime/api/endpoints/queries.py +22 -0
  11. nmdc_runtime/api/endpoints/sites.py +0 -24
  12. nmdc_runtime/api/endpoints/util.py +57 -35
  13. nmdc_runtime/api/entrypoint.sh +7 -0
  14. nmdc_runtime/api/main.py +84 -60
  15. nmdc_runtime/api/models/util.py +12 -5
  16. nmdc_runtime/api/openapi.py +116 -180
  17. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  18. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  19. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  20. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  21. nmdc_runtime/minter/adapters/repository.py +21 -0
  22. nmdc_runtime/minter/domain/model.py +20 -0
  23. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  24. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  25. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  26. nmdc_runtime/site/dagster.yaml +53 -0
  27. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  28. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  29. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  30. nmdc_runtime/site/export/ncbi_xml.py +632 -11
  31. nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
  32. nmdc_runtime/site/graphs.py +7 -0
  33. nmdc_runtime/site/ops.py +92 -34
  34. nmdc_runtime/site/repository.py +2 -0
  35. nmdc_runtime/site/resources.py +16 -3
  36. nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
  37. nmdc_runtime/site/workspace.yaml +13 -0
  38. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  39. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  40. nmdc_runtime/static/README.md +5 -0
  41. nmdc_runtime/static/favicon.ico +0 -0
  42. nmdc_runtime/util.py +87 -1
  43. nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
  44. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/RECORD +47 -57
  45. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
  46. nmdc_runtime/api/endpoints/ids.py +0 -192
  47. nmdc_runtime/client/__init__.py +0 -0
  48. nmdc_runtime/containers.py +0 -14
  49. nmdc_runtime/core/__init__.py +0 -0
  50. nmdc_runtime/core/db/Database.py +0 -13
  51. nmdc_runtime/core/db/__init__.py +0 -0
  52. nmdc_runtime/core/exceptions/__init__.py +0 -23
  53. nmdc_runtime/core/exceptions/base.py +0 -47
  54. nmdc_runtime/core/exceptions/token.py +0 -13
  55. nmdc_runtime/domain/__init__.py +0 -0
  56. nmdc_runtime/domain/users/__init__.py +0 -0
  57. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  58. nmdc_runtime/domain/users/userSchema.py +0 -37
  59. nmdc_runtime/domain/users/userService.py +0 -14
  60. nmdc_runtime/infrastructure/__init__.py +0 -0
  61. nmdc_runtime/infrastructure/database/__init__.py +0 -0
  62. nmdc_runtime/infrastructure/database/db.py +0 -3
  63. nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
  64. nmdc_runtime/infrastructure/database/models/user.py +0 -1
  65. nmdc_runtime/lib/__init__.py +0 -1
  66. nmdc_runtime/lib/extract_nmdc_data.py +0 -33
  67. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  68. nmdc_runtime/lib/nmdc_dataframes.py +0 -825
  69. nmdc_runtime/lib/nmdc_etl_class.py +0 -396
  70. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  71. nmdc_runtime/site/drsobjects/__init__.py +0 -0
  72. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  73. nmdc_runtime/site/drsobjects/registration.py +0 -131
  74. nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
  75. nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
  76. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
  77. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -6,7 +6,7 @@ from functools import lru_cache
6
6
  from json import JSONDecodeError
7
7
  from pathlib import Path
8
8
  from time import time_ns
9
- from typing import Dict, List, Optional, Set, Tuple
9
+ from typing import List, Optional, Set, Tuple
10
10
  from zoneinfo import ZoneInfo
11
11
 
12
12
  from bson import json_util
@@ -55,18 +55,23 @@ BASE_URL_EXTERNAL = os.getenv("API_HOST_EXTERNAL")
55
55
  HOSTNAME_EXTERNAL = BASE_URL_EXTERNAL.split("://", 1)[-1]
56
56
 
57
57
 
58
- def does_num_matching_docs_exceed_threshold(
59
- collection: MongoCollection, filter_: dict, threshold: int
58
+ def is_num_matching_docs_within_limit(
59
+ collection: MongoCollection, filter_: dict, limit: int
60
60
  ) -> bool:
61
- """Check whether a MongoDB collection contains more than `threshold` documents matching the filter."""
62
- if threshold < 0:
63
- raise ValueError("Threshold must be at least 0.")
61
+ """
62
+ Check whether the number of documents in a MongoDB collection that match
63
+ the filter is within (i.e. is no greater than) the specified limit.
64
+ """
65
+ if limit < 0:
66
+ raise ValueError("Limit must be at least 0.")
64
67
 
68
+ # Count the number of documents matching the filter, but only count up to limit + 1,
69
+ # since that's enough to determine whether the number exceeds the limit.
65
70
  limited_num_matching_docs = collection.count_documents(
66
71
  filter=filter_,
67
- limit=threshold + 1,
72
+ limit=limit + 1,
68
73
  )
69
- return limited_num_matching_docs > threshold
74
+ return limited_num_matching_docs <= limit
70
75
 
71
76
 
72
77
  def check_filter(filter_: str):
@@ -87,22 +92,44 @@ def check_filter(filter_: str):
87
92
  return filter_
88
93
 
89
94
 
90
- def list_resources(req: ListRequest, mdb: MongoDatabase, collection_name: str):
91
- r"""
95
+ def list_resources(
96
+ req: ListRequest, mdb: MongoDatabase, collection_name: str = ""
97
+ ) -> dict:
98
+ """
92
99
  Returns a dictionary containing the requested MongoDB documents, maybe alongside pagination information.
93
100
 
94
- Note: If the specified page size (`req.max_page_size`) is non-zero and more documents match the filter
95
- criteria than can fit on a page of that size, this function will paginate the resources.
101
+ `mdb.page_tokens` docs are `{"_id": req.page_token, "ns": collection_name}`. Because `page_token` is globally
102
+ unique, and because the `mdb.page_tokens.find_one({"_id": req.page_token})` document stores `collection_name` in
103
+ the "ns" (namespace) field, the value for `collection_name` stored there takes precedence over any value supplied
104
+ as an argument to this function's `collection_name` parameter.
105
+
106
+ If the specified page size (`req.max_page_size`) is non-zero and more documents match the filter criteria than
107
+ can fit on a page of that size, this function will paginate the resources.
96
108
  """
109
+ if collection_name == "" and req.page_token is None:
110
+ raise HTTPException(
111
+ status_code=status.HTTP_400_BAD_REQUEST,
112
+ detail="Must specify a collection name if no page token is supplied.",
113
+ )
114
+ if req.page_token:
115
+ doc = mdb.page_tokens.find_one({"_id": req.page_token})
116
+ if doc is None:
117
+ raise HTTPException(
118
+ status_code=status.HTTP_400_BAD_REQUEST, detail="`page_token` not found"
119
+ )
120
+ collection_name = doc["ns"]
121
+ last_id = doc["last_id"]
122
+ mdb.page_tokens.delete_one({"_id": req.page_token})
123
+ else:
124
+ last_id = None
97
125
 
98
126
  id_field = "id"
99
127
  if "id_1" not in mdb[collection_name].index_information():
100
128
  logging.warning(
101
129
  f"list_resources: no index set on 'id' for collection {collection_name}"
102
130
  )
103
- id_field = (
104
- "_id" # currently expected for `functional_annotation_agg` collection
105
- )
131
+ id_field = "_id" # expected for `functional_annotation_agg` collection
132
+
106
133
  max_page_size = req.max_page_size
107
134
  filter_ = json_util.loads(check_filter(req.filter)) if req.filter else {}
108
135
  projection = (
@@ -110,16 +137,6 @@ def list_resources(req: ListRequest, mdb: MongoDatabase, collection_name: str):
110
137
  if req.projection
111
138
  else None
112
139
  )
113
- if req.page_token:
114
- doc = mdb.page_tokens.find_one({"_id": req.page_token, "ns": collection_name})
115
- if doc is None:
116
- raise HTTPException(
117
- status_code=status.HTTP_400_BAD_REQUEST, detail="Bad page_token"
118
- )
119
- last_id = doc["last_id"]
120
- mdb.page_tokens.delete_one({"_id": req.page_token})
121
- else:
122
- last_id = None
123
140
  if last_id is not None:
124
141
  if id_field in filter_:
125
142
  filter_[id_field] = merge(filter_[id_field], {"$gt": last_id})
@@ -128,17 +145,12 @@ def list_resources(req: ListRequest, mdb: MongoDatabase, collection_name: str):
128
145
 
129
146
  # Determine whether we will paginate the results.
130
147
  #
131
- # Note: We will paginate them unless either:
132
- # - the `max_page_size` is not a positive integer
133
- # - the number of documents matching the filter does not exceed `max_page_size`
148
+ # Note: We will paginate them unless either (a) the `max_page_size` is less than 1,
149
+ # or (b) the number of documents matching the filter can fit on a single page.
134
150
  #
135
151
  will_paginate = True
136
- if not isinstance(max_page_size, int):
137
- will_paginate = False
138
- elif max_page_size < 1:
139
- will_paginate = False
140
- elif not does_num_matching_docs_exceed_threshold(
141
- collection=mdb[collection_name], filter_=filter_, threshold=max_page_size
152
+ if max_page_size < 1 or is_num_matching_docs_within_limit(
153
+ collection=mdb[collection_name], filter_=filter_, limit=max_page_size
142
154
  ):
143
155
  will_paginate = False
144
156
 
@@ -304,9 +316,19 @@ def find_resources(req: FindRequest, mdb: MongoDatabase, collection_name: str):
304
316
  if req.page:
305
317
  skip = (req.page - 1) * req.per_page
306
318
  if skip > 10_000:
319
+ # Note: because _page number_-based pagination is currently implemented via MongoDB's `skip` and `limit`
320
+ # parameters, a full (slow) collection scan is performed to skip to the requested page. This scan takes
321
+ # longer and longer as `skip` increases, which is why cursor-based pagination is preferred for large
322
+ # collections.
307
323
  raise HTTPException(
308
324
  status_code=status.HTTP_400_BAD_REQUEST,
309
- detail="Use cursor-based pagination for paging beyond 10,000 items",
325
+ detail=(
326
+ "Use cursor-based pagination for paging beyond 10,000 items. "
327
+ "That is, instead of specifying the `page` query parameter for this endpoint, "
328
+ "specify the `cursor` query parameter. In particular, set `cursor` to `*` to get the first page, "
329
+ "and use the value of `meta.next_cursor` in the response, if not `null`, as the value to which "
330
+ "you set `cursor` in the next request."
331
+ ),
310
332
  )
311
333
  limit = req.per_page
312
334
  results, db_response_time_ms = timeit(
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+
3
+ set -euo pipefail
4
+
5
+ exec gunicorn --worker-tmp-dir /dev/shm --workers=2 \
6
+ --threads=4 --worker-class gthread \
7
+ --log-file=- --bind 0.0.0.0:8000 nmdc_runtime.api.main:app
nmdc_runtime/api/main.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  from contextlib import asynccontextmanager
3
+ from html import escape
3
4
  from importlib import import_module
4
5
  from importlib.metadata import version
5
6
  from typing import Annotated
@@ -12,7 +13,6 @@ from fastapi import APIRouter, FastAPI, Cookie
12
13
  from fastapi.middleware.cors import CORSMiddleware
13
14
  from fastapi.openapi.docs import get_swagger_ui_html
14
15
  from fastapi.staticfiles import StaticFiles
15
- from setuptools_scm import get_version
16
16
  from starlette import status
17
17
  from starlette.responses import RedirectResponse, HTMLResponse, FileResponse
18
18
  from refscan.lib.helpers import get_collection_names_from_schema
@@ -55,29 +55,32 @@ from nmdc_runtime.api.endpoints.util import BASE_URL_EXTERNAL
55
55
  from nmdc_runtime.api.models.site import SiteClientInDB, SiteInDB
56
56
  from nmdc_runtime.api.models.user import UserInDB
57
57
  from nmdc_runtime.api.models.util import entity_attributes_to_index
58
- from nmdc_runtime.api.openapi import ordered_tag_descriptors, make_api_description
59
- from nmdc_runtime.api.v1.router import router_v1
58
+ from nmdc_runtime.api.openapi import (
59
+ OpenAPITag,
60
+ ordered_tag_descriptors,
61
+ make_api_description,
62
+ )
63
+ from nmdc_runtime.api.swagger_ui.swagger_ui import base_swagger_ui_parameters
60
64
  from nmdc_runtime.minter.bootstrap import bootstrap as minter_bootstrap
61
65
  from nmdc_runtime.minter.entrypoints.fastapi_app import router as minter_router
62
66
 
63
67
 
64
68
  api_router = APIRouter()
65
- api_router.include_router(users.router, tags=["users"])
66
- api_router.include_router(operations.router, tags=["operations"])
67
- api_router.include_router(sites.router, tags=["sites"])
68
- api_router.include_router(jobs.router, tags=["jobs"])
69
- api_router.include_router(objects.router, tags=["objects"])
70
- api_router.include_router(capabilities.router, tags=["capabilities"])
71
- api_router.include_router(triggers.router, tags=["triggers"])
72
- api_router.include_router(workflows.router, tags=["workflows"])
73
- api_router.include_router(object_types.router, tags=["object types"])
74
- api_router.include_router(queries.router, tags=["queries"])
75
- api_router.include_router(metadata.router, tags=["metadata"])
76
- api_router.include_router(nmdcschema.router, tags=["metadata"])
77
- api_router.include_router(find.router, tags=["find"])
78
- api_router.include_router(runs.router, tags=["runs"])
79
- api_router.include_router(router_v1, tags=["v1"])
80
- api_router.include_router(minter_router, prefix="/pids", tags=["minter"])
69
+ api_router.include_router(find.router, tags=[OpenAPITag.METADATA_ACCESS.value])
70
+ api_router.include_router(nmdcschema.router, tags=[OpenAPITag.METADATA_ACCESS.value])
71
+ api_router.include_router(queries.router, tags=[OpenAPITag.METADATA_ACCESS.value])
72
+ api_router.include_router(metadata.router, tags=[OpenAPITag.METADATA_ACCESS.value])
73
+ api_router.include_router(sites.router, tags=[OpenAPITag.WORKFLOWS.value])
74
+ api_router.include_router(workflows.router, tags=[OpenAPITag.WORKFLOWS.value])
75
+ api_router.include_router(capabilities.router, tags=[OpenAPITag.WORKFLOWS.value])
76
+ api_router.include_router(object_types.router, tags=[OpenAPITag.WORKFLOWS.value])
77
+ api_router.include_router(triggers.router, tags=[OpenAPITag.WORKFLOWS.value])
78
+ api_router.include_router(jobs.router, tags=[OpenAPITag.WORKFLOWS.value])
79
+ api_router.include_router(objects.router, tags=[OpenAPITag.WORKFLOWS.value])
80
+ api_router.include_router(operations.router, tags=[OpenAPITag.WORKFLOWS.value])
81
+ api_router.include_router(runs.router, tags=[OpenAPITag.WORKFLOWS.value])
82
+ api_router.include_router(minter_router, prefix="/pids", tags=[OpenAPITag.MINTER.value])
83
+ api_router.include_router(users.router, tags=[OpenAPITag.USERS.value])
81
84
 
82
85
 
83
86
  def ensure_initial_resources_on_boot():
@@ -219,9 +222,6 @@ async def lifespan(app: FastAPI):
219
222
  From the [FastAPI documentation](https://fastapi.tiangolo.com/advanced/events/#lifespan-function):
220
223
  > You can define logic (code) that should be executed before the application starts up. This means that
221
224
  > this code will be executed once, before the application starts receiving requests.
222
-
223
- Note: Based on my own observations, I think this function gets called when the first request starts coming in,
224
- but not before that (i.e. not when the application is idle before any requests start coming in).
225
225
  """
226
226
  ensure_initial_resources_on_boot()
227
227
  ensure_attribute_indexes()
@@ -242,21 +242,24 @@ async def root():
242
242
  )
243
243
 
244
244
 
245
- @api_router.get("/version")
245
+ @api_router.get("/version", tags=[OpenAPITag.SYSTEM_ADMINISTRATION.value])
246
246
  async def get_versions():
247
247
  return {
248
- "nmdc-runtime": get_version(),
248
+ "nmdc-runtime": version("nmdc_runtime"),
249
249
  "fastapi": fastapi.__version__,
250
250
  "nmdc-schema": version("nmdc_schema"),
251
251
  }
252
252
 
253
253
 
254
+ # Build an ORCID Login URL for the Swagger UI page, based upon some environment variables.
255
+ orcid_login_url = f"{ORCID_BASE_URL}/oauth/authorize?client_id={ORCID_NMDC_CLIENT_ID}&response_type=code&scope=openid&redirect_uri={BASE_URL_EXTERNAL}/orcid_code"
256
+
257
+
254
258
  app = FastAPI(
255
259
  title="NMDC Runtime API",
256
- version=get_version(),
260
+ version=version("nmdc_runtime"),
257
261
  description=make_api_description(
258
- schema_version=version("nmdc_schema"),
259
- orcid_login_url=f"{ORCID_BASE_URL}/oauth/authorize?client_id={ORCID_NMDC_CLIENT_ID}&response_type=code&scope=openid&redirect_uri={BASE_URL_EXTERNAL}/orcid_code",
262
+ api_version=version("nmdc_runtime"), schema_version=version("nmdc_schema")
260
263
  ),
261
264
  openapi_tags=ordered_tag_descriptors,
262
265
  lifespan=lifespan,
@@ -309,6 +312,14 @@ async def get_scalar_html():
309
312
  def custom_swagger_ui_html(
310
313
  user_id_token: Annotated[str | None, Cookie()] = None,
311
314
  ):
315
+ r"""Returns the HTML markup for an interactive API docs web page powered by Swagger UI.
316
+
317
+ If the `user_id_token` cookie is present and not empty, this function will send its value to
318
+ the `/token` endpoint in an attempt to get an access token. If it gets one, this function will
319
+ inject that access token into the web page so Swagger UI will consider the user to be logged in.
320
+
321
+ Reference: https://fastapi.tiangolo.com/tutorial/cookie-params/
322
+ """
312
323
  access_token = None
313
324
  if user_id_token:
314
325
  # get bearer token
@@ -329,32 +340,9 @@ def custom_swagger_ui_html(
329
340
  rv.raise_for_status()
330
341
  access_token = rv.json()["access_token"]
331
342
 
332
- swagger_ui_parameters = {"withCredentials": True}
333
343
  onComplete = ""
334
344
  if access_token is not None:
335
- onComplete += f"""
336
- ui.preauthorizeApiKey('bearerAuth', '{access_token}');
337
-
338
- token_info = document.createElement('section');
339
- token_info.classList.add('nmdc-info', 'nmdc-info-token', 'block', 'col-12');
340
- token_info.innerHTML = <double-quote>
341
- <p>You are now authorized. Prefer a command-line interface (CLI)? Use this header for HTTP requests:</p>
342
- <p>
343
- <code>
344
- <span>Authorization: Bearer </span>
345
- <span id='token' data-token-value='{access_token}' data-state='masked'>***</span>
346
- </code>
347
- </p>
348
- <p>
349
- <button id='token-mask-toggler'>Show token</button>
350
- <button id='token-copier'>Copy token</button>
351
- <span id='token-copier-message'></span>
352
- </p>
353
- </double-quote>;
354
- document.querySelector('.information-container').append(token_info);
355
- """.replace(
356
- "\n", " "
357
- )
345
+ onComplete += f"ui.preauthorizeApiKey('bearerAuth', '{access_token}');"
358
346
  if os.getenv("INFO_BANNER_INNERHTML"):
359
347
  info_banner_innerhtml = os.getenv("INFO_BANNER_INNERHTML")
360
348
  onComplete += f"""
@@ -365,14 +353,14 @@ def custom_swagger_ui_html(
365
353
  """.replace(
366
354
  "\n", " "
367
355
  )
368
- if onComplete:
369
- # Note: The `nmdcInit` JavaScript event is a custom event we use to trigger anything that is listening for it.
370
- # Reference: https://developer.mozilla.org/en-US/docs/Web/Events/Creating_and_triggering_events
371
- swagger_ui_parameters.update(
372
- {
373
- "onComplete": f"""<unquote-safe>() => {{ {onComplete}; dispatchEvent(new Event('nmdcInit')); }}</unquote-safe>""",
374
- }
375
- )
356
+ swagger_ui_parameters = base_swagger_ui_parameters.copy()
357
+ # Note: The `nmdcInit` JavaScript event is a custom event we use to trigger anything that is listening for it.
358
+ # Reference: https://developer.mozilla.org/en-US/docs/Web/Events/Creating_and_triggering_events
359
+ swagger_ui_parameters.update(
360
+ {
361
+ "onComplete": f"""<unquote-safe>() => {{ {onComplete}; dispatchEvent(new Event('nmdcInit')); }}</unquote-safe>""",
362
+ }
363
+ )
376
364
  response = get_swagger_ui_html(
377
365
  openapi_url=app.openapi_url,
378
366
  title=app.title,
@@ -383,15 +371,51 @@ def custom_swagger_ui_html(
383
371
  assets_dir_path = Path(__file__).parent / "swagger_ui" / "assets"
384
372
  style_css: str = Path(assets_dir_path / "style.css").read_text()
385
373
  script_js: str = Path(assets_dir_path / "script.js").read_text()
374
+ custom_elements_js: str = Path(assets_dir_path / "custom-elements.js").read_text()
386
375
  content = (
387
376
  response.body.decode()
388
377
  .replace('"<unquote-safe>', "")
389
378
  .replace('</unquote-safe>"', "")
390
379
  .replace("<double-quote>", '"')
391
380
  .replace("</double-quote>", '"')
381
+ # TODO: Consider using a "custom layout" implemented as a React component.
382
+ # Reference: https://github.com/swagger-api/swagger-ui/blob/master/docs/customization/custom-layout.md
383
+ #
384
+ # Note: Custom layouts are specified via the Swagger UI parameter named `layout`, whose value identifies
385
+ # a component that is specified via the Swagger UI parameter named `plugins`. The Swagger UI
386
+ # JavaScript code expects each item in the `plugins` array to be a JavaScript function,
387
+ # but FastAPI's `get_swagger_ui_html` function serializes each parameter's value into JSON,
388
+ # preventing us from specifying a JavaScript function as a value in the `plugins` array.
389
+ #
390
+ # As a workaround, we could use the string `replace`-ment technique shown below to put the literal
391
+ # JavaScript characters into place in the final HTML document. Using that approach, I _have_ been
392
+ # able to display a custom layout (a custom React component), but I have _not_ been able to get
393
+ # that custom layout to display Swagger UI's `BaseLayout` component (which includes the core
394
+ # Swagger UI functionality). That's a deal breaker.
395
+ #
396
+ .replace(r'"{{ NMDC_SWAGGER_UI_PARAMETERS_PLUGINS_PLACEHOLDER }}"', r"[]")
397
+ # Inject HTML elements containing data that can be read via JavaScript (e.g., `swagger_ui/assets/script.js`).
398
+ # Note: We escape the values here so they can be safely used as HTML attribute values.
399
+ .replace(
400
+ "</head>",
401
+ f"""
402
+ </head>
403
+ <div
404
+ id="nmdc-access-token"
405
+ data-token="{escape(access_token if access_token is not None else '')}"
406
+ style="display: none"
407
+ ></div>
408
+ <div
409
+ id="nmdc-orcid-login-url"
410
+ data-url="{escape(orcid_login_url)}"
411
+ style="display: none"
412
+ ></div>
413
+ """,
414
+ )
392
415
  # Inject a custom CSS stylesheet immediately before the closing `</head>` tag.
393
416
  .replace("</head>", f"<style>\n{style_css}\n</style>\n</head>")
394
- # Inject a custom JavaScript script immediately before the closing `</body>` tag.
417
+ # Inject custom JavaScript scripts immediately before the closing `</body>` tag.
418
+ .replace("</body>", f"<script>\n{custom_elements_js}\n</script>\n</body>")
395
419
  .replace("</body>", f"<script>\n{script_js}\n</script>\n</body>")
396
420
  )
397
421
  return HTMLResponse(content=content)
@@ -30,8 +30,13 @@ class ListRequest(BaseModel):
30
30
  r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
31
31
  ],
32
32
  )
33
- # TODO: Document why the optional type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter).
34
- max_page_size: Optional[int] = Field(
33
+ # TODO: Document the following things about this type hint and `Field` definition:
34
+ # (a) why the type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter),
35
+ # (b) why the default value here is 20 as opposed to 25 (the default value in `FindRequest`), and
36
+ # (c) why there is no upper limit on the value (the `PerPageRange` type has an upper limit of 2000).
37
+ #
38
+ # Note: If the HTTP request lacks a value for this parameter, Pydantic will fall back to the default value specified here.
39
+ max_page_size: int = Field(
35
40
  default=20,
36
41
  title="Resources per page",
37
42
  description="How many resources you want _each page_ to contain, formatted as a positive integer.",
@@ -120,10 +125,12 @@ class FindRequest(BaseModel):
120
125
  default=None,
121
126
  title="Page number",
122
127
  description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
123
- This is the page number formatted as an integer ≥ 1.""",
128
+ This is the page number formatted as an integer ≥ 1.
129
+ **Limitation:** When using _page number_-based pagination, only the first 10,000 resources
130
+ are accessible. You can access resources beyond that by using _cursor_-based pagination.""",
124
131
  examples=[1],
125
132
  )
126
- per_page: Optional[PerPageRange] = Field(
133
+ per_page: PerPageRange = Field(
127
134
  default=25,
128
135
  title="Resources per page",
129
136
  description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
@@ -133,7 +140,7 @@ class FindRequest(BaseModel):
133
140
  default=None,
134
141
  title="Cursor",
135
142
  description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
136
- To use cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
143
+ To begin using cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
137
144
  include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
138
145
  request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
139
146
  examples=[