nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,155 @@
1
+ .nmdc-info {
2
+ padding: 1em;
3
+ background-color: #448aff1a;
4
+ border: .075rem solid #448aff;
5
+ border-radius: 4px;
6
+ }
7
+ .nmdc-info-token code {
8
+ font-size: x-small;
9
+ }
10
+ .nmdc-success {
11
+ color: green;
12
+ }
13
+ .nmdc-error {
14
+ color: red;
15
+ }
16
+
17
+ /**
18
+ * Style the NMDC logo we add to the top of the Swagger UI page via JavaScript.
19
+ *
20
+ * Notes:
21
+ * - We set the background size larger than the element size, and then offset
22
+ * the background a small amount, so that we do not display the thin border
23
+ * that is baked into this SVG image.
24
+ * - On wide screens, we set the heading group (which contains the logo, and a
25
+ * wrapper element—introduced via JavaScript—containing the normal API title
26
+ * and link to the OpenAPI schema) to use `display: flex`, so that the logo
27
+ * appears next to that wrapper element. On narrow screens, we allow them
28
+ * to stack like they normally would.
29
+ */
30
+ @media screen and (min-width: 768px) {
31
+ .nmdc-heading-group {
32
+ display: flex;
33
+ }
34
+ }
35
+ .nmdc-heading-group .nmdc-logo {
36
+ width: 64px;
37
+ height: 64px;
38
+ margin-right: 16px;
39
+ background-image: url("/static/NMDC_logo.svg");
40
+ background-repeat: no-repeat;
41
+ background-size: 68px 68px;
42
+ background-position: -2px -2px;
43
+ border-radius: 4px;
44
+ }
45
+
46
+ /**
47
+ * Hides the following text from the Swagger UI modal login form:
48
+ *
49
+ * > Scopes are used to grant an application different levels of
50
+ * > access to data on behalf of the end user. Each API may declare
51
+ * > one or more scopes.
52
+ * >
53
+ * > API requires the following scopes. Select which ones you want
54
+ * > to grant to Swagger UI.
55
+ *
56
+ * TODO: Check whether this text can be hidden via standard
57
+ * Swagger UI configuration, rather than custom CSS.
58
+ */
59
+ .auth-wrapper .modal-ux-content .auth-container .scope-def {
60
+ display: none;
61
+ }
62
+
63
+ /**
64
+ * Hides the following text from the Swagger UI modal login form:
65
+ *
66
+ * > Scopes are used to grant an application different levels of
67
+ * > access to data on behalf of the end user. Each API may declare
68
+ * > one or more scopes.
69
+ * >
70
+ * > API requires the following scopes. Select which ones you want
71
+ * > to grant to Swagger UI.
72
+ *
73
+ * TODO: Check whether this text can be hidden via standard
74
+ * Swagger UI configuration, rather than custom CSS.
75
+ */
76
+ .auth-wrapper .modal-ux-content .auth-container .scope-def {
77
+ display: none;
78
+ }
79
+
80
+ /**
81
+ * Style the ORCID Login widget we inject via JavaScript into the
82
+ * Swagger UI modal login form.
83
+ */
84
+ .auth-container.nmdc-orcid-login {
85
+ padding-bottom: 20px;
86
+ font-size: 14px;
87
+ }
88
+ .auth-container.nmdc-orcid-login .nmdc-orcid-login-icon-link {
89
+ display: flex;
90
+ align-items: center;
91
+ gap: 0.5em;
92
+ }
93
+ .auth-container.nmdc-orcid-login .nmdc-orcid-login-icon-link > a {
94
+ color: #4990e2;
95
+ text-decoration: none;
96
+ }
97
+ .auth-container.nmdc-orcid-login .nmdc-orcid-login-icon-link > a:hover {
98
+ color: #1f69c0;
99
+ }
100
+
101
+ /**
102
+ * In the tag description, hide the details portion (i.e. the portion the user
103
+ * can toggle the visibility of), and color the hyperlinks the same as in
104
+ * the overall introductory text at the top of the Swagger UI page.
105
+ */
106
+ .tag-description-details.hidden {
107
+ display: none;
108
+ }
109
+ .tag-description-details a {
110
+ color: #4990e2;
111
+ }
112
+ .tag-description-details a:hover {
113
+ color: #1f69c0;
114
+ }
115
+
116
+ /*****************************************************************************
117
+ * Customize Swagger UI's default elements.
118
+ *****************************************************************************/
119
+
120
+ /* Standardize the line height of the description in the top section of the page. */
121
+ .swagger-ui .information-container .info__description {
122
+ line-height: 1em;
123
+ }
124
+ /* Slightly deemphasize the version numbers in the description. */
125
+ .swagger-ui .information-container .info__description code {
126
+ font-weight: normal;
127
+ }
128
+
129
+ /* Remove the box shadow from the top section of the page. */
130
+ .swagger-ui .scheme-container {
131
+ box-shadow: none;
132
+ }
133
+ /* Draw a border around each section. */
134
+ .swagger-ui div.opblock-tag-section {
135
+ border: 1px solid rgba(59,65,81,.3);
136
+ border-radius: 4px;
137
+ padding-top: 15px;
138
+ padding-left: 15px;
139
+ padding-right: 15px;
140
+ margin-bottom: 15px;
141
+ }
142
+ /* Stack the elements of each section header vertically. */
143
+ .swagger-ui .opblock-tag {
144
+ flex-direction: column;
145
+ align-items: stretch;
146
+ border-bottom: none;
147
+ }
148
+ .swagger-ui .opblock-tag:hover {
149
+ background-color: transparent;
150
+ }
151
+ /* Remove the left margin from the description and chevron icon rows. */
152
+ .swagger-ui .opblock-tag > small,
153
+ .swagger-ui .opblock-tag > button {
154
+ padding-left: 0;
155
+ }
@@ -0,0 +1,34 @@
1
+ """Constants related to configuring Swagger UI."""
2
+
3
+ # Reference: https://swagger.io/docs/open-source-tools/swagger-ui/usage/configuration/#parameters
4
+ base_swagger_ui_parameters: dict = {
5
+ "withCredentials": True,
6
+ # Collapse the "Schemas" section by default.
7
+ # Note: `-1` would omit the section entirely.
8
+ "defaultModelsExpandDepth": 0,
9
+ # Display the response times of the requests performed via "Try it out".
10
+ # Note: In my local testing, the response times reported by this
11
+ # are about 50-100ms longer than the response times reported
12
+ # by Chrome DevTools. That is the case whether the actual
13
+ # response time is short (e.g. 100ms) or long (e.g. 60s);
14
+ # i.e. not proportional to the actual response time.
15
+ "displayRequestDuration": True,
16
+ # Expand all tag sections (i.e. groups of endpoints) by default.
17
+ # Note: `"list"` expands them, and `"none"` collapses them.
18
+ "docExpansion": "list",
19
+ # Make it so a logged-in user remains logged in even after reloading
20
+ # the web page (or leaving the web page and revisiting it later).
21
+ "persistAuthorization": True,
22
+ # Specify the Swagger UI plugins we want to use (see note below).
23
+ #
24
+ # Note: FastAPI's `get_swagger_ui_html` function always serializes
25
+ # the value of this property as a _string_, while the Swagger UI
26
+ # JavaScript code requires it to be an _array_. To work around that,
27
+ # we just add a placeholder string here; then, after we pass this
28
+ # dictionary to FastAPI's `get_swagger_ui_html` function and get the
29
+ # returned HTML for the web page, we replace this placeholder string
30
+ # (within the returned HTML) with the JavaScript array we wanted
31
+ # the "plugins" property to contain all along.
32
+ #
33
+ "plugins": r"{{ NMDC_SWAGGER_UI_PARAMETERS_PLUGINS_PLACEHOLDER }}",
34
+ }
nmdc_runtime/config.py ADDED
@@ -0,0 +1,56 @@
1
+ """
2
+ This module acts as a unified interface between the codebase and the environment.
3
+ We will eventually move all of the Runtime's environment variables reads into this
4
+ module, instead of leaving them sprinkled throughout the codebase.
5
+
6
+ TODO: Move all environment variable reads into this module and update references accordingly.
7
+ """
8
+
9
+ from typing import Set
10
+ import os
11
+
12
+
13
+ def is_env_var_true(name: str, default: str = "false") -> bool:
14
+ r"""
15
+ Checks whether the value of the specified environment variable
16
+ meets our criteria for true-ness.
17
+
18
+ Reference: https://docs.python.org/3/library/os.html#os.environ
19
+
20
+ Run doctests via: $ python -m doctest nmdc_runtime/config.py
21
+
22
+ >>> import os
23
+ >>> name = "EXAMPLE_ENV_VAR"
24
+ >>> _ = os.environ.pop(name, None) # Undefined
25
+ >>> is_env_var_true(name)
26
+ False
27
+ >>> is_env_var_true(name, "true") # Undefined, overridden default
28
+ True
29
+ >>> os.environ[name] = "false" # Defined as false
30
+ >>> is_env_var_true(name)
31
+ False
32
+ >>> os.environ[name] = "true" # Defined as true
33
+ >>> is_env_var_true(name)
34
+ True
35
+ >>> os.environ[name] = "TRUE" # Case-insensitive
36
+ >>> is_env_var_true(name)
37
+ True
38
+ >>> os.environ[name] = "potato" # Non-boolean string
39
+ >>> is_env_var_true(name)
40
+ False
41
+ """
42
+ lowercase_true_strings: Set[str] = {"true"}
43
+ return os.environ.get(name, default).lower() in lowercase_true_strings
44
+
45
+
46
+ # Feature flag to enable/disable the `/nmdcschema/linked_instances` endpoint and the tests that target it.
47
+ IS_LINKED_INSTANCES_ENDPOINT_ENABLED: bool = is_env_var_true(
48
+ "IS_LINKED_INSTANCES_ENDPOINT_ENABLED", default="true"
49
+ )
50
+
51
+ # Feature flag that can be used to enable/disable the `/scalar` endpoint.
52
+ IS_SCALAR_ENABLED: bool = is_env_var_true("IS_SCALAR_ENABLED", default="true")
53
+
54
+ # Feature flag that can be used to enable/disable performance profiling,
55
+ # which can be activated via the `?profile=true` URL query parameter.
56
+ IS_PROFILING_ENABLED: bool = is_env_var_true("IS_PROFILING_ENABLED", default="false")
@@ -2,9 +2,8 @@ import abc
2
2
  import re
3
3
  from typing import Union
4
4
 
5
- from fastapi import HTTPException
6
5
  from pymongo import ReturnDocument
7
- from toolz import merge, dissoc
6
+ from toolz import merge
8
7
  from pymongo.database import Database as MongoDatabase
9
8
 
10
9
 
@@ -137,6 +136,10 @@ class MongoIDStore(abc.ABC):
137
136
  self.db = mdb
138
137
 
139
138
  def mint(self, req_mint: MintingRequest) -> list[Identifier]:
139
+ """
140
+ TODO: Document this method.
141
+ """
142
+
140
143
  if not self.db["minter.services"].find_one({"id": req_mint.service.id}):
141
144
  raise MinterError(f"Unknown service {req_mint.service.id}")
142
145
  if not self.db["minter.requesters"].find_one({"id": req_mint.requester.id}):
@@ -191,6 +194,10 @@ class MongoIDStore(abc.ABC):
191
194
  return collected
192
195
 
193
196
  def bind(self, req_bind: BindingRequest) -> Identifier:
197
+ """Associate the specified arbitrary metadata with the specified ID.
198
+
199
+ TODO: Do not allow users to bind identifiers minted by _other_ users.
200
+ """
194
201
  id_stored = self.resolve(req_bind)
195
202
  if id_stored is None:
196
203
  raise MinterError(f"ID {req_bind.id_name} is unknown")
@@ -208,15 +215,28 @@ class MongoIDStore(abc.ABC):
208
215
  )
209
216
 
210
217
  def resolve(self, req_res: ResolutionRequest) -> Union[Identifier, None]:
218
+ """Get the metadata that is bound to the specified identifier."""
211
219
  match re.match(r"nmdc:([^-]+)-([^-]+)-.*", req_res.id_name).groups():
212
220
  case (_, _):
213
221
  doc = self.db["minter.id_records"].find_one({"id": req_res.id_name})
214
222
  # TODO if draft ID, check requester
223
+ #
224
+ # Note: The above "TODO" comment is about checking whether the user that wants to
225
+ # resolve the identifier is the same user that minted the identifier. If
226
+ # it isn't, then... what? (i.e. allow resolution, or deny resolution)?
227
+ #
215
228
  return Identifier(**doc) if doc else None
216
229
  case _:
217
230
  raise MinterError("Invalid ID name")
218
231
 
219
232
  def delete(self, req_del: DeleteRequest):
233
+ """Delete an identifier that is still in the draft state.
234
+
235
+ Note: You can mint (draft) as many IDs as you want. As long as you don't bind them
236
+ (i.e. as long as they are still in the draft state), you can still delete them.
237
+
238
+ TODO: Do not allow users to delete identifiers minted by _other_ users.
239
+ """
220
240
  id_stored = self.resolve(req_del)
221
241
  if id_stored is None:
222
242
  raise MinterError(f"ID {req_del.id_name} is unknown")
@@ -1,8 +1,10 @@
1
1
  import os
2
2
  from functools import lru_cache
3
+ from typing import List
3
4
 
4
- from nmdc_runtime.util import get_nmdc_jsonschema_dict
5
+ from nmdc_schema.id_helpers import get_typecode_for_future_ids
5
6
 
7
+ from nmdc_runtime.util import get_nmdc_jsonschema_dict
6
8
  from nmdc_runtime.api.db.mongo import get_mongo_db
7
9
 
8
10
 
@@ -12,17 +14,41 @@ def minting_service_id() -> str | None:
12
14
 
13
15
 
14
16
  @lru_cache()
15
- def typecodes():
17
+ def typecodes() -> List[dict]:
18
+ r"""
19
+ Returns a list of dictionaries containing typecodes and associated information derived from the schema.
20
+
21
+ Note: In this function, we rely on a helper function provided by the `nmdc-schema` package to extract—from a given
22
+ class's `id` slot's pattern—the typecode that the minter would use when generating an ID for an instance of
23
+ that class _today_; regardless of what it may have used in the past.
24
+
25
+ >>> typecode_descriptors = typecodes()
26
+
27
+ # Test #1: We get the typecode we expect, for a class whose pattern contains only one typecode.
28
+ >>> any((td["name"] == "sty" and td["schema_class"] == "nmdc:Study") for td in typecode_descriptors)
29
+ True
30
+
31
+ # Tests #2 and #3: We get only the typecode we expect, for a class whose pattern contains multiple typecodes.
32
+ >>> any((td["name"] == "dgms" and td["schema_class"] == "nmdc:MassSpectrometry") for td in typecode_descriptors)
33
+ True
34
+ >>> any((td["name"] == "omprc" and td["schema_class"] == "nmdc:MassSpectrometry") for td in typecode_descriptors)
35
+ False
36
+ """
37
+ id_pattern_prefix = r"^(nmdc):"
38
+
16
39
  rv = []
17
40
  schema_dict = get_nmdc_jsonschema_dict()
18
41
  for cls_name, defn in schema_dict["$defs"].items():
19
42
  match defn.get("properties"):
20
- case {"id": {"pattern": p}} if p.startswith("^(nmdc):"):
43
+ case {"id": {"pattern": p}} if p.startswith(id_pattern_prefix):
44
+ # Extract the typecode from the pattern.
45
+ typecode_for_future_ids = get_typecode_for_future_ids(slot_pattern=p)
46
+
21
47
  rv.append(
22
48
  {
23
49
  "id": "nmdc:" + cls_name + "_" + "typecode",
24
50
  "schema_class": "nmdc:" + cls_name,
25
- "name": p.split(":", maxsplit=1)[-1].split("-", maxsplit=1)[0],
51
+ "name": typecode_for_future_ids,
26
52
  }
27
53
  )
28
54
  case _:
@@ -1,9 +1,11 @@
1
1
  from enum import Enum
2
+ import re
2
3
  from typing import Optional
3
4
 
5
+ from base32_lib import base32
4
6
  from pydantic import BaseModel, PositiveInt
5
7
 
6
- from nmdc_runtime.minter.config import schema_classes
8
+ from nmdc_runtime.minter.config import schema_classes, typecodes
7
9
 
8
10
 
9
11
  class Entity(BaseModel):
@@ -20,9 +22,29 @@ class ValueObject(BaseModel):
20
22
 
21
23
 
22
24
  class Status(str, Enum):
25
+ """Status of an identifier.
26
+
27
+ Note: These state values were chosen in an attempt to mirror those that DataCite uses for DOIs,
28
+ which are (currently) "Draft", "Registered", and "Findable" (we use "Indexed" instead).
29
+ Reference: https://support.datacite.org/docs/doi-states
30
+ """
31
+
23
32
  draft = "draft"
33
+ """
34
+ Draft; i.e., the identifier is reserved for potential use. The identifier can still be deleted.
35
+ """
36
+
24
37
  registered = "registered"
38
+ """
39
+ Registered; i.e., the identifier is in use, but the resource it identifies is not publicly accessible
40
+ (yet, or anymore). The identifier cannot be deleted.
41
+ """
42
+
25
43
  indexed = "indexed"
44
+ """
45
+ Indexed; i.e., the resource identified by the identifier is publicly accessible (i.e. in the
46
+ production database). The identifier cannot be deleted.
47
+ """
26
48
 
27
49
 
28
50
  class MintingRequest(ValueObject):
@@ -71,3 +93,35 @@ class Identifier(Entity):
71
93
  class Typecode(Entity):
72
94
  schema_class: str
73
95
  name: str
96
+
97
+
98
+ id_prefix_pattern = rf"(?P<prefix>nmdc)"
99
+ id_typecode_pattern = rf"(?P<typecode>[a-z]{{1,6}})"
100
+ id_shoulder_pattern = rf"(?P<shoulder>[0-9][a-z]{{0,6}}[0-9])"
101
+ id_blade_pattern = rf"(?P<blade>[A-Za-z0-9]+)"
102
+ id_version_pattern = rf"(?P<version>(\.[A-Za-z0-9]+)*)"
103
+ id_locus_pattern = rf"(?P<locus>_[A-Za-z0-9_\.-]+)?"
104
+ id_pattern = (
105
+ rf"^{id_prefix_pattern}:{id_typecode_pattern}-{id_shoulder_pattern}-"
106
+ rf"{id_blade_pattern}{id_version_pattern}{id_locus_pattern}$"
107
+ )
108
+ ID_TYPECODE_VALUES = [t["name"] for t in typecodes()]
109
+ id_typecode_pattern_strict = rf"(?P<typecode_strict>({'|'.join(ID_TYPECODE_VALUES)}))"
110
+ id_blade_pattern_strict = rf"(?P<blade_strict>[{base32.ENCODING_CHARS}]+)"
111
+ id_pattern_strict = (
112
+ rf"^{id_prefix_pattern}:{id_typecode_pattern_strict}-{id_shoulder_pattern}-"
113
+ rf"{id_blade_pattern_strict}{id_version_pattern}{id_locus_pattern}$"
114
+ )
115
+ id_pattern_strict_compiled = re.compile(id_pattern_strict)
116
+
117
+
118
+ def check_valid_ids(ids: list[str]):
119
+ for id_ in ids:
120
+ if not re.match(id_pattern, id_):
121
+ raise ValueError(
122
+ (
123
+ f"Invalid ID format for given ID: '{id_}'.\n\nAn ID must match the pattern: '{id_pattern}'.\n\n"
124
+ "See: <https://microbiomedata.github.io/nmdc-schema/identifiers/#ids-minted-for-use-within-nmdc>"
125
+ )
126
+ )
127
+ return ids
@@ -8,7 +8,7 @@ from nmdc_runtime.api.core.util import raise404_if_none
8
8
  from nmdc_runtime.api.db.mongo import get_mongo_db
9
9
  from nmdc_runtime.api.models.site import get_current_client_site, Site
10
10
  from nmdc_runtime.minter.adapters.repository import MongoIDStore, MinterError
11
- from nmdc_runtime.minter.config import minting_service_id, schema_classes
11
+ from nmdc_runtime.minter.config import minting_service_id
12
12
  from nmdc_runtime.minter.domain.model import (
13
13
  Identifier,
14
14
  AuthenticatedMintingRequest,
@@ -0,0 +1,89 @@
1
+ from pymongo.database import Database
2
+ from pymongo.collection import Collection
3
+ from typing import Any, Optional
4
+ from pymongo.client_session import ClientSession
5
+ import inspect
6
+
7
+
8
+ def _wrap_with_session(obj: Any, name: str, session: Optional[ClientSession]) -> Any:
9
+ """
10
+ Wraps a callable attribute of an object to automatically include a session
11
+ if the callable accepts a 'session' keyword argument.
12
+ """
13
+ attr = getattr(obj, name)
14
+ if callable(attr):
15
+ signature = inspect.signature(attr)
16
+ parameters = signature.parameters
17
+ accepts_session = any(
18
+ param.name == "session"
19
+ for param in parameters.values()
20
+ if param.kind
21
+ in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY)
22
+ )
23
+
24
+ def wrapper(*args, **kwargs):
25
+ if session is not None and accepts_session and "session" not in kwargs:
26
+ kwargs["session"] = session
27
+ return attr(*args, **kwargs)
28
+
29
+ return wrapper
30
+ return attr
31
+
32
+
33
+ class SessionBoundCollection:
34
+ """
35
+ A wrapper around pymongo.collection.Collection that automatically passes a session
36
+ to methods that accept it.
37
+ """
38
+
39
+ def __init__(self, collection: Collection, session: Optional[ClientSession] = None):
40
+ self._collection = collection
41
+ self._session = session
42
+
43
+ def __getattr__(self, name: str):
44
+ return _wrap_with_session(self._collection, name, self._session)
45
+
46
+ def __getitem__(self, name: str) -> "SessionBoundCollection":
47
+ return SessionBoundCollection(self._collection[name], self._session)
48
+
49
+
50
+ class SessionBoundDatabase(Database):
51
+ """
52
+ A wrapper around pymongo.database.Database that automatically passes a session
53
+ to methods that accept it.
54
+ """
55
+
56
+ def __init__(self, database: Database, session: Optional[ClientSession] = None):
57
+ super().__init__(
58
+ database.client,
59
+ database.name,
60
+ database.codec_options,
61
+ database.read_preference,
62
+ database.write_concern,
63
+ database.read_concern,
64
+ )
65
+ self._database = database
66
+ self._session = session
67
+
68
+ def __getattr__(self, name: str):
69
+ return _wrap_with_session(self._database, name, self._session)
70
+
71
+ def __getitem__(self, name: str) -> SessionBoundCollection:
72
+ return SessionBoundCollection(self._database[name], self._session)
73
+
74
+ def get_collection(self, name: str, **kwargs) -> SessionBoundCollection:
75
+ """Get the collection with the given name and options, wrapped in a :class:`SessionBoundCollection` bound to this database's session."""
76
+ collection = super().get_collection(name, **kwargs)
77
+ return SessionBoundCollection(collection, self._session)
78
+
79
+ @property
80
+ def client(self):
81
+ return self._database.client
82
+
83
+ @property
84
+ def unbounded(self):
85
+ return self._database
86
+
87
+ @property
88
+ def name(self):
89
+ return self._database.name
@@ -6,7 +6,7 @@ $ nmdcdb-mongodump
6
6
 
7
7
  import os
8
8
  import subprocess
9
- from datetime import datetime, timezone
9
+ from datetime import datetime
10
10
  from pathlib import Path
11
11
  from zoneinfo import ZoneInfo
12
12
 
@@ -16,9 +16,7 @@ from toolz import assoc
16
16
 
17
17
  from nmdc_runtime.api.core.util import pick
18
18
  from nmdc_runtime.api.db.mongo import get_mongo_db
19
- from nmdc_runtime.site.repository import run_config_frozen__normal_env
20
- from nmdc_runtime.site.resources import get_mongo
21
- from nmdc_runtime.util import nmdc_jsonschema, schema_collection_names_with_id_field
19
+ from nmdc_runtime.util import schema_collection_names_with_id_field
22
20
 
23
21
 
24
22
  def collection_stats(mdb: MongoDatabase):