nmdc-runtime 2.6.0__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -1
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +2 -0
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +731 -40
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +142 -26
  77. nmdc_runtime/site/graphs.py +80 -29
  78. nmdc_runtime/site/ops.py +522 -183
  79. nmdc_runtime/site/repair/database_updater.py +210 -1
  80. nmdc_runtime/site/repository.py +108 -117
  81. nmdc_runtime/site/resources.py +72 -36
  82. nmdc_runtime/site/translation/gold_translator.py +22 -21
  83. nmdc_runtime/site/translation/neon_benthic_translator.py +1 -1
  84. nmdc_runtime/site/translation/neon_soil_translator.py +5 -5
  85. nmdc_runtime/site/translation/neon_surface_water_translator.py +1 -2
  86. nmdc_runtime/site/translation/submission_portal_translator.py +216 -69
  87. nmdc_runtime/site/translation/translator.py +64 -1
  88. nmdc_runtime/site/util.py +8 -3
  89. nmdc_runtime/site/validation/util.py +16 -12
  90. nmdc_runtime/site/workspace.yaml +13 -0
  91. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  92. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  93. nmdc_runtime/static/README.md +5 -0
  94. nmdc_runtime/static/favicon.ico +0 -0
  95. nmdc_runtime/util.py +175 -348
  96. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  97. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  98. {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  99. nmdc_runtime/containers.py +0 -14
  100. nmdc_runtime/core/db/Database.py +0 -15
  101. nmdc_runtime/core/exceptions/__init__.py +0 -23
  102. nmdc_runtime/core/exceptions/base.py +0 -47
  103. nmdc_runtime/core/exceptions/token.py +0 -13
  104. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  105. nmdc_runtime/domain/users/userSchema.py +0 -37
  106. nmdc_runtime/domain/users/userService.py +0 -14
  107. nmdc_runtime/infrastructure/database/db.py +0 -3
  108. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  109. nmdc_runtime/lib/__init__.py +0 -1
  110. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  111. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  112. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  113. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  114. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  115. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  116. nmdc_runtime/site/drsobjects/registration.py +0 -131
  117. nmdc_runtime/site/translation/emsl.py +0 -43
  118. nmdc_runtime/site/translation/gold.py +0 -53
  119. nmdc_runtime/site/translation/jgi.py +0 -32
  120. nmdc_runtime/site/translation/util.py +0 -132
  121. nmdc_runtime/site/validation/jgi.py +0 -43
  122. nmdc_runtime-2.6.0.dist-info/METADATA +0 -199
  123. nmdc_runtime-2.6.0.dist-info/RECORD +0 -83
  124. nmdc_runtime-2.6.0.dist-info/top_level.txt +0 -1
  125. /nmdc_runtime/{client → api}/__init__.py +0 -0
  126. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  127. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  128. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  129. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  130. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  131. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  132. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  133. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  134. {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -0
  135. {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,212 @@
1
+ import os
2
+ from datetime import datetime, timedelta, timezone
3
+ from typing import Optional, Dict
4
+
5
+ from fastapi import Depends
6
+ from fastapi.exceptions import HTTPException
7
+ from fastapi.openapi.models import OAuthFlows as OAuthFlowsModel
8
+ from fastapi.param_functions import Form
9
+ from fastapi.security import (
10
+ OAuth2,
11
+ HTTPBasic,
12
+ HTTPBasicCredentials,
13
+ HTTPBearer,
14
+ HTTPAuthorizationCredentials,
15
+ )
16
+ from fastapi.security.utils import get_authorization_scheme_param
17
+ from jose import JWTError, jwt
18
+ from passlib.context import CryptContext
19
+ from pydantic import BaseModel
20
+ from starlette.requests import Request
21
+ from starlette.status import HTTP_400_BAD_REQUEST, HTTP_401_UNAUTHORIZED
22
+
23
# Base URL of the production ORCID environment; also the fallback for ORCID_BASE_URL.
ORCID_PRODUCTION_BASE_URL = "https://orcid.org"

# Signing key (from the environment) and algorithm for the JWTs handled in this module.
SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ALGORITHM = "HS256"

# ORCID client settings, each read from an environment variable of the same name.
ORCID_NMDC_CLIENT_ID = os.environ.get("ORCID_NMDC_CLIENT_ID")
ORCID_NMDC_CLIENT_SECRET = os.environ.get("ORCID_NMDC_CLIENT_SECRET")
ORCID_BASE_URL = os.environ.get("ORCID_BASE_URL", ORCID_PRODUCTION_BASE_URL)

# JSON Web Key (JWK) published by ORCID, hard-coded here.
#
# Note: The dictionary was copied from: https://orcid.org/oauth/jwks
#       That URL is listed in: https://orcid.org/.well-known/openid-configuration
#
# TODO: Consider _live-loading_ this dictionary from the Internet.
#
ORCID_JWK = dict(
    e="AQAB",
    kid="production-orcid-org-7hdmdswarosg3gjujo8agwtazgkp1ojs",
    kty="RSA",
    n="jxTIntA7YvdfnYkLSN4wk__E2zf_wbb0SV_HLHFvh6a9ENVRD1_rHK0EijlBzikb-1rgDQihJETcgBLsMoZVQqGj8fDUUuxnVHsuGav_bf41PA7E_58HXKPrB2C0cON41f7K3o9TStKpVJOSXBrRWURmNQ64qnSSryn1nCxMzXpaw7VUo409ohybbvN6ngxVy4QR2NCC7Fr0QVdtapxD7zdlwx6lEwGemuqs_oG5oDtrRuRgeOHmRps2R6gG5oc-JqVMrVRv6F9h4ja3UgxCDBQjOVT1BFPWmMHnHCsVYLqbbXkZUfvP2sO1dJiYd_zrQhi-FtNth9qrLLv3gkgtwQ",
    use="sig",
)
# When a _non-production_ ORCID environment is configured, swap in the "kid"
# and "n" values of the sandbox ORCID environment.
#
# Source: https://sandbox.orcid.org/oauth/jwks
#
if ORCID_BASE_URL != ORCID_PRODUCTION_BASE_URL:
    ORCID_JWK.update(
        kid="sandbox-orcid-org-3hpgosl3b6lapenh1ewsgdob3fawepoj",
        n="pl-jp-kTAGf6BZUrWIYUJTvqqMVd4iAnoLS6vve-KNV0q8TxKvMre7oi9IulDcqTuJ1alHrZAIVlgrgFn88MKirZuTqHG6LCtEsr7qGD9XyVcz64oXrb9vx4FO9tLNQxvdnIWCIwyPAYWtPMHMSSD5oEVUtVL_5IaxfCJvU-FchdHiwfxvXMWmA-i3mcEEe9zggag2vUPPIqUwbPVUFNj2hE7UsZbasuIToEMFRZqSB6juc9zv6PEUueQ5hAJCEylTkzMwyBMibrt04TmtZk2w9DfKJR91555s2ZMstX4G_su1_FqQ6p9vgcuLQ6tCtrW77tta-Rw7McF_tyPmvnhQ",
    )

# Signature algorithm name paired with the ORCID key above.
ORCID_JWS_VERITY_ALGORITHM = "RS256"
57
+
58
+
59
class ClientCredentials(BaseModel):
    # A client identifier together with its secret; both are required strings.
    client_id: str
    client_secret: str
62
+
63
+
64
class TokenExpires(BaseModel):
    # A duration split into days/hours/minutes; omitted parts take the defaults below.
    days: Optional[int] = 1
    hours: Optional[int] = 0
    minutes: Optional[int] = 0


# Module-wide expiry setting: exactly one day.
ACCESS_TOKEN_EXPIRES = TokenExpires(days=1, hours=0, minutes=0)
71
+
72
+
73
class Token(BaseModel):
    # An access token, its type string, and (optionally) its expiry description.
    access_token: str
    token_type: str
    expires: Optional[TokenExpires] = None
77
+
78
+
79
class TokenData(BaseModel):
    # Holds an optional "subject" string; defaults to None when not supplied.
    subject: Optional[str] = None
81
+
82
+
83
# passlib context configured with bcrypt as its only scheme.
pwd_context = CryptContext(deprecated="auto", schemes=["bcrypt"])

# Reusable 401 error raised by this module when a token cannot be validated.
credentials_exception = HTTPException(
    headers={"WWW-Authenticate": "Bearer"},
    detail="Could not validate credentials",
    status_code=HTTP_401_UNAUTHORIZED,
)
90
+
91
+
92
def verify_password(plain_password, hashed_password):
    """Return the result of checking ``plain_password`` against ``hashed_password``.

    The check is delegated to the module-level ``pwd_context``.
    """
    is_match = pwd_context.verify(plain_password, hashed_password)
    return is_match
94
+
95
+
96
def get_password_hash(password):
    """Return the hash that the module-level ``pwd_context`` produces for ``password``."""
    hashed = pwd_context.hash(password)
    return hashed
98
+
99
+
100
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    """Encode ``data`` plus an ``exp`` claim into a JWT signed with ``SECRET_KEY``.

    :param data: Claims to embed. The dictionary is copied, so the caller's
                 object is never modified.
    :param expires_delta: Lifetime of the token, measured from the current UTC
                          time. When ``None``, the token expires after 15 minutes.
    :returns: The encoded token, as returned by ``jwt.encode``.
    """
    # Compare against None (not truthiness) so that an explicit zero-length
    # lifetime is honoured instead of silently becoming the 15-minute default.
    if expires_delta is None:
        expires_delta = timedelta(minutes=15)
    to_encode = data.copy()
    to_encode["exp"] = datetime.now(timezone.utc) + expires_delta
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
109
+
110
+
111
def get_access_token_expiration(token) -> Optional[int]:
    """Return the ``exp`` claim of a JWT signed with ``SECRET_KEY``.

    :param token: Encoded JWT to decode and verify.
    :returns: The value of the token's ``exp`` claim exactly as decoded, or
              ``None`` when the token carries no such claim.
              NOTE(review): this is the raw claim value (presumably integer
              epoch seconds), not a ``datetime`` object — confirm against
              callers before relying on either.
    :raises HTTPException: ``credentials_exception`` (HTTP 401) when the token
                           cannot be decoded.
    """
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        raise credentials_exception
    return payload.get("exp")
117
+
118
+
119
class OAuth2PasswordOrClientCredentialsBearer(OAuth2):
    """
    OAuth2 security scheme that declares both the "password" flow and the
    "clientCredentials" flow against a single token URL, and that extracts
    the bearer token from an incoming request's ``Authorization`` header.
    """

    def __init__(
        self,
        tokenUrl: str,
        scheme_name: Optional[str] = None,
        scopes: Optional[Dict[str, str]] = None,
        auto_error: bool = True,
    ):
        # A missing or empty `scopes` argument is advertised as "no scopes".
        token_flows = OAuthFlowsModel(
            password={"tokenUrl": tokenUrl, "scopes": scopes or {}},
            clientCredentials={"tokenUrl": tokenUrl},
        )
        super().__init__(
            flows=token_flows, scheme_name=scheme_name, auto_error=auto_error
        )

    async def __call__(self, request: Request) -> Optional[str]:
        """Return the bearer token from ``request``, if there is one.

        When the ``Authorization`` header is absent or its scheme is not
        "bearer": raise an HTTP 401 error if ``auto_error`` is set; otherwise
        print the request URL and return ``None``.
        """
        header_value: str = request.headers.get("Authorization")
        scheme, token = get_authorization_scheme_param(header_value)
        if header_value and scheme.lower() == "bearer":
            return token
        if self.auto_error:
            raise HTTPException(
                status_code=HTTP_401_UNAUTHORIZED,
                detail="Not authenticated",
                headers={"WWW-Authenticate": "Bearer"},
            )
        print(request.url)
        return None
153
+
154
+
155
# NOTE(review): `oauth2_scheme` and `optional_oauth2_scheme` are constructed with
# identical arguments (both non-raising) — confirm that this is intentional.
oauth2_scheme = OAuth2PasswordOrClientCredentialsBearer(
    auto_error=False,
    tokenUrl="token",
)
optional_oauth2_scheme = OAuth2PasswordOrClientCredentialsBearer(
    auto_error=False,
    tokenUrl="token",
)

# Plain HTTP bearer scheme, also non-raising.
bearer_scheme = HTTPBearer(auto_error=False, scheme_name="bearerAuth")
163
+
164
+
165
async def basic_credentials(req: Request):
    """Apply a freshly built, non-raising ``HTTPBasic`` scheme to ``req`` and return its result."""
    basic_scheme = HTTPBasic(auto_error=False)
    return await basic_scheme(req)
167
+
168
+
169
async def bearer_credentials(req: Request):
    """Apply a freshly built, non-raising ``HTTPBearer`` scheme to ``req`` and return its result."""
    http_bearer = HTTPBearer(scheme_name="bearerAuth", auto_error=False)
    return await http_bearer(req)
171
+
172
+
173
class OAuth2PasswordOrClientCredentialsRequestForm:
    # Collects the inputs of a token request: form fields plus any HTTP Basic or
    # Bearer credentials found on the request (supplied through `Depends`).
    # After construction the instance exposes: grant_type, username, password,
    # scopes (the whitespace-split `scope` field), client_id and client_secret.
    def __init__(
        self,
        basic_creds: Optional[HTTPBasicCredentials] = Depends(basic_credentials),
        bearer_creds: Optional[HTTPAuthorizationCredentials] = Depends(
            bearer_credentials
        ),
        grant_type: str = Form(None, pattern="^password$|^client_credentials$"),
        username: Optional[str] = Form(None),
        password: Optional[str] = Form(None),
        scope: str = Form(""),
        client_id: Optional[str] = Form(None),
        client_secret: Optional[str] = Form(None),
    ):
        if bearer_creds:
            # NOTE(review): the values assigned in this branch are replaced by the
            # unconditional assignments at the end of this method — confirm
            # whether that is the intended behaviour.
            self.grant_type = "client_credentials"
            self.username = None
            self.password = None
            self.scopes = scope.split()
            self.client_id = bearer_creds.credentials
            self.client_secret = None
        elif grant_type == "password" and (username is None or password is None):
            raise HTTPException(
                status_code=HTTP_400_BAD_REQUEST,
                detail="grant_type password requires username and password",
            )
        elif grant_type == "client_credentials" and client_id is None:
            # No client_id in the form: fall back to HTTP Basic credentials.
            if not basic_creds:
                raise HTTPException(
                    status_code=HTTP_400_BAD_REQUEST,
                    detail="grant_type client_credentials requires client_id and client_secret",
                )
            client_id = basic_creds.username
            client_secret = basic_creds.password

        self.grant_type = grant_type
        self.username = username
        self.password = password
        self.scopes = scope.split()
        self.client_id = client_id
        self.client_secret = client_secret
@@ -0,0 +1,200 @@
1
+ from datetime import datetime, timezone
2
+ from typing import List
3
+
4
+ import base32_lib as base32
5
+ from pymongo.database import Database as MongoDatabase
6
+
7
+
8
def generate_id(length=10, split_every=4, checksum=True) -> str:
    """Generate random base32 string: a user-shareable ID for a database entity.

    Uses Douglas Crockford Base32 encoding: <https://www.crockford.com/base32.html>

    With the default arguments, the identifier has 10 non-hyphen characters:
    8 characters (5 bits each) plus 2 digit characters for an ISO 7064 checksum,
    so 2**40 ~ 1 trillion possible values, *much* larger than the number of
    statements feasibly storable by the database. Hyphen splits are optional
    and exist for human readability; by default the splitting interval is
    4 characters (`split_every=4`).

    All arguments are forwarded unchanged to `base32.generate`.

    :param length: non-hyphen identifier length *including* checksum
    :param split_every: hyphenates every that many characters
    :param checksum: computes and appends ISO-7064 checksum
    :returns: identifier as a string
    """
    return base32.generate(length=length, split_every=split_every, checksum=checksum)
25
+
26
+
27
def decode_id(encoded: str, checksum=True) -> int:
    """Turn a string ID (as produced by `generate_id`) back into a number.

    Before decoding, the string is normalized: lowercased, hyphens removed,
    and user typos corrected ({I,i,l,L}=>1 and {O,o}=>0).

    With `checksum` enabled, a checksum error raises a ValueError.

    :param encoded: string to decode
    :param checksum: extract checksum and validate
    :returns: original number.
    """
    number = base32.decode(encoded=encoded, checksum=checksum)
    return number
40
+
41
+
42
def encode_id(number: int, split_every=4, min_length=10, checksum=True) -> str:
    """Encodes `number` to URI-friendly Douglas Crockford base32 string.

    All arguments are forwarded unchanged to `base32.encode`.

    :param number: number to encode
    :param split_every: if provided, insert '-' every `split_every` characters
                        going from left to right
    :param min_length: 0-pad beginning of string to obtain minimum desired length
    :param checksum: append modulo 97-10 (ISO 7064) checksum to string
    :returns: The Douglas Crockford base32 encoding of `number`, as a string
              composed only of valid URI characters.
    """
    return base32.encode(
        number, split_every=split_every, min_length=min_length, checksum=checksum
    )
56
+
57
+
58
# sping: "semi-opaque string" (https://n2t.net/e/n2t_apidoc.html).
#
# Each tuple pairs a character count `n` with the threshold 2**(5n) // 2,
# which equals 2**(5n - 1). The resulting list is always:
#
#     [
#         ( 2, 512),
#         ( 4, 524288),
#         ( 6, 536870912),
#         ( 8, 549755813888),
#         (10, 562949953421312)
#     ]
#
SPING_SIZE_THRESHOLDS = [(n, 2 ** (5 * n - 1)) for n in range(2, 11, 2)]
71
+
72
+
73
def collection_name(naa, shoulder):
    r"""
    Build the string "ids_<naa>_<shoulder>", designed to be used as a MongoDB
    collection name.

    TODO: Document the function parameters, including expanding the "naa" acronym.
    """
    return "ids_{}_{}".format(naa, shoulder)
80
+
81
+
82
def generate_ids(
    mdb: MongoDatabase,
    owner: str,
    populator: str,
    number: int,
    ns: str = "",
    naa: str = "nmdc",
    shoulder: str = "fk4",
) -> List[str]:
    r"""
    Generate the specified number of identifiers, storing them in a MongoDB collection
    whose name is derived from the specified Name-Assigning Authority (NAA) and Shoulder.

    :param mdb: Handle to a MongoDB database
    :param owner: String that will go in the "__ao" field of the identifier record.
                  Callers will oftentimes set this to the name of a Runtime "site"
                  (as in, a "site client" site, not a "Dagster" site).
    :param populator: String that will go in the "who" field of the identifier record.
                      Indicates "who generated this ID." Callers will oftentimes set
                      this to the name of a Runtime "site" (as in, a "site client" site,
                      not a "Dagster" site).
    :param number: How many identifiers to generate. Must not be negative;
                   zero yields an empty list.
    :param ns: Namespace (see Minter docs); e.g. "changesheets"
    :param naa: Name-Assigning Authority (see Minter docs); e.g. "nmdc"
    :param shoulder: String that will go in the "how" field (see Minter docs); e.g. "sys0"
    :returns: The "where" value ("<naa>:<shoulder><id>") of each stored identifier record.
    :raises ValueError: If `number` is negative.

    This function was written the way it was in an attempt to mirror the ARK spec:
    https://www.ietf.org/archive/id/draft-kunze-ark-41.html (found via: https://arks.org/specs/)

    Deviations from the ARK spec include:
    1. The inclusion of a typecode.
       The inclusion of a typecode came out of discussions with team members,
       who wanted identifiers to include some non-opaque substring that could be used
       to determine what type of resource a given identifier refers to.
    2. Making hyphens mandatory.
       We decided to make the hyphens mandatory, whereas the spec says they are optional.
       > "Hyphens are considered to be insignificant and are always ignored in ARKs."
       > Reference: https://www.ietf.org/archive/id/draft-kunze-ark-41.html#name-character-repertoires
       In our case, we require that users include an identifier's hyphens whenever
       they are using that identifier.
    """
    # A negative count could never be reached by the loop below, so reject it
    # up front instead of looping forever.
    if number < 0:
        raise ValueError(f"number must be non-negative, got {number}")

    collection = mdb.get_collection(collection_name(naa, shoulder))
    projected_count = number + collection.estimated_document_count()
    # Pick the first sping length whose threshold exceeds the projected
    # collection size; fall back to 12 when every threshold is exceeded.
    n_chars = next(
        (n for n, threshold in SPING_SIZE_THRESHOLDS if projected_count < threshold),
        12,
    )

    collected = []
    while len(collected) < number:
        # Draw as many distinct candidates as are still missing
        # (the two extra characters hold the checksum).
        n_to_generate = number - len(collected)
        eids = set()
        while len(eids) < n_to_generate:
            eids.add(generate_id(length=(n_chars + 2), split_every=0, checksum=True))
        candidates = [(eid, decode_id(eid)) for eid in eids]

        # Discard candidates whose decoded value is already stored as an `_id`.
        deids = [eid_decoded for _, eid_decoded in candidates]
        taken = {d["_id"] for d in collection.find({"_id": {"$in": deids}}, {"_id": 1})}
        not_taken = [
            (eid, eid_decoded)
            for eid, eid_decoded in candidates
            if eid_decoded not in taken
        ]
        if not not_taken:
            continue

        # One clock reading per batch, so "when" and "__ac" always agree.
        timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds")

        # All attribute names beginning with "__a" are reserved...
        # https://github.com/jkunze/n2t-eggnog/blob/0f0f4c490e6dece507dba710d3557e29b8f6627e/egg#L1882
        # The author of this function opted to refrain from using property names beginning with "_.e",
        # because he thought it would complicate MongoDB queries involving those properties, given that
        # the "." is used as a field delimiter in MongoDB syntax (e.g. "foo.bar.baz").
        docs = [
            {
                "@context": "https://n2t.net/e/n2t_apidoc.html#identifier-metadata",
                "_id": eid_decoded,
                "who": populator,
                "what": (f"{ns}/{eid}" if ns else "(:tba) Work in progress"),
                "when": timestamp,
                "how": shoulder,
                "where": f"{naa}:{shoulder}{eid}",
                "__as": "reserved",  # status, public|reserved|unavailable
                "__ao": owner,  # owner
                "__ac": timestamp,  # created
            }
            for eid, eid_decoded in not_taken
        ]
        collection.insert_many(docs)
        collected.extend(docs)

    return [d["where"] for d in collected]
175
+
176
+
177
def generate_one_id(
    mdb: MongoDatabase,
    ns: str = "",
    shoulder: str = "sys0",  # "sys0" represents the Runtime
) -> str:
    """Generate unique Crockford Base32-encoded ID for mdb repository.

    Can associate ID with namespace ns to facilitate ID deletion/recycling.

    Delegates to `generate_ids`, requesting a single identifier under the
    "nmdc" authority, and returns that identifier.
    """
    runtime_identity = "_system"  # "_system" represents the Runtime
    ids = generate_ids(
        mdb,
        owner=runtime_identity,
        populator=runtime_identity,
        number=1,
        ns=ns,
        naa="nmdc",
        shoulder=shoulder,
    )
    return ids[0]
196
+
197
+
198
def local_part(id_):
    """nmdc:fk0123 -> fk0123

    Returns everything after the first ":" in `id_`.
    """
    pieces = id_.split(":", maxsplit=1)
    return pieces[1]