nmdc-runtime 2.8.0__py3-none-any.whl → 2.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (100) hide show
  1. nmdc_runtime/api/__init__.py +0 -0
  2. nmdc_runtime/api/analytics.py +70 -0
  3. nmdc_runtime/api/boot/__init__.py +0 -0
  4. nmdc_runtime/api/boot/capabilities.py +9 -0
  5. nmdc_runtime/api/boot/object_types.py +126 -0
  6. nmdc_runtime/api/boot/triggers.py +84 -0
  7. nmdc_runtime/api/boot/workflows.py +116 -0
  8. nmdc_runtime/api/core/__init__.py +0 -0
  9. nmdc_runtime/api/core/auth.py +208 -0
  10. nmdc_runtime/api/core/idgen.py +170 -0
  11. nmdc_runtime/api/core/metadata.py +788 -0
  12. nmdc_runtime/api/core/util.py +109 -0
  13. nmdc_runtime/api/db/__init__.py +0 -0
  14. nmdc_runtime/api/db/mongo.py +447 -0
  15. nmdc_runtime/api/db/s3.py +37 -0
  16. nmdc_runtime/api/endpoints/__init__.py +0 -0
  17. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  18. nmdc_runtime/api/endpoints/find.py +794 -0
  19. nmdc_runtime/api/endpoints/ids.py +192 -0
  20. nmdc_runtime/api/endpoints/jobs.py +143 -0
  21. nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
  22. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  23. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  24. nmdc_runtime/api/endpoints/metadata.py +260 -0
  25. nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
  26. nmdc_runtime/api/endpoints/object_types.py +38 -0
  27. nmdc_runtime/api/endpoints/objects.py +277 -0
  28. nmdc_runtime/api/endpoints/operations.py +105 -0
  29. nmdc_runtime/api/endpoints/queries.py +679 -0
  30. nmdc_runtime/api/endpoints/runs.py +98 -0
  31. nmdc_runtime/api/endpoints/search.py +38 -0
  32. nmdc_runtime/api/endpoints/sites.py +229 -0
  33. nmdc_runtime/api/endpoints/triggers.py +25 -0
  34. nmdc_runtime/api/endpoints/users.py +214 -0
  35. nmdc_runtime/api/endpoints/util.py +774 -0
  36. nmdc_runtime/api/endpoints/workflows.py +353 -0
  37. nmdc_runtime/api/main.py +401 -0
  38. nmdc_runtime/api/middleware.py +43 -0
  39. nmdc_runtime/api/models/__init__.py +0 -0
  40. nmdc_runtime/api/models/capability.py +14 -0
  41. nmdc_runtime/api/models/id.py +92 -0
  42. nmdc_runtime/api/models/job.py +37 -0
  43. nmdc_runtime/api/models/lib/__init__.py +0 -0
  44. nmdc_runtime/api/models/lib/helpers.py +78 -0
  45. nmdc_runtime/api/models/metadata.py +11 -0
  46. nmdc_runtime/api/models/minter.py +0 -0
  47. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  48. nmdc_runtime/api/models/object.py +180 -0
  49. nmdc_runtime/api/models/object_type.py +20 -0
  50. nmdc_runtime/api/models/operation.py +66 -0
  51. nmdc_runtime/api/models/query.py +246 -0
  52. nmdc_runtime/api/models/query_continuation.py +111 -0
  53. nmdc_runtime/api/models/run.py +161 -0
  54. nmdc_runtime/api/models/site.py +87 -0
  55. nmdc_runtime/api/models/trigger.py +13 -0
  56. nmdc_runtime/api/models/user.py +140 -0
  57. nmdc_runtime/api/models/util.py +253 -0
  58. nmdc_runtime/api/models/workflow.py +15 -0
  59. nmdc_runtime/api/openapi.py +242 -0
  60. nmdc_runtime/config.py +55 -4
  61. nmdc_runtime/core/db/Database.py +1 -3
  62. nmdc_runtime/infrastructure/database/models/user.py +0 -9
  63. nmdc_runtime/lib/extract_nmdc_data.py +0 -8
  64. nmdc_runtime/lib/nmdc_dataframes.py +3 -7
  65. nmdc_runtime/lib/nmdc_etl_class.py +1 -7
  66. nmdc_runtime/minter/adapters/repository.py +1 -2
  67. nmdc_runtime/minter/config.py +2 -0
  68. nmdc_runtime/minter/domain/model.py +35 -1
  69. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  70. nmdc_runtime/mongo_util.py +1 -2
  71. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  72. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  73. nmdc_runtime/site/export/ncbi_xml.py +1 -2
  74. nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
  75. nmdc_runtime/site/graphs.py +33 -28
  76. nmdc_runtime/site/ops.py +97 -237
  77. nmdc_runtime/site/repair/database_updater.py +8 -0
  78. nmdc_runtime/site/repository.py +7 -117
  79. nmdc_runtime/site/resources.py +4 -4
  80. nmdc_runtime/site/translation/gold_translator.py +22 -21
  81. nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
  82. nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
  83. nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
  84. nmdc_runtime/site/translation/submission_portal_translator.py +64 -54
  85. nmdc_runtime/site/translation/translator.py +63 -1
  86. nmdc_runtime/site/util.py +8 -3
  87. nmdc_runtime/site/validation/util.py +10 -5
  88. nmdc_runtime/util.py +9 -321
  89. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
  90. nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
  91. nmdc_runtime/site/translation/emsl.py +0 -43
  92. nmdc_runtime/site/translation/gold.py +0 -53
  93. nmdc_runtime/site/translation/jgi.py +0 -32
  94. nmdc_runtime/site/translation/util.py +0 -132
  95. nmdc_runtime/site/validation/jgi.py +0 -43
  96. nmdc_runtime-2.8.0.dist-info/RECORD +0 -84
  97. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
  98. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
  99. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
  100. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,277 @@
1
+ from typing import List, Annotated
2
+
3
+ import botocore
4
+ from fastapi import APIRouter, status, Depends, HTTPException, Query
5
+ from gridfs import GridFS
6
+ from pymongo import ReturnDocument
7
+ from pymongo.database import Database as MongoDatabase
8
+ import requests
9
+ from starlette.responses import RedirectResponse
10
+ from toolz import merge
11
+
12
+ from nmdc_runtime.api.core.idgen import decode_id, generate_one_id, local_part
13
+ from nmdc_runtime.api.core.util import raise404_if_none, API_SITE_ID
14
+ from nmdc_runtime.api.db.mongo import get_mongo_db
15
+ from nmdc_runtime.api.db.s3 import S3_ID_NS, presigned_url_to_get, get_s3_client
16
+ from nmdc_runtime.api.endpoints.util import (
17
+ list_resources,
18
+ _create_object,
19
+ HOSTNAME_EXTERNAL,
20
+ BASE_URL_EXTERNAL,
21
+ )
22
+ from nmdc_runtime.api.models.object import (
23
+ DrsId,
24
+ DrsObject,
25
+ DrsObjectIn,
26
+ AccessURL,
27
+ )
28
+ from nmdc_runtime.api.models.object_type import ObjectType, DrsObjectWithTypes
29
+ from nmdc_runtime.api.models.site import Site, get_current_client_site
30
+ from nmdc_runtime.api.models.util import ListRequest, ListResponse
31
+ from nmdc_runtime.minter.config import typecodes
32
+
33
+ router = APIRouter()
34
+
35
+
36
def supplied_object_id(mdb, client_site, obj_doc):
    """Return an object ID the client supplied through an access method, if usable.

    Each entry of ``obj_doc["access_methods"]`` whose ``access_id`` contains a
    colon is split at the *last* colon into ``<site_id>:<object_id>``. The first
    ``<object_id>`` is returned for which all of the following hold:

    - ``<site_id>`` equals ``client_site.id``;
    - ``mdb.sites`` has a document with that ``id``;
    - ``mdb.ids`` has a document for ``decode_id(<object_id>)`` in ``S3_ID_NS``;
    - ``mdb.objects`` has no document with that ``id`` yet.

    Returns ``None`` if ``obj_doc`` has no ``access_methods`` key or no entry
    satisfies every condition.
    """
    if "access_methods" not in obj_doc:
        return None

    for entry in obj_doc["access_methods"]:
        access_id = entry.get("access_id")
        if not access_id or ":" not in access_id:
            continue

        site_id, _, candidate = access_id.rpartition(":")

        # The checks below run in a fixed order and stop at the first failure,
        # so database lookups only happen for IDs naming the caller's own site.
        if client_site.id != site_id:
            continue
        if not mdb.sites.count_documents({"id": site_id}):
            continue
        if not mdb.ids.count_documents(
            {"_id": decode_id(candidate), "ns": S3_ID_NS}
        ):
            continue
        if mdb.objects.count_documents({"id": candidate}) == 0:
            return candidate

    return None
52
+
53
+
54
@router.post("/objects", status_code=status.HTTP_201_CREATED, response_model=DrsObject)
def create_object(
    object_in: DrsObjectIn,
    mdb: MongoDatabase = Depends(get_mongo_db),
    client_site: Site = Depends(get_current_client_site),
):
    """Create a new DrsObject.

    You may create a *blob* or a *bundle*.

    A *blob* is like a file - it's a single blob of bytes, so there is no `contents` array,
    only one or more `access_methods`.

    A *bundle* is like a folder - it's a gathering of other objects (blobs and/or bundles) in a
    `contents` array, and `access_methods` is optional because a data consumer can fetch each of
    the bundle contents individually.

    At least one checksum is required. The names of supported checksum types are given by
    the set of Python 3.8 `hashlib.algorithms_guaranteed`:

    > blake2b | blake2s | md5 | sha1 | sha224 | sha256 | sha384 | sha3_224 | sha3_256 | sha3_384 |
    > sha3_512 | sha512 | shake_128 | shake_256

    Each provided `access_method` needs either an `access_url` or an `access_id`.

    """
    # Prefer an ID the client already referenced in one of its access methods
    # (see `supplied_object_id`); only mint a new one when none is usable.
    object_id = supplied_object_id(
        mdb, client_site, object_in.model_dump(exclude_unset=True)
    )
    if object_id is None:
        object_id = generate_one_id(mdb, S3_ID_NS)

    drs_id = local_part(object_id)
    self_uri = f"drs://{HOSTNAME_EXTERNAL}/{drs_id}"

    # The authenticated client's site is recorded as the object's managing site.
    return _create_object(
        mdb, object_in, mgr_site=client_site.id, drs_id=drs_id, self_uri=self_uri
    )
90
+
91
+
92
@router.get("/objects", response_model=ListResponse[DrsObject])
def list_objects(
    req: Annotated[ListRequest, Query()],
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    # Documented with a comment rather than a docstring so the generated
    # OpenAPI description of this endpoint stays as it was.
    #
    # `req` is populated from the query string; listing is delegated to the
    # shared `list_resources` helper against the "objects" collection.
    listing = list_resources(req, mdb, "objects")
    return listing
98
+
99
+
100
@router.get(
    "/objects/{object_id}", response_model=DrsObject, response_model_exclude_unset=True
)
def get_object_info(
    object_id: DrsId,
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    """
    Resolution strategy:

    0. if object_id == 'nmdc', go to <https://microbiomedata.github.io/nmdc-schema/>.
    1. if object_id.startswith("sty"): # nmdc:Study typecode
       then try https://data.microbiomedata.org/details/study/nmdc:{object_id}
    2. if object_id.startswith("bsm"): # nmdc:Biosample typecode
       then try https://data.microbiomedata.org/details/sample/nmdc:{object_id}
    3. if object_id.startswith some known typecode
       then try https://api.microbiomedata.org/nmdcschema/ids/nmdc:{object_id}
    4. try https://microbiomedata.github.io/nmdc-schema/{object_id}
    5. try mdb.objects.find_one({"id": object_id})
    """

    def _redirect(url):
        # Every redirect issued by this endpoint is a 307.
        return RedirectResponse(url, status_code=status.HTTP_307_TEMPORARY_REDIRECT)

    if object_id == "nmdc":
        return _redirect("https://microbiomedata.github.io/nmdc-schema")

    # Studies and biosamples have a preferred landing page on the data portal.
    # Existence is probed through the portal's API; on anything but a 404 the
    # client is sent to the human-facing details page.
    # NOTE(review): none of the outbound probes in this function set a timeout.
    portal = "https://data.microbiomedata.org"
    for prefix, api_kind, page_kind in (
        ("sty-", "study", "study"),
        ("bsm-", "biosample", "sample"),
    ):
        if not object_id.startswith(prefix):
            continue
        rv = requests.get(
            f"{portal}/api/{api_kind}/nmdc:{object_id}", allow_redirects=True
        )  # TODO use HEAD when enabled upstream
        if rv.status_code != 404:
            return _redirect(f"{portal}/details/{page_kind}/nmdc:{object_id}")
        break  # an ID cannot match both prefixes

    # If "sty" or "bsm" ID doesn't have preferred landing page (above), try for JSON payload
    if any(object_id.startswith(f'{t["name"]}-') for t in typecodes()):
        url_to_try = f"{BASE_URL_EXTERNAL}/nmdcschema/ids/nmdc:{object_id}"
        rv = requests.head(url_to_try, allow_redirects=True)
        if rv.status_code != 404:
            return _redirect(url_to_try)

    # Next, treat the ID as the name of a schema documentation page.
    url_to_try = f"https://microbiomedata.github.io/nmdc-schema/{object_id}"
    rv = requests.head(url_to_try, allow_redirects=True)
    if rv.status_code != 404:
        return _redirect(url_to_try)

    # Last resort: a DRS object stored in this database; 404 if there is none.
    return raise404_if_none(mdb.objects.find_one({"id": object_id}))
164
+
165
+
166
@router.get(
    "/ga4gh/drs/v1/objects/{object_id}",
    summary="Get Object Info",
    response_model=DrsObject,
    responses={
        status.HTTP_303_SEE_OTHER: {
            "description": "See other",
            "headers": {"Location": {"schema": {"type": "string"}}},
        },
    },
)
def get_ga4gh_object_info(object_id: DrsId):
    """Redirect to /objects/{object_id}."""
    # The GA4GH DRS path is only an alias: answer with a 303 pointing at the
    # canonical /objects/{object_id} route on the externally visible base URL.
    target = BASE_URL_EXTERNAL + f"/objects/{object_id}"
    return RedirectResponse(target, status_code=status.HTTP_303_SEE_OTHER)
183
+
184
+
185
@router.get("/objects/{object_id}/types", response_model=List[ObjectType])
def list_object_types(object_id: DrsId, mdb: MongoDatabase = Depends(get_mongo_db)):
    # Fetch only the object's `types` field; a missing object yields a 404.
    obj = raise404_if_none(mdb.objects.find_one({"id": object_id}, ["types"]))
    # An object without a `types` field is treated as having no types.
    type_ids = obj.get("types", [])
    matching = mdb.object_types.find({"id": {"$in": type_ids}})
    return list(matching)
189
+
190
+
191
@router.put("/objects/{object_id}/types", response_model=DrsObjectWithTypes)
def replace_object_types(
    object_id: str,
    object_type_ids: List[str],
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    # Replace the full list of type IDs attached to an object.
    #
    # Responds 400 if any requested ID is not a known object type, and 404 if
    # no object has the given `object_id`. Otherwise returns the object as
    # stored after the update.
    known_type_ids = set(mdb.object_types.distinct("id"))
    unknown_type_ids = set(object_type_ids) - known_type_ids
    if unknown_type_ids:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"unknown type ids: {unknown_type_ids}.",
        )

    doc_after = mdb.objects.find_one_and_update(
        {"id": object_id},
        {"$set": {"types": object_type_ids}},
        return_document=ReturnDocument.AFTER,
    )
    # `find_one_and_update` yields None when nothing matched. Returning that
    # would fail response-model validation, so report the missing object as a
    # 404, as the other object endpoints do.
    return raise404_if_none(doc_after)
209
+
210
+
211
def object_access_id_ok(obj_doc, access_id):
    """Tell whether ``access_id`` is referenced by one of the object's access methods.

    Returns ``False`` when ``obj_doc`` has no ``access_methods`` key. Entries
    with a missing or empty ``access_id`` never match.
    """
    if "access_methods" not in obj_doc:
        return False
    return any(
        bool(entry.get("access_id")) and entry["access_id"] == access_id
        for entry in obj_doc["access_methods"]
    )
218
+
219
+
220
@router.get("/objects/{object_id}/access/{access_id}", response_model=AccessURL)
def get_object_access(
    object_id: DrsId,
    access_id: str,
    mdb: MongoDatabase = Depends(get_mongo_db),
    s3client: botocore.client.BaseClient = Depends(get_s3_client),
):
    # Resolve one of an object's access IDs to a fetchable URL.
    #
    # Every failure is a 404: unknown object, access ID not listed on the
    # object, file missing from GridFS, or no handler for the access ID.
    obj_doc = raise404_if_none(mdb.objects.find_one({"id": object_id}))
    if not object_access_id_ok(obj_doc, access_id):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="access_id not referenced by object",
        )

    # Handler 1: access IDs prefixed with this API's own site ID are turned
    # into a presigned URL, keyed by whatever follows the first colon.
    if access_id.startswith(f"{API_SITE_ID}:"):
        _, _, key = access_id.partition(":")
        presigned = presigned_url_to_get(f"{S3_ID_NS}/{key}", client=s3client)
        return {"url": presigned}

    # Handler 2: "gfs0" IDs name a GridFS file whose ID doubles as the object ID.
    if access_id.startswith("gfs0") and object_id == access_id:
        if not GridFS(mdb).exists(_id=access_id):
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="access_id for object not found by gfs0 handler",
            )
        return {"url": BASE_URL_EXTERNAL + f"/metadata/stored_files/{access_id}"}

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="no site found to handle access_id for object",
    )
253
+
254
+
255
@router.patch("/objects/{object_id}", response_model=DrsObject)
def update_object(
    object_id: str,
    object_patch: DrsObjectIn,
    mdb: MongoDatabase = Depends(get_mongo_db),
    client_site: Site = Depends(get_current_client_site),
):
    # Apply a partial update to a stored object and return the patched document.
    #
    # Responds 404 if the object does not exist and 403 if the caller's site
    # is not the object's managing site.
    doc = raise404_if_none(mdb.objects.find_one({"id": object_id}))

    # A site client can update object iff its site_id is _mgr_site.
    object_mgr_site = doc.get("_mgr_site")
    if object_mgr_site != client_site.id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"client authorized for different site_id than {object_mgr_site}",
        )

    # Only fields the client actually sent override the stored values.
    doc_object_patched = merge(doc, object_patch.model_dump(exclude_unset=True))

    # Persist to the `objects` collection the document was read from. The
    # previous code wrote to `operations`, so the patch was never saved.
    mdb.objects.replace_one({"id": object_id}, doc_object_patched)
    return doc_object_patched
273
+
274
+
275
@router.put("/objects/{object_id}", response_model=DrsObject)
def replace_object():
    # Placeholder: whole-object replacement has not been implemented.
    #
    # A bare `pass` returns None, which cannot be validated against the
    # declared `DrsObject` response model and reaches the client as an opaque
    # server error. Report the real situation explicitly instead.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="replacing an object is not implemented",
    )
@@ -0,0 +1,105 @@
1
+ from typing import Annotated
2
+
3
+ import pymongo
4
+ from fastapi import APIRouter, Depends, status, HTTPException, Query
5
+ from toolz import get_in, merge, assoc
6
+
7
+ from nmdc_runtime.api.core.util import raise404_if_none, pick
8
+ from nmdc_runtime.api.db.mongo import get_mongo_db
9
+ from nmdc_runtime.api.endpoints.util import list_resources
10
+ from nmdc_runtime.api.models.operation import (
11
+ ListOperationsResponse,
12
+ ResultT,
13
+ MetadataT,
14
+ Operation,
15
+ UpdateOperationRequest,
16
+ )
17
+ from nmdc_runtime.api.models.site import Site, get_current_client_site
18
+ from nmdc_runtime.api.models.util import ListRequest
19
+
20
+ router = APIRouter()
21
+
22
+
23
@router.get("/operations", response_model=ListOperationsResponse[ResultT, MetadataT])
def list_operations(
    req: Annotated[ListRequest, Query()],
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    # Documented with a comment rather than a docstring so the generated
    # OpenAPI description of this endpoint stays as it was.
    #
    # `req` is populated from the query string; listing is delegated to the
    # shared `list_resources` helper against the "operations" collection.
    listing = list_resources(req, mdb, "operations")
    return listing
29
+
30
+
31
@router.get("/operations/{op_id}", response_model=Operation[ResultT, MetadataT])
def get_operation(
    op_id: str,
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    # Look the operation up by its `id`; `raise404_if_none` turns a miss into
    # a 404 and otherwise hands the document back unchanged.
    return raise404_if_none(mdb.operations.find_one({"id": op_id}))
38
+
39
+
40
@router.patch("/operations/{op_id}", response_model=Operation[ResultT, MetadataT])
def update_operation(
    op_id: str,
    op_patch: UpdateOperationRequest,
    mdb: pymongo.database.Database = Depends(get_mongo_db),
    client_site: Site = Depends(get_current_client_site),
):
    """

    A site client can update an operation if and only if its site_id is the operation's
    `metadata.site_id`.

    The following fields in `metadata` are used by the system and are read-only:
    - site_id
    - job
    - model
    """
    # TODO be able to make job "undone" and "redone" to re-trigger downstream ETL.
    stored = raise404_if_none(mdb.operations.find_one({"id": op_id}))

    # Only the site recorded on the operation may modify it.
    owner_site_id = get_in(["metadata", "site_id"], stored)
    if owner_site_id != client_site.id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=f"client authorized for different site_id than {owner_site_id}",
        )

    requested = op_patch.model_dump(exclude_unset=True)

    # Later arguments win in `merge`, so the stored values of the read-only
    # metadata fields override anything the client sent for them.
    read_only = pick(["site_id", "job", "model"], stored.get("metadata", {}))
    metadata = merge(requested.get("metadata", {}), read_only)

    patched = merge(stored, {**requested, "metadata": metadata})
    mdb.operations.replace_one({"id": op_id}, patched)
    return patched
79
+
80
+
81
@router.post(
    "/operations/{op_id}:wait",
    description=(
        "Wait until the operation is resolved or rejected before returning the result."
        " This is a 'blocking' alternative to client-side polling, and may not be available"
        " for operation types known to be particularly long-running."
    ),
)
def wait_operation():
    # Placeholder: the route is registered and described, but no waiting is
    # implemented yet, so the handler returns nothing.
    pass
91
+
92
+
93
@router.post("/operations/{op_id}:cancel")
def cancel_operation():
    # Placeholder: the route is registered, but nothing is cancelled yet.
    # NOTE(review): callers still get a successful, empty response.
    return None
96
+
97
+
98
@router.post("/operations/{op_id}:pause")
def pause_operation():
    # Placeholder: the route is registered, but nothing is paused yet.
    # NOTE(review): callers still get a successful, empty response.
    return None
101
+
102
+
103
@router.post("/operations/{op_id}:resume")
def resume_operation():
    # Placeholder: the route is registered, but nothing is resumed yet.
    # NOTE(review): callers still get a successful, empty response.
    return None