nmdc-runtime 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/api/__init__.py +0 -0
- nmdc_runtime/api/analytics.py +70 -0
- nmdc_runtime/api/boot/__init__.py +0 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/__init__.py +0 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +170 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/__init__.py +0 -0
- nmdc_runtime/api/db/mongo.py +447 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/__init__.py +0 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +794 -0
- nmdc_runtime/api/endpoints/ids.py +192 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +105 -0
- nmdc_runtime/api/endpoints/queries.py +679 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +229 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +774 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/main.py +401 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/__init__.py +0 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/__init__.py +0 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/minter.py +0 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +253 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +242 -0
- nmdc_runtime/config.py +7 -8
- nmdc_runtime/core/db/Database.py +1 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -9
- nmdc_runtime/lib/extract_nmdc_data.py +0 -8
- nmdc_runtime/lib/nmdc_dataframes.py +3 -7
- nmdc_runtime/lib/nmdc_etl_class.py +1 -7
- nmdc_runtime/minter/adapters/repository.py +1 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +35 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/export/ncbi_xml.py +1 -2
- nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
- nmdc_runtime/site/graphs.py +1 -22
- nmdc_runtime/site/ops.py +60 -152
- nmdc_runtime/site/repository.py +0 -112
- nmdc_runtime/site/translation/gold_translator.py +4 -12
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +2 -54
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/util.py +3 -47
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
- nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""
|
|
2
|
+
`router` here is deprecated in favor of `nmdc_runtime.minter.entrypoints.fastapi_app.router`
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import List, Dict, Any
|
|
7
|
+
|
|
8
|
+
from fastapi import APIRouter, Depends, HTTPException
|
|
9
|
+
from pydantic import ValidationError
|
|
10
|
+
from pymongo.database import Database as MongoDatabase
|
|
11
|
+
from starlette import status
|
|
12
|
+
from toolz import dissoc
|
|
13
|
+
|
|
14
|
+
from nmdc_runtime.api.core.idgen import (
|
|
15
|
+
generate_ids,
|
|
16
|
+
decode_id,
|
|
17
|
+
collection_name,
|
|
18
|
+
)
|
|
19
|
+
from nmdc_runtime.api.core.util import raise404_if_none, pick
|
|
20
|
+
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
21
|
+
from nmdc_runtime.api.models.id import (
|
|
22
|
+
MintRequest,
|
|
23
|
+
pattern_shoulder,
|
|
24
|
+
AssignedBaseName,
|
|
25
|
+
pattern_assigned_base_name,
|
|
26
|
+
IdBindingRequest,
|
|
27
|
+
pattern_base_object_name,
|
|
28
|
+
IdThreeParts,
|
|
29
|
+
pattern_naa,
|
|
30
|
+
)
|
|
31
|
+
from nmdc_runtime.api.models.site import get_current_client_site, Site
|
|
32
|
+
|
|
33
|
+
router = APIRouter()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@router.post("/ids/mint", response_model=List[str])
|
|
37
|
+
def mint_ids(
|
|
38
|
+
mint_req: MintRequest,
|
|
39
|
+
mdb: MongoDatabase = Depends(get_mongo_db),
|
|
40
|
+
site: Site = Depends(get_current_client_site),
|
|
41
|
+
):
|
|
42
|
+
"""Generate one or more identifiers.
|
|
43
|
+
|
|
44
|
+
Leaving `populator` blank will set it to the site ID of the request client.
|
|
45
|
+
"""
|
|
46
|
+
ids = generate_ids(
|
|
47
|
+
mdb,
|
|
48
|
+
owner=site.id,
|
|
49
|
+
populator=(mint_req.populator or site.id),
|
|
50
|
+
number=mint_req.number,
|
|
51
|
+
naa=mint_req.naa,
|
|
52
|
+
shoulder=mint_req.shoulder,
|
|
53
|
+
)
|
|
54
|
+
return ids
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@router.post("/ids/bindings", response_model=List[Dict[str, Any]])
|
|
58
|
+
def set_id_bindings(
|
|
59
|
+
binding_requests: List[IdBindingRequest],
|
|
60
|
+
mdb: MongoDatabase = Depends(get_mongo_db),
|
|
61
|
+
site: Site = Depends(get_current_client_site),
|
|
62
|
+
):
|
|
63
|
+
bons = [r.i for r in binding_requests]
|
|
64
|
+
ids: List[IdThreeParts] = []
|
|
65
|
+
for bon in bons:
|
|
66
|
+
m = re.match(pattern_base_object_name, bon)
|
|
67
|
+
ids.append(
|
|
68
|
+
IdThreeParts(
|
|
69
|
+
naa=m.group("naa"),
|
|
70
|
+
shoulder=m.group("shoulder"),
|
|
71
|
+
blade=m.group("blade"),
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
# Ensure that user owns all supplied identifiers.
|
|
75
|
+
for id_, r in zip(ids, binding_requests):
|
|
76
|
+
collection = mdb.get_collection(collection_name(id_.naa, id_.shoulder))
|
|
77
|
+
doc = collection.find_one({"_id": decode_id(str(id_.blade))}, ["__ao"])
|
|
78
|
+
if doc is None:
|
|
79
|
+
raise HTTPException(
|
|
80
|
+
status_code=status.HTTP_404_NOT_FOUND,
|
|
81
|
+
detail=f"id {r.i} not found",
|
|
82
|
+
)
|
|
83
|
+
elif doc.get("__ao") != site.id:
|
|
84
|
+
raise HTTPException(
|
|
85
|
+
status_code=status.HTTP_403_FORBIDDEN,
|
|
86
|
+
detail=(
|
|
87
|
+
f"authenticated site client does not manage {r.i} "
|
|
88
|
+
f"(client represents site {site.id}).",
|
|
89
|
+
),
|
|
90
|
+
)
|
|
91
|
+
# Ensure no attempts to set reserved attributes.
|
|
92
|
+
if any(r.a.startswith("__a") for r in binding_requests):
|
|
93
|
+
raise HTTPException(
|
|
94
|
+
status_code=status.HTTP_403_FORBIDDEN,
|
|
95
|
+
detail="Cannot set attribute names beginning with '__a'.",
|
|
96
|
+
)
|
|
97
|
+
# Process binding requests
|
|
98
|
+
docs = []
|
|
99
|
+
for id_, r in zip(ids, binding_requests):
|
|
100
|
+
collection = mdb.get_collection(collection_name(id_.naa, id_.shoulder))
|
|
101
|
+
|
|
102
|
+
filter_ = {"_id": decode_id(id_.blade)}
|
|
103
|
+
if r.o == "purge":
|
|
104
|
+
docs.append(collection.find_one_and_delete(filter_))
|
|
105
|
+
elif r.o == "rm":
|
|
106
|
+
docs.append(collection.find_one_and_update(filter_, {"$unset": {r.a: ""}}))
|
|
107
|
+
elif r.o == "set":
|
|
108
|
+
docs.append(collection.find_one_and_update(filter_, {"$set": {r.a: r.v}}))
|
|
109
|
+
elif r.o == "addToSet":
|
|
110
|
+
docs.append(
|
|
111
|
+
collection.find_one_and_update(filter_, {"$addToSet": {r.a: r.v}})
|
|
112
|
+
)
|
|
113
|
+
else:
|
|
114
|
+
# Note: IdBindingRequest root_validator methods should preclude this.
|
|
115
|
+
raise HTTPException(
|
|
116
|
+
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid operation 'o'."
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
return [dissoc(d, "_id") for d in docs]
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@router.get("/ids/bindings/{rest:path}", response_model=Dict[str, Any])
|
|
123
|
+
def get_id_bindings(
|
|
124
|
+
rest: str,
|
|
125
|
+
mdb: MongoDatabase = Depends(get_mongo_db),
|
|
126
|
+
):
|
|
127
|
+
cleaned = rest.replace("-", "")
|
|
128
|
+
parts = cleaned.split(":")
|
|
129
|
+
if len(parts) != 2:
|
|
130
|
+
raise HTTPException(
|
|
131
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
132
|
+
detail=(
|
|
133
|
+
"Invalid ID - needs both name assigning authority (NAA) part"
|
|
134
|
+
"(e.g. 'nmdc') and name part (e.g. 'fk4ra92'), separated by a colon (':')."
|
|
135
|
+
),
|
|
136
|
+
)
|
|
137
|
+
naa = parts[0]
|
|
138
|
+
suffix_parts = parts[1].split("/")
|
|
139
|
+
if len(suffix_parts) == 2 and suffix_parts[-1] != "": # one '/', or ends with '/'
|
|
140
|
+
assigned_base_name, attribute = suffix_parts
|
|
141
|
+
else:
|
|
142
|
+
assigned_base_name = suffix_parts[0]
|
|
143
|
+
attribute = None
|
|
144
|
+
|
|
145
|
+
if re.match(pattern_naa, naa) is None:
|
|
146
|
+
raise HTTPException(
|
|
147
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
148
|
+
detail=f"Invalid ID - invalid name assigning authority (NAA) '{naa}'.",
|
|
149
|
+
)
|
|
150
|
+
print(assigned_base_name)
|
|
151
|
+
if re.match(pattern_shoulder, assigned_base_name) is None:
|
|
152
|
+
raise HTTPException(
|
|
153
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
154
|
+
detail=(
|
|
155
|
+
"Invalid ID - invalid shoulder. "
|
|
156
|
+
"Every name part begins with a 'shoulder', a "
|
|
157
|
+
"sequence of letters followed by a number, "
|
|
158
|
+
"for example 'fk4'. "
|
|
159
|
+
"Did you forget to include the shoulder?",
|
|
160
|
+
),
|
|
161
|
+
)
|
|
162
|
+
try:
|
|
163
|
+
m = re.match(pattern_assigned_base_name, AssignedBaseName(assigned_base_name))
|
|
164
|
+
shoulder, blade = m.group("shoulder"), m.group("blade")
|
|
165
|
+
id_decoded = decode_id(blade)
|
|
166
|
+
except (AttributeError, ValidationError):
|
|
167
|
+
raise HTTPException(
|
|
168
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
169
|
+
detail="Invalid ID - characters used outside of base32.",
|
|
170
|
+
)
|
|
171
|
+
except ValueError:
|
|
172
|
+
raise HTTPException(
|
|
173
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
174
|
+
detail="Invalid ID - failed checksum. Did you copy it incorrectly?",
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
collection = mdb.get_collection(collection_name(naa, shoulder))
|
|
178
|
+
d = raise404_if_none(collection.find_one({"_id": id_decoded}))
|
|
179
|
+
d = dissoc(d, "_id")
|
|
180
|
+
if attribute is not None:
|
|
181
|
+
if attribute not in d:
|
|
182
|
+
raise HTTPException(
|
|
183
|
+
status_code=status.HTTP_404_NOT_FOUND,
|
|
184
|
+
detail=(
|
|
185
|
+
f"attribute '{attribute}' not found in "
|
|
186
|
+
f"{naa}:{assigned_base_name}."
|
|
187
|
+
),
|
|
188
|
+
)
|
|
189
|
+
rv = pick(["where", attribute], d)
|
|
190
|
+
else:
|
|
191
|
+
rv = d
|
|
192
|
+
return rv
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Optional, Annotated
|
|
3
|
+
|
|
4
|
+
from pymongo.database import Database
|
|
5
|
+
from fastapi import APIRouter, Depends, Query, HTTPException, Path
|
|
6
|
+
from pymongo.errors import ConnectionFailure, OperationFailure
|
|
7
|
+
from starlette import status
|
|
8
|
+
|
|
9
|
+
from nmdc_runtime.api.core.util import (
|
|
10
|
+
raise404_if_none,
|
|
11
|
+
)
|
|
12
|
+
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
13
|
+
from nmdc_runtime.api.endpoints.util import list_resources, _claim_job
|
|
14
|
+
from nmdc_runtime.api.models.job import Job, JobClaim
|
|
15
|
+
from nmdc_runtime.api.models.operation import Operation, MetadataT
|
|
16
|
+
from nmdc_runtime.api.models.site import (
|
|
17
|
+
Site,
|
|
18
|
+
maybe_get_current_client_site,
|
|
19
|
+
get_current_client_site,
|
|
20
|
+
)
|
|
21
|
+
from nmdc_runtime.api.models.util import ListRequest, ListResponse, ResultT
|
|
22
|
+
|
|
23
|
+
router = APIRouter()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@router.get(
|
|
27
|
+
"/jobs", response_model=ListResponse[Job], response_model_exclude_unset=True
|
|
28
|
+
)
|
|
29
|
+
def list_jobs(
|
|
30
|
+
req: Annotated[ListRequest, Query()],
|
|
31
|
+
mdb: Database = Depends(get_mongo_db),
|
|
32
|
+
maybe_site: Optional[Site] = Depends(maybe_get_current_client_site),
|
|
33
|
+
):
|
|
34
|
+
"""List pre-configured workflow jobs.
|
|
35
|
+
|
|
36
|
+
If authenticated as a site client, `req.filter` defaults to fetch unclaimed jobs
|
|
37
|
+
that are claimable by the site client. This default can be overridden to view all jobs
|
|
38
|
+
by explicitly passing a `req.filter` of `{}`.
|
|
39
|
+
"""
|
|
40
|
+
if isinstance(maybe_site, Site) and req.filter is None:
|
|
41
|
+
req.filter = json.dumps({"claims.site_id": {"$ne": maybe_site.id}})
|
|
42
|
+
return list_resources(req, mdb, "jobs")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@router.get("/jobs/{job_id}", response_model=Job, response_model_exclude_unset=True)
|
|
46
|
+
def get_job_info(
|
|
47
|
+
job_id: str,
|
|
48
|
+
mdb: Database = Depends(get_mongo_db),
|
|
49
|
+
):
|
|
50
|
+
return raise404_if_none(mdb.jobs.find_one({"id": job_id}))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@router.post("/jobs/{job_id}:claim", response_model=Operation[ResultT, MetadataT])
|
|
54
|
+
def claim_job(
|
|
55
|
+
job_id: str,
|
|
56
|
+
mdb: Database = Depends(get_mongo_db),
|
|
57
|
+
site: Site = Depends(get_current_client_site),
|
|
58
|
+
):
|
|
59
|
+
return _claim_job(job_id, mdb, site)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@router.post("/jobs/{job_id}:release")
|
|
63
|
+
def release_job(
|
|
64
|
+
job_id: Annotated[
|
|
65
|
+
str,
|
|
66
|
+
Path(
|
|
67
|
+
title="Job ID",
|
|
68
|
+
description="The `id` of the job.\n\n_Example_: `nmdc:f81d4fae-7dec-11d0-a765-00a0c91e6bf6`",
|
|
69
|
+
examples=["nmdc:f81d4fae-7dec-11d0-a765-00a0c91e6bf6"],
|
|
70
|
+
),
|
|
71
|
+
],
|
|
72
|
+
mdb: Database = Depends(get_mongo_db),
|
|
73
|
+
site: Site = Depends(get_current_client_site),
|
|
74
|
+
) -> Optional[Job]:
|
|
75
|
+
r"""
|
|
76
|
+
Release the specified job.
|
|
77
|
+
|
|
78
|
+
Releasing a job cancels all the unfinished operations (of that job)
|
|
79
|
+
claimed by the `site` associated with the logged-in site client.
|
|
80
|
+
|
|
81
|
+
Return the updated job, reflecting that the aforementioned operations have been cancelled.
|
|
82
|
+
"""
|
|
83
|
+
job = Job(**raise404_if_none(mdb.jobs.find_one({"id": job_id})))
|
|
84
|
+
active_job_claims_by_this_site = list(
|
|
85
|
+
mdb.operations.find(
|
|
86
|
+
{
|
|
87
|
+
"metadata.job.id": job_id,
|
|
88
|
+
"metadata.site_id": site.id,
|
|
89
|
+
"done": False,
|
|
90
|
+
},
|
|
91
|
+
["id"],
|
|
92
|
+
)
|
|
93
|
+
)
|
|
94
|
+
job_claims_by_this_site_post_release = [
|
|
95
|
+
JobClaim(op_id=claim["id"], site_id=site.id, done=True, cancelled=True)
|
|
96
|
+
for claim in active_job_claims_by_this_site
|
|
97
|
+
]
|
|
98
|
+
job_claims_not_by_this_site = [
|
|
99
|
+
claim for claim in job.claims if (claim.site_id != site.id)
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
# Execute MongoDB transaction to ensure atomic change of job document plus relevant set of operations documents.
|
|
103
|
+
def transactional_update(session):
|
|
104
|
+
mdb.operations.update_many(
|
|
105
|
+
{"id": {"$in": [claim["id"] for claim in active_job_claims_by_this_site]}},
|
|
106
|
+
{"$set": {"metadata.cancelled": True, "metadata.done": True}},
|
|
107
|
+
session=session,
|
|
108
|
+
)
|
|
109
|
+
job_claim_subdocuments_post_release = [
|
|
110
|
+
claim.model_dump(exclude_unset=True)
|
|
111
|
+
for claim in (
|
|
112
|
+
job_claims_not_by_this_site + job_claims_by_this_site_post_release
|
|
113
|
+
)
|
|
114
|
+
]
|
|
115
|
+
mdb.jobs.update_one(
|
|
116
|
+
{"id": job_id},
|
|
117
|
+
{"$set": {"claims": job_claim_subdocuments_post_release}},
|
|
118
|
+
session=session,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
try:
|
|
122
|
+
with mdb.client.start_session() as session:
|
|
123
|
+
with session.start_transaction():
|
|
124
|
+
transactional_update(session)
|
|
125
|
+
except (ConnectionFailure, OperationFailure) as e:
|
|
126
|
+
raise HTTPException(
|
|
127
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
128
|
+
detail=f"Transaction failed: {e}",
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# Return the updated `jobs` document.
|
|
132
|
+
#
|
|
133
|
+
# TODO: Consider retrieving the document within the transaction
|
|
134
|
+
# to ensure it still exists.
|
|
135
|
+
#
|
|
136
|
+
updated_job = mdb.jobs.find_one({"id": job_id})
|
|
137
|
+
if updated_job is None:
|
|
138
|
+
# Note: We return `None` in this case because that's what the
|
|
139
|
+
# endpoint originally did in this case, and we don't want
|
|
140
|
+
# to introduce a breaking change as part of this refactor.
|
|
141
|
+
return None
|
|
142
|
+
else:
|
|
143
|
+
return Job(**updated_job)
|
|
File without changes
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import bson.json_util
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from pymongo.database import Database
|
|
6
|
+
from refscan.lib.Finder import Finder
|
|
7
|
+
from refscan.lib.helpers import derive_schema_class_name_from_document
|
|
8
|
+
from refscan.scanner import identify_referring_documents, scan_outgoing_references
|
|
9
|
+
|
|
10
|
+
from nmdc_runtime.api.models.lib.helpers import derive_update_specs
|
|
11
|
+
from nmdc_runtime.api.models.query import UpdateCommand, UpdateSpecs
|
|
12
|
+
from nmdc_runtime.util import get_allowed_references, nmdc_schema_view
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def make_violation_message(
    collection_name: str,
    source_document_id: str,
    source_field_name: str,
    target_document_id: str,
) -> str:
    r"""
    Constructs a violation message that indicates that a document would contain a broken reference.

    :param collection_name: The name of the collection containing the document containing the broken reference
    :param source_document_id: The `id` of the document containing the broken reference
    :param source_field_name: The name of the field containing the broken reference
    :param target_document_id: The `id` of the document that is being referenced

    :return: A formatted string describing the violation
    """
    template = (
        "The document having id='{source}' in the collection '{collection}' "
        "contains a reference (in its '{field}' field, referring to the "
        "document having id='{target}') which would be broken."
    )
    return template.format(
        source=source_document_id,
        collection=collection_name,
        field=source_field_name,
        target=target_document_id,
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def simulate_updates_and_check_references(
    db: Database, update_cmd: UpdateCommand
) -> List[str]:
    r"""
    Checks whether, if the specified updates were performed on the specified database,
    both of the following things would be true afterward:
    1. (Regarding outgoing references): The updated documents do not contain any
       broken references.
    2. (Regarding incoming references): The documents that originally _referenced_
       any of the updated documents do not contain any broken references.
    This check is necessary because update operations can currently change `id`
    and `type` values, which can affect what can legally reference those documents.

    This function checks those things by performing the updates within a MongoDB
    transaction, leaving the transaction in the _pending_ (i.e. not committed) state,
    and then performing various checks on the database in that _pending_ state.

    :param db: The database on which to simulate performing the updates
    :param update_cmd: The command that specifies the updates

    :return: List of violation messages. If the list is empty, it means that—if
             the updates had been performed (instead of only simulated) here—they
             would not have left behind any broken references.
    """

    # Initialize the list of violation messages that we will return.
    violation_messages: List[str] = []

    # Instantiate a `Finder` bound to the Mongo database. This will be
    # used later, to identify and check inter-document references.
    finder = Finder(database=db)

    # Extract the collection name from the command.
    collection_name = update_cmd.update

    # Derive the update specifications from the command.
    update_specs: UpdateSpecs = derive_update_specs(update_cmd)

    # Get a reference to a `SchemaView` bound to the NMDC schema, so we can
    # use it to, for example, map `type` field values to schema class names.
    schema_view = nmdc_schema_view()

    # Get some data structures that indicate which fields of which documents
    # can legally contain references, according to the NMDC schema.
    legal_references = get_allowed_references()
    reference_field_names_by_source_class_name = (
        legal_references.get_reference_field_names_by_source_class_name()
    )

    # Start a "throwaway" MongoDB transaction so we can simulate the updates.
    with db.client.start_session() as session:
        with session.start_transaction():

            # Make a list of the `_id`, `id`, and `type` values of the documents that
            # the user wants to update.
            projection = {"_id": 1, "id": 1, "type": 1}
            subject_document_summaries_pre_update = list(
                db[collection_name].find(
                    filter={"$or": [spec["filter"] for spec in update_specs]},
                    projection=projection,
                    session=session,
                )
            )

            # Make a set of the `_id` values of the subject documents so that (later) we can
            # check whether a given _referring_ document is also one of the _subject_
            # documents (i.e. is among the documents the user wants to update).
            subject_document_object_ids = set(
                tdd["_id"] for tdd in subject_document_summaries_pre_update
            )

            # Identify _all_ documents that reference any of the subject documents.
            all_referring_document_descriptors_pre_update = []
            for subject_document_summary in subject_document_summaries_pre_update:
                # If the document summary lacks the "id" field, we already know that no
                # documents reference it (since they would have to _use_ that "id" value to
                # do so); so, we abort this iteration and move on to the next subject document.
                if "id" not in subject_document_summary:
                    continue

                referring_document_descriptors = identify_referring_documents(
                    document=subject_document_summary,  # expects at least "id" and "type"
                    schema_view=schema_view,
                    references=legal_references,
                    finder=finder,
                    client_session=session,
                )
                all_referring_document_descriptors_pre_update.extend(
                    referring_document_descriptors
                )

            # Simulate the updates (i.e. apply them within the context of the transaction).
            db.command(
                # Note: This expression was copied from the `_run_mdb_cmd` function in `queries.py`.
                # TODO: Document this expression (i.e. the Pydantic->JSON->BSON chain).
                bson.json_util.loads(
                    json.dumps(update_cmd.model_dump(exclude_unset=True))
                ),
                session=session,
            )
            # For each referring document, check whether any of its outgoing references
            # is broken (in the context of the transaction).
            for descriptor in all_referring_document_descriptors_pre_update:
                referring_document_oid = descriptor["source_document_object_id"]
                referring_document_id = descriptor["source_document_id"]
                referring_collection_name = descriptor["source_collection_name"]
                # If the referring document is among the documents that the user wanted to
                # update, we skip it for now. We will check its outgoing references later
                # (i.e. when we check the outgoing references of _all_ updated documents).
                if referring_document_oid in subject_document_object_ids:
                    continue
                # Get the referring document, so we can check its outgoing references.
                # Note: We project only the fields that can legally contain references,
                #       plus other fields involved in referential integrity checking.
                referring_document_reference_field_names = (
                    reference_field_names_by_source_class_name[
                        descriptor["source_class_name"]
                    ]
                )
                projection = {
                    field_name: 1
                    for field_name in referring_document_reference_field_names
                } | {
                    "_id": 1,
                    "id": 1,
                    "type": 1,
                }  # note: `|` unions the dicts
                referring_document = db[referring_collection_name].find_one(
                    {"_id": referring_document_oid},
                    projection=projection,
                    session=session,
                )
                # Note: We assert that the referring document exists (to satisfy the type checker).
                # NOTE(review): `assert` is stripped under `python -O`; confirm this
                # invariant cannot be violated in production, or raise explicitly.
                assert (
                    referring_document is not None
                ), "A referring document has vanished."
                violations = scan_outgoing_references(
                    document=referring_document,
                    source_collection_name=referring_collection_name,
                    schema_view=schema_view,
                    references=legal_references,
                    finder=finder,
                    client_session=session,  # so it uses the pending transaction's session
                )
                # For each violation (i.e. broken reference) that exists, add a violation message
                # to the list of violation messages.
                #
                # TODO: The violation might not involve a reference to one of the
                #       subject documents. The `scan_outgoing_references` function
                #       scans _all_ references emanating from the document.
                #
                for violation in violations:
                    source_field_name = violation.source_field_name
                    target_id = violation.target_id
                    violation_messages.append(
                        make_violation_message(
                            collection_name=referring_collection_name,
                            source_document_id=referring_document_id,
                            source_field_name=source_field_name,
                            target_document_id=target_id,
                        )
                    )

            # For each updated document, check whether any of its outgoing references
            # is broken (in the context of the transaction).
            for subject_document_summary in subject_document_summaries_pre_update:
                subject_document_oid = subject_document_summary["_id"]
                # NOTE(review): unlike the earlier loop, this access does not guard
                # against a summary lacking "id" — a subject document without an
                # "id" field would raise KeyError here; confirm whether that can occur.
                subject_document_id = subject_document_summary["id"]
                subject_document_class_name = derive_schema_class_name_from_document(
                    document=subject_document_summary,
                    schema_view=schema_view,
                )
                assert (
                    subject_document_class_name is not None
                ), "The updated document does not represent a valid schema class instance."
                subject_collection_name = (
                    collection_name  # makes a disambiguating alias
                )
                # Get the updated document, so we can check its outgoing references.
                # Note: We project only the fields that can legally contain references,
                #       plus other fields involved in referential integrity checking.
                updated_document_reference_field_names = (
                    reference_field_names_by_source_class_name[
                        subject_document_class_name
                    ]
                )
                projection = {
                    field_name: 1
                    for field_name in updated_document_reference_field_names
                } | {
                    "_id": 1,
                    "id": 1,
                    "type": 1,
                }  # note: `|` unions the dicts
                updated_document = db[subject_collection_name].find_one(
                    {"_id": subject_document_oid},
                    projection=projection,
                    session=session,
                )
                # Note: We assert that the updated document exists (to satisfy the type checker).
                assert updated_document is not None, "An updated document has vanished."
                violations = scan_outgoing_references(
                    document=updated_document,
                    source_collection_name=subject_collection_name,
                    schema_view=schema_view,
                    references=legal_references,
                    finder=finder,
                    client_session=session,  # so it uses the pending transaction's session
                )
                # For each violation (i.e. broken reference) that exists, add a violation message
                # to the list of violation messages.
                for violation in violations:
                    source_field_name = violation.source_field_name
                    target_id = violation.target_id
                    violation_messages.append(
                        make_violation_message(
                            collection_name=subject_collection_name,
                            source_document_id=subject_document_id,
                            source_field_name=source_field_name,
                            target_document_id=target_id,
                        )
                    )

            # Whatever happens (i.e. whether there are violations or not), abort the transaction.
            #
            # Note: If an exception was raised within this `with` block, the transaction
            #       will already have been aborted automatically (and execution will not
            #       have reached this statement). On the other hand, if no exception
            #       was raised, we explicitly abort the transaction so that the updates
            #       that we "simulated" in this block do not get applied to the real database.
            #       Reference: https://pymongo.readthedocs.io/en/stable/api/pymongo/client_session.html
            #
            session.abort_transaction()

    return violation_messages
|