nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +212 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +777 -0
- nmdc_runtime/api/core/util.py +114 -0
- nmdc_runtime/api/db/mongo.py +436 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +206 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +817 -0
- nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +495 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +57 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +207 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/wfe_file_stages.py +122 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
- nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +56 -0
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +30 -4
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +89 -0
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +29 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +29 -0
- nmdc_runtime/site/export/ncbi_xml.py +1331 -0
- nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
- nmdc_runtime/site/export/study_metadata.py +27 -4
- nmdc_runtime/site/graphs.py +294 -45
- nmdc_runtime/site/ops.py +1008 -230
- nmdc_runtime/site/repair/database_updater.py +451 -0
- nmdc_runtime/site/repository.py +368 -133
- nmdc_runtime/site/resources.py +154 -80
- nmdc_runtime/site/translation/gold_translator.py +235 -83
- nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
- nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
- nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
- nmdc_runtime/site/translation/neon_utils.py +24 -7
- nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
- nmdc_runtime/site/translation/translator.py +73 -3
- nmdc_runtime/site/util.py +26 -7
- nmdc_runtime/site/validation/emsl.py +1 -0
- nmdc_runtime/site/validation/gold.py +1 -0
- nmdc_runtime/site/validation/util.py +16 -12
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +236 -192
- nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
- nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/terminusdb/generate.py +0 -198
- nmdc_runtime/site/terminusdb/ingest.py +0 -44
- nmdc_runtime/site/terminusdb/schema.py +0 -1671
- nmdc_runtime/site/translation/emsl.py +0 -42
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -31
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -42
- nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
- nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
- nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
--- /dev/null
+++ nmdc_runtime/api/endpoints/lib/path_segments.py
@@ -0,0 +1,165 @@
+"""This module defines a utility to parse a path segment with semicolon-delimited parameters.
+
+# Example: Single segment - no lambda needed when FastAPI-path-parameter name matches `parse_path_segment`'s.
+
+@app.get("/path/{path_segment}")
+async def handle_segment(parsed: ParsedPathSegment = Depends(parse_path_segment)):
+    return {
+        "segment_name": parsed.segment_name,
+        "parameters": parsed.segment_parameters
+    }
+
+# Example: Multiple segments with different names - lambda required.
+
+@app.get("/items/{item_segment}/actions/{action_segment}")
+async def handle_two_segments(
+    item_parsed: ParsedPathSegment = Depends(lambda item_segment: parse_path_segment(item_segment)),
+    action_parsed: ParsedPathSegment = Depends(lambda action_segment: parse_path_segment(action_segment))
+):
+    return {
+        "item": {
+            "name": item_parsed.segment_name,
+            "params": item_parsed.segment_parameters
+        },
+        "action": {
+            "name": action_parsed.segment_name,
+            "params": action_parsed.segment_parameters
+        }
+    }
+
+# Example URL: /items/book;category=fiction;new/actions/edit;quick;mode=advanced
+# Results in:
+# item_parsed.segment_name = "book"
+# item_parsed.segment_parameters = {"category": "fiction", "new": None}
+# action_parsed.segment_name = "edit"
+# action_parsed.segment_parameters = {"quick": None, "mode": "advanced"}
+
+# Implementation note
+
+Ordering of segment parameters MUST be preserved so that they may be used e.g. to specify a transformation pipeline.
+A hypothetical example:
+GET `/some/image;crop=200,100,1200,900;scale=640,480` might
+1. GET `/some/image`,
+2. POST the response to `/transform/crop?x=200&y=100&width=1200&height=900`,
+3. POST the response to `/transform/scale?width=640&height=480`, and finally
+4. yield the last response to the client.
+
+"""
+
+from typing import Dict, List, Union, Annotated
+from urllib.parse import unquote
+
+from fastapi import HTTPException, Path
+
+
+class ParsedPathSegment:
+    """Container for parsed path segment data."""
+
+    def __init__(
+        self,
+        segment_name: str,
+        segment_parameters: Dict[str, Union[str, List[str], None]],
+    ):
+        self.segment_name = segment_name
+        self.segment_parameters = segment_parameters
+
+    def __repr__(self):
+        return f"ParsedPathSegment(name='{self.segment_name}', params={self.segment_parameters})"
+
+
+def parse_path_segment(
+    path_segment: Annotated[str, Path(description="Foo")],
+) -> ParsedPathSegment:
+    """
+    FastAPI dependency to parse a path segment with semicolon-delimited parameters.
+
+    See [the last paragraph of RFC3986 Section 3.3](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3) for
+    insight into the below parsing rules.
+
+    Parsing rules:
+    - Semicolon (`;`) delimits parameters from segment name and from each other
+    - Equals sign (`=`) separates parameter names from values
+    - Comma (`,`) separates multiple values for a single parameter
+    - Other RFC3986 sub-delimiters should be percent-encoded
+
+    Args:
+        path_segment: The raw path segment string from FastAPI
+
+    Returns:
+        ParsedPathSegment containing the segment name and parsed parameters
+
+    Raises:
+        HTTPException: 400 Bad Request if segment name is empty
+
+    Examples:
+        >>> result = parse_path_segment("name")
+        >>> result.segment_name
+        'name'
+        >>> result.segment_parameters
+        {}
+
+        >>> result = parse_path_segment("name;param1;param2")
+        >>> result.segment_name
+        'name'
+        >>> result.segment_parameters
+        {'param1': None, 'param2': None}
+
+        >>> result = parse_path_segment("name;param=")
+        >>> result.segment_name
+        'name'
+        >>> result.segment_parameters
+        {'param': ''}
+
+        >>> result = parse_path_segment("name;param=value")
+        >>> result.segment_name
+        'name'
+        >>> result.segment_parameters
+        {'param': 'value'}
+
+        >>> result = parse_path_segment("name;param=val1,val2,val3")
+        >>> result.segment_name
+        'name'
+        >>> result.segment_parameters
+        {'param': ['val1', 'val2', 'val3']}
+
+        >>> result = parse_path_segment("name;p1=v1;p2=v2,v3;p3")
+        >>> result.segment_name
+        'name'
+        >>> result.segment_parameters
+        {'p1': 'v1', 'p2': ['v2', 'v3'], 'p3': None}
+    """
+    # URL decode the entire segment first
+    decoded_segment = unquote(path_segment)
+
+    # Split on semicolons - first part is segment name, rest are parameters
+    parts = decoded_segment.split(";")
+    segment_name = parts[0] if parts else ""
+
+    # Raise HTTP 400 if segment name is empty
+    if not segment_name:
+        raise HTTPException(status_code=400, detail="Segment name cannot be empty")
+
+    segment_parameters: Dict[str, Union[str, List[str], None]] = {}
+
+    # Process each parameter
+    for param_part in parts[1:]:
+        if not param_part:  # Skip empty parts
+            continue
+
+        # Split on first equals sign to separate name from value
+        if "=" in param_part:
+            param_name, param_value = param_part.split("=", 1)
+
+            # Split values on commas
+            if "," in param_value:
+                # Multiple values - return as list
+                values = [v.strip() for v in param_value.split(",")]
+                segment_parameters |= {param_name: values}
+            else:
+                # Single value - return as string
+                segment_parameters |= {param_name: param_value}
+        else:
+            # Parameter without value (flag-style parameter)
+            segment_parameters |= {param_part: None}
+
+    return ParsedPathSegment(segment_name, segment_parameters)
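For orientation, here is a minimal sketch (not part of the diff) of how the dependency above could be exercised with FastAPI's TestClient. The demo app, route, and URL are hypothetical; the import path assumes the module location shown in the changed-file list.

# Hypothetical demo: exercise parse_path_segment through a throwaway FastAPI app.
from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient

from nmdc_runtime.api.endpoints.lib.path_segments import ParsedPathSegment, parse_path_segment

app = FastAPI()

@app.get("/demo/{path_segment}")
async def demo(parsed: ParsedPathSegment = Depends(parse_path_segment)):
    # The path-parameter name matches parse_path_segment's, so no lambda is needed
    # (see the module docstring above).
    return {"name": parsed.segment_name, "params": parsed.segment_parameters}

client = TestClient(app)
print(client.get("/demo/image;crop=200,100,1200,900;scale=640,480").json())
# {'name': 'image', 'params': {'crop': ['200', '100', '1200', '900'], 'scale': ['640', '480']}}

Because Python dicts preserve insertion order, the `crop` parameter comes before `scale` in `params`, consistent with the module's note that parameter ordering must be preserved.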
--- /dev/null
+++ nmdc_runtime/api/endpoints/metadata.py
@@ -0,0 +1,260 @@
+import json
+import re
+from typing import Annotated
+
+from dagster import ExecuteInProcessResult
+from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, Path
+from gridfs import GridFS, NoFile
+from nmdc_runtime.api.core.metadata import _validate_changesheet, df_from_sheet_in
+from nmdc_runtime.api.core.util import API_SITE_CLIENT_ID
+from nmdc_runtime.api.db.mongo import get_mongo_db, validate_json
+from nmdc_runtime.api.endpoints.util import (
+    _claim_job,
+    _request_dagster_run,
+    check_action_permitted,
+    persist_content_and_get_drs_object,
+)
+from nmdc_runtime.api.models.job import Job
+from nmdc_runtime.api.models.metadata import ChangesheetIn
+from nmdc_runtime.api.models.object_type import DrsObjectWithTypes
+from nmdc_runtime.api.models.site import get_site
+from nmdc_runtime.api.models.user import User, get_current_active_user
+from nmdc_runtime.site.repository import repo, run_config_frozen__normal_env
+from nmdc_runtime.util import (
+    unfreeze,
+)
+from pymongo import ReturnDocument
+from pymongo.database import Database as MongoDatabase
+from starlette import status
+from starlette.responses import StreamingResponse
+from toolz import merge
+
+router = APIRouter()
+
+
+async def raw_changesheet_from_uploaded_file(uploaded_file: UploadFile):
+    """
+    Extract utf8-encoded text from fastapi.UploadFile object, and
+    construct ChangesheetIn object for subsequent processing.
+    """
+    content_type = uploaded_file.content_type
+    name = uploaded_file.filename
+    if name.endswith(".csv"):
+        content_type = "text/csv"
+    elif name.endswith(".tsv"):
+        content_type = "text/tab-separated-values"
+    contents: bytes = await uploaded_file.read()
+    text = contents.decode()
+    return ChangesheetIn(name=name, content_type=content_type, text=text)
+
+
+@router.post("/metadata/changesheets:validate")
+async def validate_changesheet(
+    uploaded_file: UploadFile = File(
+        ..., description="The changesheet you want the server to validate"
+    ),
+    mdb: MongoDatabase = Depends(get_mongo_db),
+):
+    r"""
+    Validates a [changesheet](https://microbiomedata.github.io/nmdc-runtime/howto-guides/author-changesheets/)
+    that is in either CSV or TSV format.
+    """
+    sheet_in = await raw_changesheet_from_uploaded_file(uploaded_file)
+    df_change = df_from_sheet_in(sheet_in, mdb)
+    return _validate_changesheet(df_change, mdb)
+
+
+@router.post("/metadata/changesheets:submit", response_model=DrsObjectWithTypes)
+async def submit_changesheet(
+    uploaded_file: UploadFile = File(
+        ..., description="The changesheet you want the server to apply"
+    ),
+    mdb: MongoDatabase = Depends(get_mongo_db),
+    user: User = Depends(get_current_active_user),
+):
+    r"""
+    Applies a [changesheet](https://microbiomedata.github.io/nmdc-runtime/howto-guides/author-changesheets/)
+    that is in either CSV or TSV format.
+
+    **Note:** This endpoint is only accessible to users that have been granted access by a Runtime administrator.
+    """
+    # TODO: Allow users to determine whether they have that access (i.e. whether they are allowed to perform the
+    #       `/metadata/changesheets:submit` action), themselves, so that they don't have to contact an admin
+    #       or submit an example changesheet in order to find that out.
+
+    if not check_action_permitted(user.username, "/metadata/changesheets:submit"):
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail=(
+                f"Only specific users "
+                "are allowed to apply changesheets at this time."
+            ),
+        )
+    sheet_in = await raw_changesheet_from_uploaded_file(uploaded_file)
+    df_change = df_from_sheet_in(sheet_in, mdb)
+    _ = _validate_changesheet(df_change, mdb)
+
+    drs_obj_doc = persist_content_and_get_drs_object(
+        content=sheet_in.text,
+        username=user.username,
+        filename=re.sub(r"[^A-Za-z0-9._\-]", "_", sheet_in.name),
+        content_type=sheet_in.content_type,
+        description="changesheet",
+        id_ns="changesheets",
+    )
+
+    doc_after = mdb.objects.find_one_and_update(
+        {"id": drs_obj_doc["id"]},
+        {"$set": {"types": ["metadata-changesheet"]}},
+        return_document=ReturnDocument.AFTER,
+    )
+    return doc_after
+
+
+@router.get("/metadata/stored_files/{object_id}", include_in_schema=False)
+async def get_stored_metadata_object(
+    object_id: Annotated[
+        str,
+        Path(
+            title="Metadata file ObjectId",
+            description="The ObjectId (`_id`) of the metadata file you want to get.\n\n_Example_: `507f1f77bcf86cd799439011`",
+            examples=["507f1f77bcf86cd799439011"],
+        ),
+    ],
+    mdb: MongoDatabase = Depends(get_mongo_db),
+):
+    r"""
+    This endpoint is subservient to our Data Repository Service (DRS) implementation, i.e. the `/objects/*` endpoints.
+    In particular, URLs resolving to this route are generated
+    by the DRS `/objects/{object_id}/access/{access_id}` endpoint if we store the raw object in our MongoDB via GridFS.
+    We currently do this for request bodies for `/metadata/json:submit` and `/metadata/changesheets:submit`.
+    A typical API user would not call this endpoint directly. Rather, it merely forms part of the API surface.
+    Therefore, we do not include it in the OpenAPI schema.
+
+    References:
+    - https://pymongo.readthedocs.io/en/stable/examples/gridfs.html
+    - https://www.mongodb.com/docs/manual/core/gridfs/#use-gridfs
+    """
+    mdb_fs = GridFS(mdb)
+    try:
+        grid_out = mdb_fs.get(object_id)
+    except NoFile:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Metadata stored file {object_id} not found",
+        )
+    filename, content_type = grid_out.filename, grid_out.content_type
+
+    def iter_grid_out():
+        yield from grid_out
+
+    return StreamingResponse(
+        iter_grid_out(),
+        media_type=content_type,
+        headers={"Content-Disposition": f"attachment; filename={filename}"},
+    )
+
+
+@router.post("/metadata/json:validate", name="Validate JSON")
+async def validate_json_nmdcdb(docs: dict, mdb: MongoDatabase = Depends(get_mongo_db)):
+    r"""
+    Validate a NMDC JSON Schema "nmdc:Database" object.
+
+    This API endpoint validates the JSON payload in two steps. The first step is to check the format of each document
+    (e.g., the presence, name, and value of each field). If it encounters any violations during that step, it will not
+    proceed to the second step. The second step is to check whether all documents referenced by the document exist,
+    whether in the database or the same JSON payload. We call the second step a "referential integrity check."
+    """
+
+    return validate_json(docs, mdb, check_inter_document_references=True)
+
+
+@router.post("/metadata/json:submit", name="Submit JSON")
+async def submit_json_nmdcdb(
+    docs: dict,
+    user: User = Depends(get_current_active_user),
+    mdb: MongoDatabase = Depends(get_mongo_db),
+):
+    """
+
+    Submit a NMDC JSON Schema "nmdc:Database" object.
+
+    """
+    if not check_action_permitted(user.username, "/metadata/json:submit"):
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Only specific users are allowed to submit json at this time.",
+        )
+
+    # Validate the JSON payload, both (a) the format of each document and
+    # (b) the integrity of any inter-document references being introduced.
+    rv = validate_json(docs, mdb, check_inter_document_references=True)
+    if rv["result"] == "errors":
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=str(rv),
+        )
+
+    extra_run_config_data = _ensure_job__metadata_in(docs, user.username, mdb)
+
+    requested = _request_dagster_run(
+        nmdc_workflow_id="metadata-in-1.0.0",
+        nmdc_workflow_inputs=[],  # handled by _request_dagster_run given extra_run_config_data
+        extra_run_config_data=extra_run_config_data,
+        mdb=mdb,
+        user=user,
+    )
+    if requested["type"] == "success":
+        return requested
+    else:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=(
+                f"Runtime failed to start metadata-in-1.0.0 job. "
+                f'Detail: {requested["detail"]}'
+            ),
+        )
+
+
+def _ensure_job__metadata_in(
+    docs, username, mdb, client_id=API_SITE_CLIENT_ID, drs_object_exists_ok=False
+):
+    drs_obj_doc = persist_content_and_get_drs_object(
+        content=json.dumps(docs),
+        username=username,
+        filename=None,
+        content_type="application/json",
+        description="JSON metadata in",
+        id_ns="json-metadata-in",
+        exists_ok=drs_object_exists_ok,
+    )
+    job_spec = {
+        "workflow": {"id": "metadata-in-1.0.0"},
+        "config": {"object_id": drs_obj_doc["id"]},
+    }
+    run_config = merge(
+        unfreeze(run_config_frozen__normal_env),
+        {"ops": {"construct_jobs": {"config": {"base_jobs": [job_spec]}}}},
+    )
+    dagster_result: ExecuteInProcessResult = repo.get_job(
+        "ensure_jobs"
+    ).execute_in_process(run_config=run_config)
+    job = Job(**mdb.jobs.find_one(job_spec))
+    if not dagster_result.success or job is None:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f'failed to complete metadata-in-1.0.0/{drs_obj_doc["id"]} job',
+        )
+
+    site = get_site(mdb, client_id=client_id)
+    operation = _claim_job(job.id, mdb, site)
+    return {
+        "ops": {
+            "get_json_in": {
+                "config": {
+                    "object_id": job.config.get("object_id"),
+                }
+            },
+            "perform_mongo_updates": {"config": {"operation_id": operation["id"]}},
+        }
+    }
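For reference, a minimal client-side sketch (not part of the diff) of calling the validation endpoint defined above. The base URL and the example document are hypothetical and illustrative only; `/metadata/json:submit` additionally requires authenticating as a user who has been granted that action.

# Hypothetical client call against a running nmdc-runtime API instance.
import requests

# An (intentionally incomplete) "nmdc:Database" payload keyed by collection name.
payload = {"biosample_set": [{"id": "nmdc:bsm-00-000001", "type": "nmdc:Biosample"}]}

resp = requests.post(
    "https://runtime.example.org/metadata/json:validate",  # hypothetical base URL
    json=payload,
)
resp.raise_for_status()
rv = resp.json()
# Per submit_json_nmdcdb above, a failed validation is signalled by rv["result"] == "errors".
print(rv)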