nmdc-runtime 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +167 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/mongo.py +435 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +502 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +270 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +796 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +425 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +7 -8
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +633 -13
- nmdc_runtime/site/export/ncbi_xml_utils.py +115 -1
- nmdc_runtime/site/graphs.py +8 -22
- nmdc_runtime/site/ops.py +147 -181
- nmdc_runtime/site/repository.py +2 -112
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/gold_translator.py +4 -12
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +84 -68
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +90 -48
- nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
- nmdc_runtime-2.11.0.dist-info/RECORD +128 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.9.0.dist-info/METADATA +0 -214
- nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
- nmdc_runtime-2.9.0.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
from importlib.metadata import version
|
|
2
|
+
import re
|
|
3
|
+
from typing import List, Dict, Annotated
|
|
4
|
+
|
|
5
|
+
import pymongo
|
|
6
|
+
from fastapi import APIRouter, Depends, HTTPException, Path, Query
|
|
7
|
+
from pydantic import AfterValidator
|
|
8
|
+
from refscan.lib.helpers import (
|
|
9
|
+
get_collection_names_from_schema,
|
|
10
|
+
get_names_of_classes_eligible_for_collection,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from nmdc_runtime.api.endpoints.lib.linked_instances import (
|
|
14
|
+
gather_linked_instances,
|
|
15
|
+
hydrated,
|
|
16
|
+
)
|
|
17
|
+
from nmdc_runtime.config import IS_LINKED_INSTANCES_ENDPOINT_ENABLED
|
|
18
|
+
from nmdc_runtime.minter.config import typecodes
|
|
19
|
+
from nmdc_runtime.minter.domain.model import check_valid_ids
|
|
20
|
+
from nmdc_runtime.util import (
|
|
21
|
+
decorate_if,
|
|
22
|
+
nmdc_database_collection_names,
|
|
23
|
+
nmdc_schema_view,
|
|
24
|
+
)
|
|
25
|
+
from pymongo.database import Database as MongoDatabase
|
|
26
|
+
from starlette import status
|
|
27
|
+
|
|
28
|
+
from nmdc_runtime.api.core.metadata import map_id_to_collection, get_collection_for_id
|
|
29
|
+
from nmdc_runtime.api.core.util import raise404_if_none
|
|
30
|
+
from nmdc_runtime.api.db.mongo import (
|
|
31
|
+
get_mongo_db,
|
|
32
|
+
)
|
|
33
|
+
from nmdc_runtime.api.endpoints.util import (
|
|
34
|
+
list_resources,
|
|
35
|
+
strip_oid,
|
|
36
|
+
comma_separated_values,
|
|
37
|
+
)
|
|
38
|
+
from nmdc_runtime.api.models.metadata import Doc
|
|
39
|
+
from nmdc_runtime.api.models.util import ListRequest, ListResponse
|
|
40
|
+
|
|
41
|
+
# Router on which this module's `/nmdcschema/...` endpoints are registered.
router = APIRouter()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def ensure_collection_name_is_known_to_schema(collection_name: str):
    r"""
    Verify that `collection_name` is the name of a collection described by the NMDC Schema.

    Returns `None` when the name is known. Otherwise, raises an `HTTPException`
    (HTTP 400 Bad Request) whose detail lists the valid collection names in sorted order.
    """
    known_names = get_collection_names_from_schema(nmdc_schema_view())
    if collection_name in known_names:
        return
    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail=f"Collection name must be one of {sorted(known_names)}",
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@router.get("/nmdcschema/version")
def get_nmdc_schema_version():
    r"""
    Returns a string indicating which version of the [NMDC Schema](https://microbiomedata.github.io/nmdc-schema/)
    the Runtime is using.

    **Note:** The same information—and more—is also available via the `/version` endpoint.
    """
    # Ask the installed package metadata for the version of the `nmdc_schema` distribution.
    schema_version = version("nmdc_schema")
    return schema_version
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@router.get("/nmdcschema/typecodes")
def get_nmdc_schema_typecodes() -> List[Dict[str, str]]:
    r"""
    Returns a list of objects, each of which indicates (a) a schema class, and (b) the typecode
    that the minter would use when generating a new ID for an instance of that schema class.

    Each object has three properties:
    - `id`: a string that consists of "nmdc:" + the class name + "_typecode"
    - `schema_class`: a string that consists of "nmdc:" + the class name
    - `name`: the typecode the minter would use when minting an ID for an instance of that class
    """
    # The minter configuration is the source of the typecode records.
    typecode_records = typecodes()
    return typecode_records
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@router.get("/nmdcschema/collection_stats")
def get_nmdc_database_collection_stats(
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    """
    To get the NMDC Database MongoDB collection statistics, like the total count of records in a collection or the size
    of the collection, try executing the GET /nmdcschema/collection_stats endpoint

    Field reference: <https://www.mongodb.com/docs/manual/reference/command/collStats/#std-label-collStats-output>.
    """
    # The subset of `$collStats` output fields that is reported for each collection.
    reported_fields = (
        "ns",
        "storageStats.size",
        "storageStats.count",
        "storageStats.avgObjSize",
        "storageStats.storageSize",
        "storageStats.totalIndexSize",
        "storageStats.totalSize",
        "storageStats.scaleFactor",
    )
    pipeline = [
        {"$collStats": {"storageStats": {}}},
        {"$project": {field: 1 for field in reported_fields}},
    ]

    # Only report on collections that are both (1) named by
    # `nmdc_database_collection_names()` and (2) listed by the Mongo database.
    schema_collection_names = set(nmdc_database_collection_names())
    existing_collection_names = set(mdb.list_collection_names())

    stats = []
    for name in schema_collection_names & existing_collection_names:
        stats.extend(mdb[name].aggregate(pipeline))
    return stats
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _finalize_linked_instances_page(rv: dict, mdb, hydrate: bool) -> dict:
    """
    Post-process a page returned by `list_resources`, in place, and return it.

    When `hydrate` is true, the page's resources are first replaced with the result of
    `hydrated(resources, mdb)`. Each resource is then passed through `strip_oid`.
    """
    resources = hydrated(rv["resources"], mdb) if hydrate else rv["resources"]
    rv["resources"] = [strip_oid(d) for d in resources]
    return rv


@decorate_if(condition=IS_LINKED_INSTANCES_ENDPOINT_ENABLED)(
    router.get(
        "/nmdcschema/linked_instances",
        response_model=ListResponse[Doc],
        response_model_exclude_unset=True,
    )
)
def get_linked_instances(
    ids: Annotated[
        list[str],
        Query(
            title="Instance (aka Document) IDs",
            description=(
                "The `ids` you want to serve as the nexus for graph traversal to collect linked instances."
                "\n\n_Example_: [`nmdc:dobj-11-nf3t6f36`]"
            ),
            examples=["nmdc:dobj-11-nf3t6f36"],
        ),
        AfterValidator(check_valid_ids),
    ],
    types: Annotated[
        list[str] | None,
        Query(
            title="Instance (aka Document) types",
            description=(
                "The `types` of instances you want to return. Can be abstract types such as `nmdc:InformationObject` "
                "or instantiated ones such as `nmdc:DataObject`. Defaults to [`nmdc:NamedThing`]."
                "\n\n_Example_: [`nmdc:PlannedProcess`]"
            ),
            # The example must be a class name (matching the description above), not an instance ID.
            examples=["nmdc:PlannedProcess"],
        ),
    ] = None,
    hydrate: Annotated[
        bool,
        Query(
            title="Hydrate",
            description="Whether to include full documents in the response. The default is to include slim documents.",
        ),
    ] = False,
    page_token: Annotated[
        str | None,
        Query(
            title="Next page token",
            description="""A bookmark you can use to fetch the _next_ page of resources. You can get this from the
                    `next_page_token` field in a previous response from this endpoint.\n\n_Example_:
                    `nmdc:sys0zr0fbt71`""",
            examples=[
                "nmdc:sys0zr0fbt71",
            ],
        ),
    ] = None,
    max_page_size: Annotated[
        int,
        Query(
            title="Resources per page",
            description="How many resources you want _each page_ to contain, formatted as a positive integer.",
            examples=[20],
        ),
    ] = 20,
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    """
    Retrieves database instances that are both (a) linked to any of `ids`, and (b) of a type in `types`.

    An [instance](https://linkml.io/linkml-model/latest/docs/specification/02instances/) is an object conforming to a
    class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefinition)) in our database ([
    nmdc:Database](https://w3id.org/nmdc/Database)). While a [nmdc:Database](https://w3id.org/nmdc/Database) is
    organized into collections, every item in every database collection -- that is, every instance -- knows its
    `type`, so we can (and here do) return a simple list of instances ([a LinkML CollectionInstance](
    https://linkml.io/linkml-model/latest/docs/specification/02instances/#collections)). If hydrate is `False` (the
    default), then the returned list contains "slim" documents that include only the `id` and `type` of each
    instance. If hydrate is `True`, then the returned list contains "full" (aka <a
    href="https://en.wikipedia.org/wiki/Hydration_(web_development)">"hydrated"</a>) documents of each instance,
    suitable e.g. for a client to subsequently use to construct a corresponding
    [nmdc:Database](https://w3id.org/nmdc/Database) instance with schema-compliant documents.
    Both "slim" and "full" documents include (optional) `_upstream_of` and `_downstream_of` fields,
    to indicate the returned document's relationship to `ids`.

    From the nexus instance IDs given in `ids`, both "upstream" and "downstream" links are followed (transitively)
    to collect the set of all instances linked to these `ids`.

    * A link "upstream" is represented by a slot ([linkml:SlotDefinition](https://w3id.org/linkml/SlotDefinition))
    for which the
    range ([linkml:range](https://w3id.org/linkml/range)) instance has originated, or helped produce,
    the domain ([linkml:domain](https://w3id.org/linkml/domain)) instance.
    For example, we consider [nmdc:associated_studies](https://w3id.org/nmdc/associated_studies) to be
    an "upstream" slot because we consider a [nmdc:Study](https://w3id.org/nmdc/Study) (the slot's range)
    to be upstream of a [nmdc:Biosample](https://w3id.org/nmdc/Biosample) (the slot's domain).

    * A link "downstream" is represented by a slot for which the
    range instance has originated from, or was in part produced by, the domain instance.
    For example, [nmdc:has_output](https://w3id.org/nmdc/has_output) is
    a "downstream" slot because its [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing) range
    is downstream of its [nmdc:PlannedProcess](https://w3id.org/nmdc/PlannedProcess) domain.

    Acceptable values for `types` are not limited only to the ones embedded in concrete instances, e.g.
    the `schema_class` field values returned by the [`GET /nmdcschema/typecodes`](/nmdcschema/typecodes) API endpoint.
    Rather, any subclass (of any depth) of [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing) --
    [nmdc:DataEmitterProcess](https://w3id.org/nmdc/DataEmitterProcess),
    [nmdc:InformationObject](https://w3id.org/nmdc/InformationObject),
    [nmdc:Sample](https://w3id.org/nmdc/Sample), etc. -- may be given.
    If no value for `types` is given, then all [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing)s are returned.
    """
    # A page token continues an earlier listing, so `ids` and `types` are not re-validated
    # and no new gathering is performed.
    if page_token is not None:
        rv = list_resources(
            req=ListRequest(page_token=page_token, max_page_size=max_page_size), mdb=mdb
        )
        return _finalize_linked_instances_page(rv, mdb, hydrate)

    # Every requested nexus ID must exist in the `alldocs` collection.
    ids_found = [d["id"] for d in mdb.alldocs.find({"id": {"$in": ids}}, {"id": 1})]
    ids_not_found = list(set(ids) - set(ids_found))
    if ids_not_found:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Some IDs not found: {ids_not_found}.",
        )

    # Every requested type must be the (prefixed) name of a class in the schema.
    types = types or ["nmdc:NamedThing"]
    types_possible = {f"nmdc:{name}" for name in nmdc_schema_view().all_classes()}
    types_not_found = list(set(types) - types_possible)
    if types_not_found:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=(
                f"Some types not found: {types_not_found}. "
                "You may need to prefix with `nmdc:`. "
                "If you don't supply any types, the set {'nmdc:NamedThing'} will be used. "
                f"Types possible: {types_possible}"
            ),
        )

    # `gather_linked_instances` returns the name of the collection that is then listed from.
    merge_into_collection_name = gather_linked_instances(
        alldocs_collection=mdb.alldocs, ids=ids, types=types
    )

    rv = list_resources(
        ListRequest(page_token=page_token, max_page_size=max_page_size),
        mdb,
        merge_into_collection_name,
    )
    return _finalize_linked_instances_page(rv, mdb, hydrate)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
@router.get(
    "/nmdcschema/ids/{doc_id}",
    response_model=Doc,
    response_model_exclude_unset=True,
)
def get_by_id(
    doc_id: Annotated[
        str,
        Path(
            title="Document ID",
            description="The `id` of the document you want to retrieve.\n\n_Example_: `nmdc:bsm-11-abc123`",
            examples=["nmdc:bsm-11-abc123"],
        ),
    ],
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    r"""
    Retrieves the document having the specified `id`, regardless of which schema-described collection it resides in.
    """
    collection_name = get_collection_for_id(doc_id, map_id_to_collection(mdb))

    # Short-circuit: when no collection name was resolved, the falsy `collection_name`
    # itself (rather than a query result) is handed to `raise404_if_none`.
    document = collection_name and mdb[collection_name].find_one({"id": doc_id})
    return strip_oid(raise404_if_none(document))
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
@router.get("/nmdcschema/ids/{doc_id}/collection-name")
def get_collection_name_by_doc_id(
    doc_id: Annotated[
        str,
        Path(
            title="Document ID",
            description="The `id` of the document.\n\n_Example_: `nmdc:bsm-11-abc123`",
            examples=["nmdc:bsm-11-abc123"],
        ),
    ],
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    r"""
    Returns the name of the collection, if any, containing the document having the specified `id`.

    This endpoint uses the NMDC Schema to determine the schema class of which an instance could have
    the specified value as its `id`; and then uses the NMDC Schema to determine the names of the
    `Database` slots (i.e. Mongo collection names) that could contain instances of that schema class.

    This endpoint then searches those Mongo collections for a document having that `id`.
    If it finds one, it responds with the name of the collection containing the document.
    If it does not find one, it responds with an `HTTP 404 Not Found` response.
    """
    # Note: The `nmdc_runtime.api.core.metadata.map_id_to_collection` function is
    #       not used here because that function (a) only processes collections whose
    #       names end with `_set` and (b) only works for `id` values that are in
    #       use in the database (as opposed to hypothetical `id` values).

    # Extract the typecode portion, if any, of the specified `id`.
    #
    # Examples:
    # - "nmdc:foo-123-456" → "foo"
    # - "foo:nmdc-123-456" → `None`
    #
    match = re.search(r"^nmdc:(\w+)?-", doc_id)
    typecode_portion = match.group(1) if match else None

    if typecode_portion is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="The typecode portion of the specified `id` is invalid.",
        )

    # Determine the schema class, if any, of which the specified `id` could belong to an instance.
    # The first typecode record whose `name` matches wins; its "nmdc:" prefix is dropped.
    schema_class_name = next(
        (
            typecode["schema_class"].replace("nmdc:", "", 1)
            for typecode in typecodes()
            if typecode_portion == typecode["name"]
        ),
        None,
    )

    if schema_class_name is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="The specified `id` is not compatible with any schema classes.",
        )

    # Determine the Mongo collection(s) in which instances of that schema class can reside.
    schema_view = nmdc_schema_view()
    collection_names = [
        collection_name
        for collection_name in get_collection_names_from_schema(schema_view=schema_view)
        if schema_class_name
        in get_names_of_classes_eligible_for_collection(
            schema_view=schema_view, collection_name=collection_name
        )
    ]

    if not collection_names:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="The specified `id` is not compatible with any database collections.",
        )

    # Use the Mongo database to determine which of those collections a document having that `id` actually
    # resides in, if any. If multiple collections contain such a document, report only the first one.
    # `limit=1` lets the count stop as soon as a single matching document is found.
    containing_collection_name = next(
        (
            collection_name
            for collection_name in collection_names
            if mdb.get_collection(name=collection_name).count_documents(
                dict(id=doc_id), limit=1
            )
            > 0
        ),
        None,
    )

    if containing_collection_name is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="The specified `id` does not belong to any documents.",
        )

    return {
        "id": doc_id,
        "collection_name": containing_collection_name,
    }
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@router.get(
    "/nmdcschema/collection_names",
    response_model=List[str],
    status_code=status.HTTP_200_OK,
)
def get_collection_names():
    """
    Return all valid NMDC Schema collection names, i.e. the names of the slots of [the nmdc:Database class](
    https://w3id.org/nmdc/Database/) that describe database collections.
    """
    # The names come from the schema view; they are sorted before being returned.
    names = get_collection_names_from_schema(nmdc_schema_view())
    return sorted(names)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
@router.get(
    "/nmdcschema/{collection_name}",
    response_model=ListResponse[Doc],
    response_model_exclude_unset=True,
)
def list_from_collection(
    collection_name: Annotated[
        str,
        Path(
            title="Collection name",
            description="The name of the collection.\n\n_Example_: `biosample_set`",
            examples=["biosample_set"],
        ),
    ],
    req: Annotated[ListRequest, Query()],
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    r"""
    Retrieves resources that match the specified filter criteria and reside in the specified collection.

    Searches the specified collection for documents matching the specified `filter` criteria.
    If the `projection` parameter is used, each document in the response will only include
    the fields specified by that parameter (plus the `id` field).

    Use the [`GET /nmdcschema/collection_names`](/nmdcschema/collection_names) API endpoint to return all valid
    collection names, i.e. the names of the slots of [the nmdc:Database class](https://w3id.org/nmdc/Database/) that
    describe database collections.

    Note: If the specified maximum page size is a number greater than zero, and _more than that number of resources_
          in the collection match the filter criteria, this endpoint will paginate the resources. Pagination can take
          a long time—especially for collections that contain a lot of documents (e.g. millions).

    **Tips:**
    1. When the filter includes a regex and you're using that regex to match the beginning of a string, try to ensure
       the regex is a [prefix expression](https://www.mongodb.com/docs/manual/reference/operator/query/regex/#index-use).
       That will allow MongoDB to optimize the way it uses the regex, making this API endpoint respond faster.
    """

    # Reject (HTTP 400 Bad Request) any collection name the schema does not describe.
    ensure_collection_name_is_known_to_schema(collection_name)

    page = list_resources(req, mdb, collection_name)
    page["resources"] = list(map(strip_oid, page["resources"]))
    return page
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
@router.get(
    "/nmdcschema/{collection_name}/{doc_id}",
    response_model=Doc,
    response_model_exclude_unset=True,
)
def get_from_collection_by_id(
    collection_name: Annotated[
        str,
        Path(
            title="Collection name",
            description="The name of the collection.\n\n_Example_: `biosample_set`",
            examples=["biosample_set"],
        ),
    ],
    doc_id: Annotated[
        str,
        Path(
            title="Document ID",
            description="The `id` of the document you want to retrieve.\n\n_Example_: `nmdc:bsm-11-abc123`",
            examples=["nmdc:bsm-11-abc123"],
        ),
    ],
    projection: Annotated[
        str | None,
        Query(
            title="Projection",
            description="""Comma-delimited list of the names of the fields you want the document in the response to
                    include.\n\n_Example_: `id,name,ecosystem_type`""",
            examples=[
                "id,name,ecosystem_type",
            ],
        ),
    ] = None,
    mdb: MongoDatabase = Depends(get_mongo_db),
):
    r"""
    Retrieves the document having the specified `id`, from the specified collection; optionally, including only the
    fields specified via the `projection` parameter.
    """
    # raise HTTP_400_BAD_REQUEST on invalid collection_name
    ensure_collection_name_is_known_to_schema(collection_name)

    projection = comma_separated_values(projection) if projection else None

    # Only the database call is guarded. A failure reported by MongoDB is translated
    # into an HTTP 422 response, with the original error kept as the cause.
    try:
        document = mdb[collection_name].find_one({"id": doc_id}, projection=projection)
    except pymongo.errors.OperationFailure as e:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
        ) from e

    return strip_oid(raise404_if_none(document))
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
import pymongo
|
|
4
|
+
from fastapi import APIRouter, Depends
|
|
5
|
+
|
|
6
|
+
from nmdc_runtime.api.core.util import raise404_if_none
|
|
7
|
+
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
8
|
+
from nmdc_runtime.api.models.object_type import ObjectType
|
|
9
|
+
from nmdc_runtime.api.models.workflow import Workflow
|
|
10
|
+
|
|
11
|
+
# Router on which this module's `/object_types...` endpoints are registered.
router = APIRouter()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@router.get("/object_types", response_model=List[ObjectType])
def list_object_types(
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    # Unfiltered query: every document in the `object_types` collection is returned.
    cursor = mdb.object_types.find()
    return list(cursor)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@router.get("/object_types/{object_type_id}", response_model=ObjectType)
def get_object_type(
    object_type_id: str,
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    # Look the object type up by its `id`; `raise404_if_none` handles the not-found case.
    object_type = mdb.object_types.find_one({"id": object_type_id})
    return raise404_if_none(object_type)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@router.get("/object_types/{object_type_id}/workflows", response_model=List[Workflow])
def list_object_type_workflows(
    object_type_id: str,
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    # Step 1: collect the `workflow_id` of every trigger that references this object type.
    workflow_ids = []
    for trigger in mdb.triggers.find({"object_type_id": object_type_id}):
        workflow_ids.append(trigger["workflow_id"])

    # Step 2: fetch the workflows whose `id` is among the collected workflow IDs.
    matching_workflows = mdb.workflows.find({"id": {"$in": workflow_ids}})
    return list(matching_workflows)
|