nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +212 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +777 -0
- nmdc_runtime/api/core/util.py +114 -0
- nmdc_runtime/api/db/mongo.py +436 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +206 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +817 -0
- nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +495 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +57 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +207 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/wfe_file_stages.py +122 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
- nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +56 -0
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +30 -4
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +89 -0
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +29 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +29 -0
- nmdc_runtime/site/export/ncbi_xml.py +1331 -0
- nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
- nmdc_runtime/site/export/study_metadata.py +27 -4
- nmdc_runtime/site/graphs.py +294 -45
- nmdc_runtime/site/ops.py +1008 -230
- nmdc_runtime/site/repair/database_updater.py +451 -0
- nmdc_runtime/site/repository.py +368 -133
- nmdc_runtime/site/resources.py +154 -80
- nmdc_runtime/site/translation/gold_translator.py +235 -83
- nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
- nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
- nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
- nmdc_runtime/site/translation/neon_utils.py +24 -7
- nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
- nmdc_runtime/site/translation/translator.py +73 -3
- nmdc_runtime/site/util.py +26 -7
- nmdc_runtime/site/validation/emsl.py +1 -0
- nmdc_runtime/site/validation/gold.py +1 -0
- nmdc_runtime/site/validation/util.py +16 -12
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +236 -192
- nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
- nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/terminusdb/generate.py +0 -198
- nmdc_runtime/site/terminusdb/ingest.py +0 -44
- nmdc_runtime/site/terminusdb/schema.py +0 -1671
- nmdc_runtime/site/translation/emsl.py +0 -42
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -31
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -42
- nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
- nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
- nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from datetime import datetime, timezone
|
|
3
|
-
|
|
4
|
-
from toolz import dissoc
|
|
5
|
-
|
|
6
|
-
from nmdc_runtime.api.models.job import JobOperationMetadata
|
|
7
|
-
from nmdc_runtime.api.models.operation import Operation
|
|
8
|
-
from nmdc_runtime.api.models.operation import UpdateOperationRequest
|
|
9
|
-
from nmdc_runtime.api.models.util import ListRequest
|
|
10
|
-
from nmdc_runtime.api.models.util import ResultT
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def load_local_json(url, prefixes_url_to_local=None):
    """Load JSON from a local file that mirrors the content at `url`.

    Useful for large files cached on local filesystem.

    You may, for example, `cp --parents ` many files on a remote filesystem to a staging
    folder on that remote filesystem, gzip that folder, scp it to your local machine, and then
    extract to your local machine.

    Every key of `prefixes_url_to_local` that occurs in `url` is replaced by its
    value, and the resulting string is opened as a local path. When the mapping
    is None or empty, `url` itself is used as the path.

    Example:
        prefixes_url_to_local = {
            "https://data.microbiomedata.org/data/": "/Users/dwinston/nmdc_files/2021-09-scanon-meta/ficus/pipeline_products/",
            "https://portal.nersc.gov/project/m3408/": "/Users/dwinston/nmdc_files/2021-09-scanon-meta/www/",
        }

    Returns:
        The parsed JSON content of the local file.
    """
    path = url
    # The default is None; guard so that omitting the mapping does not raise
    # AttributeError on `None.items()`.
    for before, after in (prefixes_url_to_local or {}).items():
        path = path.replace(before, after)
    with open(path) as f:
        return json.load(f)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def claim_metadata_ingest_jobs(
    client, drs_object_ids_to_ingest, wf_id, max_page_size=1000
):
    """Claim every job of workflow `wf_id` whose config object is in the given ids.

    Jobs are listed page by page through `client.list_jobs`, then each one is
    claimed with `client.claim_job`. Returns the list of claim responses, in the
    order the jobs were listed.
    """
    job_filter = {
        "workflow.id": wf_id,
        "config.object_id": {"$in": drs_object_ids_to_ingest},
    }
    lr = ListRequest(filter=json.dumps(job_filter), max_page_size=max_page_size)

    jobs = []
    finished = False
    while not finished:
        page = client.list_jobs(lr.model_dump()).json()
        jobs.extend(page["resources"])
        if "next_page_token" in page:
            lr.page_token = page["next_page_token"]
            # safety escape: stop paging once one job per requested object is collected
            finished = len(jobs) == len(drs_object_ids_to_ingest)
        else:
            finished = True

    return [client.claim_job(job["id"]) for job in jobs]
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def mongo_add_docs_result_as_dict(rv):
    """Map each collection name in `rv` to its bulk-write result without "upserted".

    `rv` maps collection names to objects exposing a `bulk_api_result` mapping.
    """
    summary = {}
    for collection_name, bulk_write_result in rv.items():
        summary[collection_name] = dissoc(
            bulk_write_result.bulk_api_result, "upserted"
        )
    return summary
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def get_metadata_ingest_job_ops(mongo, wf_id, drs_object_ids_to_ingest):
    """Return the not-yet-done operations for the given workflow and object ids.

    Queries `mongo.db.operations` and materializes the cursor into a list.
    """
    query = {
        "metadata.job.workflow.id": wf_id,
        "metadata.job.config.object_id": {"$in": drs_object_ids_to_ingest},
        "done": False,
    }
    cursor = mongo.db.operations.find(query)
    return list(cursor)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def do_metadata_ingest_job(client, mongo, job_op_doc, prefixes_url_to_local=None):
    """Run one metadata-ingest operation and mark it done.

    Steps: parse `job_op_doc` as an Operation, look up the access URL of the
    job's configured object via `client`, load that object's JSON from the local
    filesystem, add the documents through `mongo.add_docs` (no validation, no
    replacement), and patch the operation with the result.

    Args:
        client: API client providing `get_object_info` and `update_operation`.
        mongo: resource providing `add_docs`.
        job_op_doc: operation document (dict) describing the job.
        prefixes_url_to_local: optional mapping of URL prefixes to local path
            prefixes, forwarded to `load_local_json`.

    Returns:
        Whatever `client.update_operation` returns.
    """
    op = Operation[ResultT, JobOperationMetadata](**job_op_doc)
    object_info = client.get_object_info(op.metadata.job.config["object_id"]).json()
    url = object_info["access_methods"][0]["access_url"]["url"]
    # Always hand load_local_json a dict: it iterates `.items()` on the mapping,
    # so passing nothing (None) would raise AttributeError.
    docs = load_local_json(url, prefixes_url_to_local or {})
    op_result = mongo.add_docs(docs, validate=False, replace=False)
    op_patch = UpdateOperationRequest(
        done=True,
        result=mongo_add_docs_result_as_dict(op_result),
        metadata={"done_at": datetime.now(timezone.utc).isoformat(timespec="seconds")},
    )
    return client.update_operation(op.id, op_patch)
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
import re
|
|
4
|
-
from datetime import datetime, timezone, timedelta
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from tempfile import TemporaryDirectory
|
|
7
|
-
|
|
8
|
-
import requests
|
|
9
|
-
from bs4 import BeautifulSoup
|
|
10
|
-
|
|
11
|
-
from nmdc_runtime.api.models.object import DrsObjectIn
|
|
12
|
-
from nmdc_runtime.util import (
|
|
13
|
-
drs_metadata_for,
|
|
14
|
-
nmdc_jsonschema_validator,
|
|
15
|
-
specialize_activity_set_docs,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
# Splits an http(s) URL into its domain and its (non-empty) path.
pattern = re.compile(r"https?://(?P<domain>[^/]+)/(?P<path>.+)")


def url_to_name(url):
    """Derive a flat name from an http(s) URL.

    The domain labels are reversed and joined with ".", the path has "/"
    replaced by ".", and the two parts are joined with "__".

    Example: "https://a.b.org/x/y.json" -> "org.b.a__x.y.json"

    Raises:
        ValueError: if `url` is not an http(s) URL with a non-empty path.
    """
    m = pattern.match(url)
    if m is None:
        # Without this guard the failure is an opaque AttributeError on None.
        raise ValueError(f"not an http(s) URL with a non-empty path: {url!r}")
    reversed_domain = ".".join(reversed(m.group("domain").split(".")))
    dotted_path = m.group("path").replace("/", ".")
    return f"{reversed_domain}__{dotted_path}"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def fetch_url(url, timeout=30):
    """Issue an HTTP GET for `url` with the given timeout and return the response."""
    response = requests.get(url, timeout=timeout)
    return response
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class HttpResponseNotOk(Exception):
    """Raised when an HTTP response has a status code other than 200."""


class HttpResponseNotJson(Exception):
    """Raised when an HTTP response body cannot be decoded as JSON."""


def response_to_json(response):
    """Return the decoded JSON body of `response`.

    Args:
        response: object with a `status_code` attribute and a `json()` method.

    Raises:
        HttpResponseNotOk: if the status code is not 200 (the code is in the message).
        HttpResponseNotJson: if `response.json()` raises ValueError (chained as cause).
    """
    if response.status_code != 200:
        raise HttpResponseNotOk(f"unexpected HTTP status {response.status_code}")
    try:
        return response.json()
    except ValueError as err:
        # Keep the decoder error as __cause__ so the failure stays debuggable.
        raise HttpResponseNotJson("response body is not valid JSON") from err
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def json_data_from_url_to_file(json_data, url, save_dir):
    """Write `json_data` as JSON into `save_dir` and return the file's path.

    The file name is derived from `url` via `url_to_name`.
    """
    target = os.path.join(save_dir, url_to_name(url))
    with open(target, "w") as out:
        json.dump(json_data, out)
    return target
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def json_clean(d, model, exclude_unset=False):
    """Round-trip dict `d` through `model` and return the resulting plain dict.

    `model` is called with `d` as keyword arguments; the instance's `json()`
    output (honoring `exclude_unset`) is parsed back into Python objects.
    """
    instance = model(**d)
    serialized = instance.json(exclude_unset=exclude_unset)
    return json.loads(serialized)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def drs_object_in_for(url):
    """Build a DrsObjectIn describing the JSON document served at `url`.

    The document is fetched, saved into a temporary directory, and described
    via `drs_metadata_for`. Returns {"result": <DrsObjectIn>} on success, or
    {"error": "HttpResponseNotOk"} / {"error": "HttpResponseNotJson"} when the
    response is not a 200 or not JSON.
    """
    with TemporaryDirectory() as save_dir:
        response = fetch_url(url)
        try:
            json_data = response_to_json(response)
        except HttpResponseNotOk:
            return {"error": "HttpResponseNotOk"}
        except HttpResponseNotJson:
            return {"error": "HttpResponseNotJson"}

        filepath = json_data_from_url_to_file(json_data, url, save_dir)
        overrides = {
            "access_methods": [{"access_url": {"url": url}}],
            # ":" in the derived file name is replaced by "-" for the object name
            "name": Path(filepath).name.replace(":", "-"),
        }
        metadata = drs_metadata_for(filepath, overrides)
        return {"result": DrsObjectIn(**metadata)}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def create_drs_object_for(url, drs_object_in, client):
    """Create an object through `client` from `drs_object_in`.

    Only explicitly set fields are sent. Returns a dict pairing `url` with the
    client's response.
    """
    payload = json.loads(drs_object_in.json(exclude_unset=True))
    response = client.create_object(payload)
    return {"url": url, "response": response}
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def validate_as_metadata_and_ensure_tags_for(
    drs_id, client, tags=("schema#/definitions/Database", "metadata-in")
):
    """Validate the object `drs_id` as NMDC metadata, then ensure it carries `tags`.

    The object's JSON is fetched via `client`, passed through
    `specialize_activity_set_docs`, and handed to `nmdc_jsonschema_validator`,
    whose return value is ignored (presumably it raises on invalid documents —
    verify against its definition). Returns a dict mapping each tag to the
    result of `client.ensure_object_tag`.
    """
    fetched_docs = client.get_object_bytes(drs_id).json()
    docs, _ = specialize_activity_set_docs(fetched_docs)
    nmdc_jsonschema_validator(docs)

    tag_results = {}
    for tag in tags:
        tag_results[tag] = client.ensure_object_tag(drs_id, tag)
    return tag_results
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def recent_metadata_urls(
    urlpath="https://portal.nersc.gov/project/m3408/meta/anno2/",
    urlpath_extra="?C=M;O=D",
    since="2021-09",
):
    """Scrapes recent URLs from Apache/2.4.38 (Debian) Server listing.

    Designed with urlpath.startswith("https://portal.nersc.gov/project/m3408/") in mind.

    Args:
        urlpath: URL of the directory listing; also the prefix of returned URLs.
        urlpath_extra: query string appended to `urlpath` for the listing request.
        since: prefix that a row's last-modified text must start with (e.g.
            "2021-09"). If None, the year-month of 30 days ago (UTC) is used.

    Returns:
        List of URLs (`urlpath` + file name) for the matching ".json" entries.
    """
    if since is None:
        recent_enuf = datetime.now(timezone.utc) - timedelta(days=30)
        # Zero-pad the month: the default "2021-09" shows that prefixes are
        # expected in YYYY-MM form, which an unpadded "2021-9" never matches.
        since = f"{recent_enuf.year}-{recent_enuf.month:02d}"

    # Bound the request, matching fetch_url's default timeout.
    rv = requests.get(f"{urlpath}{urlpath_extra}", timeout=30)
    soup = BeautifulSoup(rv.text, "html.parser")

    urls = []
    for tr in soup.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) != 5:
            # Header/separator rows do not have the five expected cells.
            continue

        _, td_name, td_last_modified, _, _ = tds
        if not td_last_modified.text.startswith(since):
            continue

        link = td_name.a
        if link is None:
            # Name cell without an anchor: nothing to link to.
            continue
        name = link.text
        if name.endswith(".json"):
            urls.append(f"{urlpath}{name}")

    return urls
|
|
@@ -1,198 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Example usage:
|
|
3
|
-
$ schemagen-terminusdb ../nmdc-schema/src/schema/nmdc.yaml \
|
|
4
|
-
> nmdc_runtime/site/terminusdb/nmdc.schema.terminusdb.json
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import json
|
|
8
|
-
import os
|
|
9
|
-
from typing import Union, TextIO, List
|
|
10
|
-
|
|
11
|
-
import click
|
|
12
|
-
from linkml.utils.generator import Generator, shared_arguments
|
|
13
|
-
from linkml_runtime.linkml_model.meta import (
|
|
14
|
-
SchemaDefinition,
|
|
15
|
-
ClassDefinition,
|
|
16
|
-
SlotDefinition,
|
|
17
|
-
)
|
|
18
|
-
from linkml_runtime.utils.formatutils import camelcase, be, underscore
|
|
19
|
-
|
|
20
|
-
# http://books.xmlschemata.org/relaxng/relax-CHP-19.html
|
|
21
|
-
# XSD datatype names accepted as slot ranges, grouped by kind.
XSD_Ok = {
    # string-like and name types
    "xsd:anyURI", "xsd:string", "xsd:normalizedString", "xsd:token",
    "xsd:language", "xsd:Name", "xsd:NCName", "xsd:NMTOKEN", "xsd:NMTOKENS",
    "xsd:ID", "xsd:IDREF", "xsd:IDREFS", "xsd:ENTITY", "xsd:ENTITIES",
    "xsd:NOTATION",
    # binary and boolean
    "xsd:base64Binary", "xsd:hexBinary", "xsd:boolean",
    # numeric
    "xsd:decimal", "xsd:double", "xsd:float",
    "xsd:integer", "xsd:int", "xsd:long", "xsd:short", "xsd:byte",
    "xsd:negativeInteger", "xsd:nonNegativeInteger",
    "xsd:nonPositiveInteger", "xsd:positiveInteger",
    "xsd:unsignedByte", "xsd:unsignedInt", "xsd:unsignedLong",
    "xsd:unsignedShort",
    # date and time
    "xsd:date", "xsd:dateTime", "xsd:duration", "xsd:time",
    "xsd:gDay", "xsd:gMonth", "xsd:gMonthDay", "xsd:gYear", "xsd:gYearMonth",
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def as_list(thing) -> list:
    """Return `thing` unchanged if it is a list, else wrap it in a one-element list."""
    return thing if isinstance(thing, list) else [thing]


def has_field(graph: List[dict], cls: dict, field: str) -> bool:
    """Tell whether `cls` or any of its ancestors in `graph` defines `field`.

    Args:
        graph: class documents; each may have an "@id" and an "@inherits" entry
            ("@inherits" may be a single id or a list of ids).
        cls: the class document to inspect.
        field: the key to look for.

    Returns:
        True if `field` is a key of `cls` or of any class reachable through
        "@inherits"; False otherwise. Parents missing from `graph` are skipped.
    """
    if field in cls:
        return True
    for parent_id in as_list(cls.get("@inherits", [])):
        # Default to None: a bare next() raises StopIteration when the parent
        # is absent, which the truthiness check below is meant to tolerate.
        parent_cls = next(
            (graph_cls for graph_cls in graph if graph_cls.get("@id") == parent_id),
            None,
        )
        if parent_cls and has_field(graph, parent_cls, field):
            return True
    return False
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class TerminusdbGenerator(Generator):
    """Generates JSON file to pass to `WOQLClient.insert_document(..., graph_type="schema")`."""

    generatorname = os.path.basename(__file__)
    generatorversion = "0.1.0"
    valid_formats = ["json"]
    visit_all_class_slots = True

    def __init__(self, schema: Union[str, TextIO, SchemaDefinition], **kwargs) -> None:
        super().__init__(schema, **kwargs)
        # Accumulated schema documents; the first entry is the @context.
        self.graph = []
        # Document for the class currently being visited.
        self.cls_json = {}

    def visit_schema(self, inline: bool = False, **kwargs) -> None:
        """Start the graph with the @context document."""
        context = {
            "@type": "@context",
            "@base": "https://api.microbiomedata.org/nmdcschema/ids/",
            "@schema": "https://w3id.org/nmdc/",
        }
        self.graph.append(context)

    def end_schema(self, **_) -> None:
        """Key every class that has (or inherits) an `id` field, then print the graph."""
        for doc in self.graph:
            if has_field(self.graph, doc, "id"):
                doc["@key"] = {"@type": "Lexical", "@fields": ["id"]}
        print(json.dumps(self.graph, indent=2))

    def visit_class(self, cls: ClassDefinition) -> bool:
        """Begin a fresh class document for `cls`."""
        doc = {
            "@type": "Class",
            "@id": camelcase(cls.name),
            "@documentation": {
                "@comment": be(cls.description),
                "@properties": {},
            },
        }
        if cls.is_a:
            doc["@inherits"] = camelcase(cls.is_a)
        if cls.abstract:
            doc["@abstract"] = []
        self.cls_json = doc
        return True

    def end_class(self, cls: ClassDefinition) -> None:
        """Finalize the class id from its definition URI and add it to the graph."""
        after_last_colon = cls.definition_uri.split(":")[-1]
        self.cls_json["@id"] = after_last_colon.rpartition("/")[-1]
        self.graph.append(self.cls_json)

    def visit_class_slot(
        self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition
    ) -> None:
        """Record one of the class's own slots as a property of the current class document."""
        if slot not in self.own_slots(cls):
            return
        if slot.is_usage_slot:
            # TerminusDB does not support calling different things the same name.
            # So, ignore usage overrides.
            slot = self.schema.slots[aliased_slot_name]

        range_is_class = slot.range in self.schema.classes
        if range_is_class:
            rng = camelcase(slot.range)
        elif slot.range in self.schema.types:
            # XXX Why does `linkml.utils.metamodelcore.Identifier` subclass `str`??
            rng = str(self.schema.types[slot.range].uri)
        else:
            rng = "xsd:string"

        name = slot.name
        # TODO fork nmdc schema and make any slots NOT required in parent class
        #  also NOT required in child classes. Can have opt-in entity validation logic in code.
        # XXX MAG bin -> bin name goes to "mAGBin__bin_name", etc. Weird.

        if not (rng in XSD_Ok or range_is_class):
            raise Exception(
                f"slot range for {name} must be schema class or supported xsd type. "
                f"Range {rng} is of type {type(rng)}."
            )

        # Multivalued takes precedence over optional; a required single-valued
        # slot is stored as the bare range.
        # XXX what about an required multivalued field?
        if slot.multivalued:
            spec = {"@type": "Set", "@class": rng}
        elif not slot.required:
            spec = {"@type": "Optional", "@class": rng}
        else:
            spec = rng

        key = underscore(name)
        self.cls_json[key] = spec
        self.cls_json["@documentation"]["@properties"][key] = slot.description
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
@shared_arguments(TerminusdbGenerator)
@click.command()
def cli(yamlfile, **args):
    """Generate a TerminusDB schema (JSON) from a LinkML schema"""
    # The docstring above is the command's help text; it previously described an
    # unrelated generator ("graphql representation of a biolink model").
    print(TerminusdbGenerator(yamlfile, **args).serialize(**args))


if __name__ == "__main__":
    cli()
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from terminusdb_client import WOQLClient
|
|
5
|
-
|
|
6
|
-
# Database identity and prefixes used when (re)creating the database.
dbid = "nmdc"
label = "NMDC"
description = "."
prefixes = {
    "@base": "terminusdb:///data/",
    "@schema": "terminusdb:///schema#",
    "gold": "https://gold.jgi.doe.gov/biosample?id=",
}

team = "admin"
# NOTE: the client is created and connected at import time, against a local server.
client = WOQLClient("http://localhost:6364/")
# NOTE(review): the key is hard-coded here rather than read from an environment
# variable; for API keys see
# https://docs.terminusdb.com/v10.0/#/terminusx/get-your-api-key
client.connect(user=team, team=team, key="root")
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def import_schema(client):
    """Insert the schema documents stored next to this module into the schema graph.

    Reads "nmdc.schema.terminusdb.json" from this file's directory, sets the
    client's commit message, inserts the documents with graph_type="schema",
    and prints the result.
    """
    schema_path = Path(__file__).parent / "nmdc.schema.terminusdb.json"
    with schema_path.open() as f:
        schema_objects = json.load(f)

    client.message = "Adding NMDC Schema"
    results = client.insert_document(schema_objects, graph_type="schema")
    print(f"Added schema: {results}")
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if __name__ == "__main__":
    # Start from a clean slate: drop the database if it already exists.
    if client.get_database(dbid):
        client.delete_database(dbid, team=team, force=True)

    client.create_database(
        dbid,
        team,
        label=label,
        description=description,
        prefixes=prefixes,
    )

    import_schema(client)
|