nmdc-runtime 1.6.0__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/site/export/ncbi_xml.py +529 -0
- nmdc_runtime/site/export/ncbi_xml_utils.py +206 -0
- nmdc_runtime/site/export/study_metadata.py +24 -4
- nmdc_runtime/site/graphs.py +29 -11
- nmdc_runtime/site/ops.py +180 -44
- nmdc_runtime/site/repository.py +58 -6
- nmdc_runtime/site/resources.py +30 -40
- nmdc_runtime/site/translation/submission_portal_translator.py +16 -9
- nmdc_runtime/util.py +24 -1
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.8.0.dist-info}/METADATA +4 -7
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.8.0.dist-info}/RECORD +15 -17
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.8.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.8.0.dist-info}/entry_points.txt +0 -1
- nmdc_runtime/site/terminusdb/__init__.py +0 -0
- nmdc_runtime/site/terminusdb/generate.py +0 -198
- nmdc_runtime/site/terminusdb/ingest.py +0 -44
- nmdc_runtime/site/terminusdb/schema.py +0 -1671
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.8.0.dist-info}/LICENSE +0 -0
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.8.0.dist-info}/top_level.txt +0 -0
nmdc_runtime/site/repository.py
CHANGED
|
@@ -42,6 +42,8 @@ from nmdc_runtime.site.graphs import (
|
|
|
42
42
|
ingest_neon_soil_metadata,
|
|
43
43
|
ingest_neon_benthic_metadata,
|
|
44
44
|
ingest_neon_surface_water_metadata,
|
|
45
|
+
ensure_alldocs,
|
|
46
|
+
nmdc_study_to_ncbi_submission_export,
|
|
45
47
|
)
|
|
46
48
|
from nmdc_runtime.site.resources import (
|
|
47
49
|
get_mongo,
|
|
@@ -50,7 +52,6 @@ from nmdc_runtime.site.resources import (
|
|
|
50
52
|
nmdc_portal_api_client_resource,
|
|
51
53
|
gold_api_client_resource,
|
|
52
54
|
neon_api_client_resource,
|
|
53
|
-
terminus_resource,
|
|
54
55
|
mongo_resource,
|
|
55
56
|
)
|
|
56
57
|
from nmdc_runtime.site.resources import (
|
|
@@ -68,7 +69,6 @@ resource_defs = {
|
|
|
68
69
|
"nmdc_portal_api_client": nmdc_portal_api_client_resource,
|
|
69
70
|
"gold_api_client": gold_api_client_resource,
|
|
70
71
|
"neon_api_client": neon_api_client_resource,
|
|
71
|
-
"terminus": terminus_resource,
|
|
72
72
|
"mongo": mongo_resource,
|
|
73
73
|
}
|
|
74
74
|
|
|
@@ -451,6 +451,7 @@ def repo():
|
|
|
451
451
|
ensure_jobs.to_job(**preset_normal),
|
|
452
452
|
apply_metadata_in.to_job(**preset_normal),
|
|
453
453
|
export_study_biosamples_metadata.to_job(**preset_normal),
|
|
454
|
+
ensure_alldocs.to_job(**preset_normal),
|
|
454
455
|
]
|
|
455
456
|
schedules = [housekeeping_weekly]
|
|
456
457
|
sensors = [
|
|
@@ -515,8 +516,8 @@ def biosample_submission_ingest():
|
|
|
515
516
|
"nmdc_portal_api_client": {
|
|
516
517
|
"config": {
|
|
517
518
|
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
|
|
518
|
-
"
|
|
519
|
-
"env": "
|
|
519
|
+
"refresh_token": {
|
|
520
|
+
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
|
|
520
521
|
},
|
|
521
522
|
}
|
|
522
523
|
}
|
|
@@ -555,8 +556,8 @@ def biosample_submission_ingest():
|
|
|
555
556
|
"nmdc_portal_api_client": {
|
|
556
557
|
"config": {
|
|
557
558
|
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
|
|
558
|
-
"
|
|
559
|
-
"env": "
|
|
559
|
+
"refresh_token": {
|
|
560
|
+
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
|
|
560
561
|
},
|
|
561
562
|
}
|
|
562
563
|
}
|
|
@@ -852,6 +853,57 @@ def biosample_submission_ingest():
|
|
|
852
853
|
]
|
|
853
854
|
|
|
854
855
|
|
|
856
|
+
@repository
|
|
857
|
+
def biosample_export():
|
|
858
|
+
normal_resources = run_config_frozen__normal_env["resources"]
|
|
859
|
+
return [
|
|
860
|
+
nmdc_study_to_ncbi_submission_export.to_job(
|
|
861
|
+
resource_defs=resource_defs,
|
|
862
|
+
config={
|
|
863
|
+
"resources": merge(
|
|
864
|
+
unfreeze(normal_resources),
|
|
865
|
+
{
|
|
866
|
+
"mongo": {
|
|
867
|
+
"config": {
|
|
868
|
+
"host": {"env": "MONGO_HOST"},
|
|
869
|
+
"username": {"env": "MONGO_USERNAME"},
|
|
870
|
+
"password": {"env": "MONGO_PASSWORD"},
|
|
871
|
+
"dbname": {"env": "MONGO_DBNAME"},
|
|
872
|
+
},
|
|
873
|
+
},
|
|
874
|
+
"runtime_api_site_client": {
|
|
875
|
+
"config": {
|
|
876
|
+
"base_url": {"env": "API_HOST"},
|
|
877
|
+
"client_id": {"env": "API_SITE_CLIENT_ID"},
|
|
878
|
+
"client_secret": {"env": "API_SITE_CLIENT_SECRET"},
|
|
879
|
+
"site_id": {"env": "API_SITE_ID"},
|
|
880
|
+
},
|
|
881
|
+
},
|
|
882
|
+
},
|
|
883
|
+
),
|
|
884
|
+
"ops": {
|
|
885
|
+
"get_ncbi_export_pipeline_study": {
|
|
886
|
+
"config": {
|
|
887
|
+
"nmdc_study_id": "",
|
|
888
|
+
}
|
|
889
|
+
},
|
|
890
|
+
"get_ncbi_export_pipeline_inputs": {
|
|
891
|
+
"config": {
|
|
892
|
+
"nmdc_ncbi_attribute_mapping_file_url": "",
|
|
893
|
+
"ncbi_submission_metadata": {
|
|
894
|
+
"organization": "",
|
|
895
|
+
},
|
|
896
|
+
"ncbi_biosample_metadata": {
|
|
897
|
+
"organism_name": "",
|
|
898
|
+
},
|
|
899
|
+
}
|
|
900
|
+
},
|
|
901
|
+
},
|
|
902
|
+
},
|
|
903
|
+
),
|
|
904
|
+
]
|
|
905
|
+
|
|
906
|
+
|
|
855
907
|
# @repository
|
|
856
908
|
# def validation():
|
|
857
909
|
# graph_jobs = [validate_jgi_job, validate_gold_job, validate_emsl_job]
|
nmdc_runtime/site/resources.py
CHANGED
|
@@ -19,7 +19,6 @@ from frozendict import frozendict
|
|
|
19
19
|
from linkml_runtime.dumpers import json_dumper
|
|
20
20
|
from pydantic import BaseModel, AnyUrl
|
|
21
21
|
from pymongo import MongoClient, ReplaceOne, InsertOne
|
|
22
|
-
from terminusdb_client import WOQLClient
|
|
23
22
|
from toolz import get_in
|
|
24
23
|
from toolz import merge
|
|
25
24
|
|
|
@@ -372,16 +371,37 @@ def gold_api_client_resource(context: InitResourceContext):
|
|
|
372
371
|
|
|
373
372
|
@dataclass
|
|
374
373
|
class NmdcPortalApiClient:
|
|
374
|
+
|
|
375
375
|
base_url: str
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
376
|
+
refresh_token: str
|
|
377
|
+
access_token: Optional[str] = None
|
|
378
|
+
access_token_expires_at: Optional[datetime] = None
|
|
379
|
+
|
|
380
|
+
def _request(self, method: str, endpoint: str, **kwargs):
|
|
381
|
+
r"""
|
|
382
|
+
Submits a request to the specified API endpoint;
|
|
383
|
+
after refreshing the access token, if necessary.
|
|
384
|
+
"""
|
|
385
|
+
if self.access_token is None or datetime.now() > self.access_token_expires_at:
|
|
386
|
+
refresh_response = requests.post(
|
|
387
|
+
f"{self.base_url}/auth/refresh",
|
|
388
|
+
json={"refresh_token": self.refresh_token},
|
|
389
|
+
)
|
|
390
|
+
refresh_response.raise_for_status()
|
|
391
|
+
refresh_body = refresh_response.json()
|
|
392
|
+
self.access_token_expires_at = datetime.now() + timedelta(
|
|
393
|
+
seconds=refresh_body["expires_in"]
|
|
394
|
+
)
|
|
395
|
+
self.access_token = refresh_body["access_token"]
|
|
379
396
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
397
|
+
headers = kwargs.get("headers", {})
|
|
398
|
+
headers["Authorization"] = f"Bearer {self.access_token}"
|
|
399
|
+
return requests.request(
|
|
400
|
+
method, f"{self.base_url}{endpoint}", **kwargs, headers=headers
|
|
384
401
|
)
|
|
402
|
+
|
|
403
|
+
def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
|
|
404
|
+
response = self._request("GET", f"/api/metadata_submission/{id}")
|
|
385
405
|
response.raise_for_status()
|
|
386
406
|
return response.json()
|
|
387
407
|
|
|
@@ -389,13 +409,13 @@ class NmdcPortalApiClient:
|
|
|
389
409
|
@resource(
|
|
390
410
|
config_schema={
|
|
391
411
|
"base_url": StringSource,
|
|
392
|
-
"
|
|
412
|
+
"refresh_token": StringSource,
|
|
393
413
|
}
|
|
394
414
|
)
|
|
395
415
|
def nmdc_portal_api_client_resource(context: InitResourceContext):
|
|
396
416
|
return NmdcPortalApiClient(
|
|
397
417
|
base_url=context.resource_config["base_url"],
|
|
398
|
-
|
|
418
|
+
refresh_token=context.resource_config["refresh_token"],
|
|
399
419
|
)
|
|
400
420
|
|
|
401
421
|
|
|
@@ -512,33 +532,3 @@ def get_mongo(run_config: frozendict):
|
|
|
512
532
|
)
|
|
513
533
|
)
|
|
514
534
|
return mongo_resource(resource_context)
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
class TerminusDB:
|
|
518
|
-
def __init__(self, server_url, user, key, account, dbid):
|
|
519
|
-
self.client = WOQLClient(server_url=server_url)
|
|
520
|
-
self.client.connect(user=user, key=key, account=account)
|
|
521
|
-
db_info = self.client.get_database(dbid=dbid, account=account)
|
|
522
|
-
if db_info is None:
|
|
523
|
-
self.client.create_database(dbid=dbid, accountid=account, label=dbid)
|
|
524
|
-
self.client.create_graph(graph_type="inference", graph_id="main")
|
|
525
|
-
self.client.connect(user=user, key=key, account=account, db=dbid)
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
@resource(
|
|
529
|
-
config_schema={
|
|
530
|
-
"server_url": StringSource,
|
|
531
|
-
"user": StringSource,
|
|
532
|
-
"key": StringSource,
|
|
533
|
-
"account": StringSource,
|
|
534
|
-
"dbid": StringSource,
|
|
535
|
-
}
|
|
536
|
-
)
|
|
537
|
-
def terminus_resource(context):
|
|
538
|
-
return TerminusDB(
|
|
539
|
-
server_url=context.resource_config["server_url"],
|
|
540
|
-
user=context.resource_config["user"],
|
|
541
|
-
key=context.resource_config["key"],
|
|
542
|
-
account=context.resource_config["account"],
|
|
543
|
-
dbid=context.resource_config["dbid"],
|
|
544
|
-
)
|
|
@@ -13,6 +13,9 @@ from toolz import get_in, groupby, concat, valmap, dissoc
|
|
|
13
13
|
from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
|
|
17
|
+
|
|
18
|
+
|
|
16
19
|
@lru_cache
|
|
17
20
|
def _get_schema_view():
|
|
18
21
|
"""Return a SchemaView instance representing the NMDC schema"""
|
|
@@ -98,7 +101,9 @@ class SubmissionPortalTranslator(Translator):
|
|
|
98
101
|
self.study_pi_image_url = study_pi_image_url
|
|
99
102
|
self.study_funding_sources = study_funding_sources
|
|
100
103
|
|
|
101
|
-
self.biosample_extras = group_dicts_by_key(
|
|
104
|
+
self.biosample_extras = group_dicts_by_key(
|
|
105
|
+
BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
|
|
106
|
+
)
|
|
102
107
|
self.biosample_extras_slot_mapping = group_dicts_by_key(
|
|
103
108
|
"subject_id", biosample_extras_slot_mapping
|
|
104
109
|
)
|
|
@@ -521,7 +526,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
521
526
|
:param default_env_package: Default value for `env_package` slot
|
|
522
527
|
:return: nmdc:Biosample
|
|
523
528
|
"""
|
|
524
|
-
|
|
529
|
+
biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
|
|
525
530
|
slots = {
|
|
526
531
|
"id": nmdc_biosample_id,
|
|
527
532
|
"part_of": nmdc_study_id,
|
|
@@ -533,7 +538,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
533
538
|
slots.update(transformed_tab)
|
|
534
539
|
|
|
535
540
|
if self.biosample_extras:
|
|
536
|
-
raw_extras = self.biosample_extras.get(
|
|
541
|
+
raw_extras = self.biosample_extras.get(biosample_key)
|
|
537
542
|
if raw_extras:
|
|
538
543
|
transformed_extras = self._transform_dict_for_class(
|
|
539
544
|
raw_extras, "Biosample", self.biosample_extras_slot_mapping
|
|
@@ -564,7 +569,9 @@ class SubmissionPortalTranslator(Translator):
|
|
|
564
569
|
|
|
565
570
|
sample_data = metadata_submission_data.get("sampleData", {})
|
|
566
571
|
package_name = metadata_submission_data["packageName"]
|
|
567
|
-
sample_data_by_id = groupby(
|
|
572
|
+
sample_data_by_id = groupby(
|
|
573
|
+
BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
|
|
574
|
+
)
|
|
568
575
|
nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
|
|
569
576
|
sample_data_to_nmdc_biosample_ids = dict(
|
|
570
577
|
zip(sample_data_by_id.keys(), nmdc_biosample_ids)
|
|
@@ -583,15 +590,15 @@ class SubmissionPortalTranslator(Translator):
|
|
|
583
590
|
|
|
584
591
|
if self.omics_processing_mapping:
|
|
585
592
|
# If there is data from an OmicsProcessing mapping file, process it now. This part
|
|
586
|
-
# assumes that there is a column in that file with the header
|
|
593
|
+
# assumes that there is a column in that file with the header __biosample_samp_name
|
|
587
594
|
# that can be used to join with the sample data from the submission portal. The
|
|
588
|
-
# biosample identified by that `
|
|
595
|
+
# biosample identified by that `samp_name` will be referenced in the `has_input`
|
|
589
596
|
# slot of the OmicsProcessing object. If a DataObject mapping file was also provided,
|
|
590
597
|
# those objects will also be generated and referenced in the `has_output` slot of the
|
|
591
|
-
# OmicsProcessing object. By keying off of the `
|
|
598
|
+
# OmicsProcessing object. By keying off of the `samp_name` slot of the submission's
|
|
592
599
|
# sample data there is an implicit 1:1 relationship between Biosample objects and
|
|
593
600
|
# OmicsProcessing objects generated here.
|
|
594
|
-
join_key = "
|
|
601
|
+
join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}"
|
|
595
602
|
database.omics_processing_set = []
|
|
596
603
|
database.data_object_set = []
|
|
597
604
|
data_objects_by_sample_data_id = {}
|
|
@@ -617,7 +624,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
617
624
|
or sample_data_id not in sample_data_to_nmdc_biosample_ids
|
|
618
625
|
):
|
|
619
626
|
logging.warning(
|
|
620
|
-
f"Unrecognized biosample
|
|
627
|
+
f"Unrecognized biosample {BIOSAMPLE_UNIQUE_KEY_SLOT}: {sample_data_id}"
|
|
621
628
|
)
|
|
622
629
|
continue
|
|
623
630
|
nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id]
|
nmdc_runtime/util.py
CHANGED
|
@@ -16,7 +16,7 @@ import fastjsonschema
|
|
|
16
16
|
import requests
|
|
17
17
|
from frozendict import frozendict
|
|
18
18
|
from jsonschema.validators import Draft7Validator
|
|
19
|
-
from nmdc_schema.
|
|
19
|
+
from nmdc_schema.nmdc import Database as NMDCDatabase
|
|
20
20
|
from nmdc_schema.get_nmdc_view import ViewGetter
|
|
21
21
|
from pydantic import Field, BaseModel
|
|
22
22
|
from pymongo.database import Database as MongoDatabase
|
|
@@ -376,6 +376,24 @@ collection_name_to_class_names: Dict[str, List[str]] = {
|
|
|
376
376
|
}
|
|
377
377
|
|
|
378
378
|
|
|
379
|
+
def class_hierarchy_as_list(obj) -> list[str]:
|
|
380
|
+
"""
|
|
381
|
+
get list of inherited classes for each concrete class
|
|
382
|
+
"""
|
|
383
|
+
rv = []
|
|
384
|
+
current_class = obj.__class__
|
|
385
|
+
|
|
386
|
+
def recurse_through_bases(cls):
|
|
387
|
+
if cls.__name__ == "YAMLRoot":
|
|
388
|
+
return rv
|
|
389
|
+
rv.append(cls.__name__)
|
|
390
|
+
for base in cls.__bases__:
|
|
391
|
+
recurse_through_bases(base)
|
|
392
|
+
return rv
|
|
393
|
+
|
|
394
|
+
return recurse_through_bases(current_class)
|
|
395
|
+
|
|
396
|
+
|
|
379
397
|
@lru_cache
|
|
380
398
|
def schema_collection_names_with_id_field() -> Set[str]:
|
|
381
399
|
"""
|
|
@@ -393,6 +411,11 @@ def schema_collection_names_with_id_field() -> Set[str]:
|
|
|
393
411
|
return target_collection_names
|
|
394
412
|
|
|
395
413
|
|
|
414
|
+
def populated_schema_collection_names_with_id_field(mdb: MongoDatabase) -> List[str]:
|
|
415
|
+
collection_names = sorted(schema_collection_names_with_id_field())
|
|
416
|
+
return [n for n in collection_names if mdb[n].find_one({"id": {"$exists": True}})]
|
|
417
|
+
|
|
418
|
+
|
|
396
419
|
def ensure_unique_id_indexes(mdb: MongoDatabase):
|
|
397
420
|
"""Ensure that any collections with an "id" field have an index on "id"."""
|
|
398
421
|
candidate_names = (
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nmdc_runtime
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: A runtime system for NMDC data management and orchestration
|
|
5
5
|
Home-page: https://github.com/microbiomedata/nmdc-runtime
|
|
6
6
|
Author: Donny Winston
|
|
@@ -77,18 +77,15 @@ The runtime features:
|
|
|
77
77
|
- `schedules` trigger recurring pipeline runs based on time
|
|
78
78
|
- `sensors` trigger pipeline runs based on external state
|
|
79
79
|
- Each `pipeline` can declare dependencies on any runtime `resources` or additional
|
|
80
|
-
configuration. There are
|
|
80
|
+
configuration. There are MongoDB `resources` defined, as well as `preset`
|
|
81
81
|
configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
|
|
82
82
|
look to a set of known environment variables to load resources configurations, depending on
|
|
83
83
|
the `mode`.
|
|
84
|
-
|
|
85
|
-
2. A [TerminusDB](https://terminusdb.com/) database supporting revision control of schema-validated
|
|
86
|
-
data.
|
|
87
84
|
|
|
88
|
-
|
|
85
|
+
2. A MongoDB database supporting write-once, high-throughput internal
|
|
89
86
|
data storage by the nmdc-runtime FastAPI instance.
|
|
90
87
|
|
|
91
|
-
|
|
88
|
+
3. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
|
|
92
89
|
database, as a hub for data management and workflow automation.
|
|
93
90
|
|
|
94
91
|
## Local Development
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
|
|
3
3
|
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
nmdc_runtime/util.py,sha256=
|
|
4
|
+
nmdc_runtime/util.py,sha256=nfj1MjZzVaxs9pKrHo6A98yGAzL-jHQ0zTGs_sOkBnM,20531
|
|
5
5
|
nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
|
|
@@ -35,10 +35,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
|
|
|
35
35
|
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
|
|
37
37
|
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
nmdc_runtime/site/graphs.py,sha256=
|
|
39
|
-
nmdc_runtime/site/ops.py,sha256=
|
|
40
|
-
nmdc_runtime/site/repository.py,sha256=
|
|
41
|
-
nmdc_runtime/site/resources.py,sha256=
|
|
38
|
+
nmdc_runtime/site/graphs.py,sha256=QdmNvdtDLCgpJyKviLUj-IIF1gPS_vYzl1Kzv2mSF4g,12122
|
|
39
|
+
nmdc_runtime/site/ops.py,sha256=btdgcGBwNOFnVCzAa-vO4Gs1lMxgnjcRFd8B28X0who,38222
|
|
40
|
+
nmdc_runtime/site/repository.py,sha256=xTHAfokzbZVqlRFG65VuHxTfZfhyKZskOaCSGyrW_hw,37540
|
|
41
|
+
nmdc_runtime/site/resources.py,sha256=ZSH1yvA-li0R7Abc22_v0XLbjBYf5igETr2G01J3hnc,17557
|
|
42
42
|
nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
|
|
43
43
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
|
|
@@ -50,13 +50,11 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
50
50
|
nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
|
|
51
51
|
nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
|
|
52
52
|
nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
-
nmdc_runtime/site/export/
|
|
53
|
+
nmdc_runtime/site/export/ncbi_xml.py,sha256=-GflgZO_Q4Y2rm53QIkI7vYY6pWwCf_l7tolGgTXiBg,21026
|
|
54
|
+
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=CqrtjwzmUbZXEW8aD-KpnCV_PlXVH-Gqp309nw3vbeo,6464
|
|
55
|
+
nmdc_runtime/site/export/study_metadata.py,sha256=WRU0F1ksWfNX3k9LD91Pn2DuLA-IOpGvYPJd6DnguEs,4819
|
|
54
56
|
nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
57
|
nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
|
|
56
|
-
nmdc_runtime/site/terminusdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
nmdc_runtime/site/terminusdb/generate.py,sha256=Z3c06LDx3TGw4pvPRO97caQvzc8SuhGmPIr_d5b_E9I,6144
|
|
58
|
-
nmdc_runtime/site/terminusdb/ingest.py,sha256=WE_V4vRRnlL6hIBU1TDSUheYOBWS9d5g6FHPS64jzvM,1245
|
|
59
|
-
nmdc_runtime/site/terminusdb/schema.py,sha256=3e39rHUSZsNbN_F0SHHNsvcEGRWtYa6O9KNj3cH3tUs,77129
|
|
60
58
|
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
59
|
nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
|
|
62
60
|
nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
|
|
@@ -66,7 +64,7 @@ nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=e_7tXFrP0PpdhqUC
|
|
|
66
64
|
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=cJJ_QPva5G5SIT_7DjCSsqbDvgbiKGqUYrxK3nx7_Lw,37634
|
|
67
65
|
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=6LaFwBnVx6TN9v1D-G6LFrDxY0TK05AvMklx0E1tTeQ,26590
|
|
68
66
|
nmdc_runtime/site/translation/neon_utils.py,sha256=mdxJVPb3zbD4DiKW3Fwgk22kjczKMwkcozvy7fwteTE,5203
|
|
69
|
-
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=
|
|
67
|
+
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=KiVO1vohhrJGfwzLJOumRfyHjcbYfswBIBvkYIdFxv8,28097
|
|
70
68
|
nmdc_runtime/site/translation/translator.py,sha256=xM9dM-nTgSWwu5HFoUVNHf8kqk9iiH4PgWdSx4OKxEk,601
|
|
71
69
|
nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
|
|
72
70
|
nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -74,9 +72,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
|
|
|
74
72
|
nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
|
|
75
73
|
nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
|
|
76
74
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
77
|
-
nmdc_runtime-1.
|
|
78
|
-
nmdc_runtime-1.
|
|
79
|
-
nmdc_runtime-1.
|
|
80
|
-
nmdc_runtime-1.
|
|
81
|
-
nmdc_runtime-1.
|
|
82
|
-
nmdc_runtime-1.
|
|
75
|
+
nmdc_runtime-1.8.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
76
|
+
nmdc_runtime-1.8.0.dist-info/METADATA,sha256=lBQzzEEXtwobBObmYmDogAdFKQMLvSJn3wmjG8lHQ5I,7302
|
|
77
|
+
nmdc_runtime-1.8.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
78
|
+
nmdc_runtime-1.8.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
|
|
79
|
+
nmdc_runtime-1.8.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
80
|
+
nmdc_runtime-1.8.0.dist-info/RECORD,,
|
|
@@ -2,4 +2,3 @@
|
|
|
2
2
|
nmdcdb-mongodump = nmdc_runtime.site.backup.nmdcdb_mongodump:main
|
|
3
3
|
nmdcdb-mongoexport = nmdc_runtime.site.backup.nmdcdb_mongoexport:main
|
|
4
4
|
nmdcdb-mongoimport = nmdc_runtime.site.backup.nmdcdb_mongoimport:main
|
|
5
|
-
schemagen-terminusdb = nmdc_runtime.site.terminusdb.generate:cli
|
|
File without changes
|
|
@@ -1,198 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Example usage:
|
|
3
|
-
$ schemagen-terminusdb ../nmdc-schema/src/schema/nmdc.yaml \
|
|
4
|
-
> nmdc_runtime/site/terminusdb/nmdc.schema.terminusdb.json
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import json
|
|
8
|
-
import os
|
|
9
|
-
from typing import Union, TextIO, List
|
|
10
|
-
|
|
11
|
-
import click
|
|
12
|
-
from linkml.utils.generator import Generator, shared_arguments
|
|
13
|
-
from linkml_runtime.linkml_model.meta import (
|
|
14
|
-
SchemaDefinition,
|
|
15
|
-
ClassDefinition,
|
|
16
|
-
SlotDefinition,
|
|
17
|
-
)
|
|
18
|
-
from linkml_runtime.utils.formatutils import camelcase, be, underscore
|
|
19
|
-
|
|
20
|
-
# http://books.xmlschemata.org/relaxng/relax-CHP-19.html
|
|
21
|
-
XSD_Ok = {
|
|
22
|
-
"xsd:anyURI",
|
|
23
|
-
"xsd:base64Binary",
|
|
24
|
-
"xsd:boolean",
|
|
25
|
-
"xsd:byte",
|
|
26
|
-
"xsd:date",
|
|
27
|
-
"xsd:dateTime",
|
|
28
|
-
"xsd:decimal",
|
|
29
|
-
"xsd:double",
|
|
30
|
-
"xsd:duration",
|
|
31
|
-
"xsd:ENTITIES",
|
|
32
|
-
"xsd:ENTITY",
|
|
33
|
-
"xsd:float",
|
|
34
|
-
"xsd:gDay",
|
|
35
|
-
"xsd:gMonth",
|
|
36
|
-
"xsd:gMonthDay",
|
|
37
|
-
"xsd:gYear",
|
|
38
|
-
"xsd:gYearMonth",
|
|
39
|
-
"xsd:hexBinary",
|
|
40
|
-
"xsd:ID",
|
|
41
|
-
"xsd:IDREF",
|
|
42
|
-
"xsd:IDREFS",
|
|
43
|
-
"xsd:int",
|
|
44
|
-
"xsd:integer",
|
|
45
|
-
"xsd:language",
|
|
46
|
-
"xsd:long",
|
|
47
|
-
"xsd:Name",
|
|
48
|
-
"xsd:NCName",
|
|
49
|
-
"xsd:negativeInteger",
|
|
50
|
-
"xsd:NMTOKEN",
|
|
51
|
-
"xsd:NMTOKENS",
|
|
52
|
-
"xsd:nonNegativeInteger",
|
|
53
|
-
"xsd:nonPositiveInteger",
|
|
54
|
-
"xsd:normalizedString",
|
|
55
|
-
"xsd:NOTATION",
|
|
56
|
-
"xsd:positiveInteger",
|
|
57
|
-
"xsd:short",
|
|
58
|
-
"xsd:string",
|
|
59
|
-
"xsd:time",
|
|
60
|
-
"xsd:token",
|
|
61
|
-
"xsd:unsignedByte",
|
|
62
|
-
"xsd:unsignedInt",
|
|
63
|
-
"xsd:unsignedLong",
|
|
64
|
-
"xsd:unsignedShort",
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def as_list(thing) -> list:
|
|
69
|
-
return thing if isinstance(thing, list) else [thing]
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def has_field(graph: List[dict], cls: dict, field: str) -> bool:
|
|
73
|
-
if field in cls:
|
|
74
|
-
return True
|
|
75
|
-
for parent_id in as_list(cls.get("@inherits", [])):
|
|
76
|
-
parent_cls = next(
|
|
77
|
-
graph_cls for graph_cls in graph if graph_cls.get("@id") == parent_id
|
|
78
|
-
)
|
|
79
|
-
if parent_cls and has_field(graph, parent_cls, field):
|
|
80
|
-
return True
|
|
81
|
-
return False
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class TerminusdbGenerator(Generator):
|
|
85
|
-
"""Generates JSON file to pass to WOQLClient.insert_document(..., graph_type="schema")`."""
|
|
86
|
-
|
|
87
|
-
generatorname = os.path.basename(__file__)
|
|
88
|
-
generatorversion = "0.1.0"
|
|
89
|
-
valid_formats = ["json"]
|
|
90
|
-
visit_all_class_slots = True
|
|
91
|
-
|
|
92
|
-
def __init__(self, schema: Union[str, TextIO, SchemaDefinition], **kwargs) -> None:
|
|
93
|
-
super().__init__(schema, **kwargs)
|
|
94
|
-
self.graph = []
|
|
95
|
-
self.cls_json = {}
|
|
96
|
-
|
|
97
|
-
def visit_schema(self, inline: bool = False, **kwargs) -> None:
|
|
98
|
-
self.graph.append(
|
|
99
|
-
{
|
|
100
|
-
"@type": "@context",
|
|
101
|
-
"@base": "https://api.microbiomedata.org/nmdcschema/ids/",
|
|
102
|
-
"@schema": "https://w3id.org/nmdc/",
|
|
103
|
-
}
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
def end_schema(self, **_) -> None:
|
|
107
|
-
for cls in self.graph:
|
|
108
|
-
if has_field(self.graph, cls, "id"):
|
|
109
|
-
cls["@key"] = {"@type": "Lexical", "@fields": ["id"]}
|
|
110
|
-
print(json.dumps(self.graph, indent=2))
|
|
111
|
-
|
|
112
|
-
def visit_class(self, cls: ClassDefinition) -> bool:
|
|
113
|
-
self.cls_json = {
|
|
114
|
-
"@type": "Class",
|
|
115
|
-
"@id": camelcase(cls.name),
|
|
116
|
-
"@documentation": {
|
|
117
|
-
"@comment": be(cls.description),
|
|
118
|
-
"@properties": {},
|
|
119
|
-
},
|
|
120
|
-
}
|
|
121
|
-
if cls.is_a:
|
|
122
|
-
self.cls_json["@inherits"] = camelcase(cls.is_a)
|
|
123
|
-
if cls.abstract:
|
|
124
|
-
self.cls_json["@abstract"] = []
|
|
125
|
-
return True
|
|
126
|
-
|
|
127
|
-
def end_class(self, cls: ClassDefinition) -> None:
|
|
128
|
-
self.cls_json["@id"] = cls.definition_uri.split(":")[-1].rpartition("/")[-1]
|
|
129
|
-
self.graph.append(self.cls_json)
|
|
130
|
-
|
|
131
|
-
# sounding board as solist
|
|
132
|
-
# safe space to ask questions. more of a whatsapp group.
|
|
133
|
-
# both re: business, how to structure proposals, etc.
|
|
134
|
-
# And also technical content suggestions. R data pipeline / copy/paste in Figma
|
|
135
|
-
# - how far do you go in automation in delivery
|
|
136
|
-
|
|
137
|
-
def visit_class_slot(
|
|
138
|
-
self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition
|
|
139
|
-
) -> None:
|
|
140
|
-
if slot not in self.own_slots(cls):
|
|
141
|
-
return
|
|
142
|
-
if slot.is_usage_slot:
|
|
143
|
-
# TerminusDB does not support calling different things the same name.
|
|
144
|
-
# So, ignore usage overrides.
|
|
145
|
-
slot = self.schema.slots[aliased_slot_name]
|
|
146
|
-
|
|
147
|
-
if slot.range in self.schema.classes:
|
|
148
|
-
rng = camelcase(slot.range)
|
|
149
|
-
elif slot.range in self.schema.types:
|
|
150
|
-
# XXX Why does `linkml.utils.metamodelcore.Identifier` subclass `str`??
|
|
151
|
-
rng = str(self.schema.types[slot.range].uri)
|
|
152
|
-
else:
|
|
153
|
-
rng = "xsd:string"
|
|
154
|
-
|
|
155
|
-
# name = (
|
|
156
|
-
# f"{cls.name} {aliased_slot_name}"
|
|
157
|
-
# if slot.is_usage_slot
|
|
158
|
-
# else aliased_slot_name
|
|
159
|
-
# )
|
|
160
|
-
name = slot.name
|
|
161
|
-
# TODO fork nmdc schema and make any slots NOT required in parent class
|
|
162
|
-
# also NOT required in child classes. Can have opt-in entity validation logic in code.
|
|
163
|
-
|
|
164
|
-
# XXX MAG bin -> bin name goes to "mAGBin__bin_name", etc. Weird.
|
|
165
|
-
|
|
166
|
-
# # translate to terminusdb xsd builtins:
|
|
167
|
-
# if rng == "xsd:int":
|
|
168
|
-
# rng = "xsd:integer"
|
|
169
|
-
# elif rng == "xsd:float":
|
|
170
|
-
# rng = "xsd:double"
|
|
171
|
-
# elif rng == "xsd:language":
|
|
172
|
-
# rng = "xsd:string"
|
|
173
|
-
|
|
174
|
-
if rng not in XSD_Ok and slot.range not in self.schema.classes:
|
|
175
|
-
raise Exception(
|
|
176
|
-
f"slot range for {name} must be schema class or supported xsd type. "
|
|
177
|
-
f"Range {rng} is of type {type(rng)}."
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
self.cls_json[underscore(name)] = rng
|
|
181
|
-
self.cls_json["@documentation"]["@properties"][
|
|
182
|
-
underscore(name)
|
|
183
|
-
] = slot.description
|
|
184
|
-
if not slot.required:
|
|
185
|
-
self.cls_json[underscore(name)] = {"@type": "Optional", "@class": rng}
|
|
186
|
-
if slot.multivalued: # XXX what about an required multivalued field?
|
|
187
|
-
self.cls_json[underscore(name)] = {"@type": "Set", "@class": rng}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
@shared_arguments(TerminusdbGenerator)
|
|
191
|
-
@click.command()
|
|
192
|
-
def cli(yamlfile, **args):
|
|
193
|
-
"""Generate graphql representation of a biolink model"""
|
|
194
|
-
print(TerminusdbGenerator(yamlfile, **args).serialize(**args))
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if __name__ == "__main__":
|
|
198
|
-
cli()
|