nmdc_runtime-1.6.0-py3-none-any.whl → nmdc_runtime-1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. See the registry's advisory page for this release for more details.

@@ -22,7 +22,6 @@ from nmdc_runtime.site.ops import (
22
22
  hello,
23
23
  mongo_stats,
24
24
  submit_metadata_to_db,
25
- update_schema,
26
25
  filter_ops_undone_expired,
27
26
  construct_jobs,
28
27
  maybe_post_jobs,
@@ -49,7 +48,14 @@ from nmdc_runtime.site.ops import (
49
48
  get_neon_pipeline_inputs,
50
49
  get_df_from_url,
51
50
  site_code_mapping,
51
+ get_ncbi_export_pipeline_study,
52
+ get_data_objects_from_biosamples,
53
+ get_omics_processing_from_biosamples,
54
+ get_ncbi_export_pipeline_inputs,
55
+ ncbi_submission_xml_from_nmdc_study,
56
+ ncbi_submission_xml_asset,
52
57
  )
58
+ from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
53
59
 
54
60
 
55
61
  @graph
@@ -87,17 +93,6 @@ def hello_mongo():
87
93
  mongo_stats()
88
94
 
89
95
 
90
- @graph
91
- def update_terminus():
92
- """
93
- A pipeline definition. This example pipeline has a single solid.
94
-
95
- For more hints on writing Dagster pipelines, see our documentation overview on Pipelines:
96
- https://docs.dagster.io/overview/solids-pipelines/pipelines
97
- """
98
- update_schema()
99
-
100
-
101
96
  @graph
102
97
  def housekeeping():
103
98
  delete_operations(list_operations(filter_ops_undone_expired()))
@@ -381,3 +376,20 @@ def ingest_neon_surface_water_metadata():
381
376
  )
382
377
  run_id = submit_metadata_to_db(database)
383
378
  poll_for_run_completion(run_id)
379
+
380
+
381
+ @graph
382
+ def nmdc_study_to_ncbi_submission_export():
383
+ nmdc_study = get_ncbi_export_pipeline_study()
384
+ ncbi_submission_metadata = get_ncbi_export_pipeline_inputs()
385
+ biosamples = get_biosamples_by_study_id(nmdc_study)
386
+ omics_processing_records = get_omics_processing_from_biosamples(biosamples)
387
+ data_objects = get_data_objects_from_biosamples(biosamples)
388
+ xml_data = ncbi_submission_xml_from_nmdc_study(
389
+ nmdc_study,
390
+ ncbi_submission_metadata,
391
+ biosamples,
392
+ omics_processing_records,
393
+ data_objects,
394
+ )
395
+ ncbi_submission_xml_asset(xml_data)
nmdc_runtime/site/ops.py CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime, timezone
9
9
  from io import BytesIO, StringIO
10
10
  from typing import Tuple
11
11
  from zipfile import ZipFile
12
+
12
13
  import pandas as pd
13
14
  import requests
14
15
 
@@ -29,10 +30,14 @@ from dagster import (
29
30
  String,
30
31
  op,
31
32
  Optional,
33
+ Field,
34
+ Permissive,
35
+ Bool,
32
36
  )
33
37
  from gridfs import GridFS
34
38
  from linkml_runtime.dumpers import json_dumper
35
39
  from linkml_runtime.utils.yamlutils import YAMLRoot
40
+ from nmdc_runtime.api.db.mongo import get_mongo_db
36
41
  from nmdc_runtime.api.core.idgen import generate_one_id
37
42
  from nmdc_runtime.api.core.metadata import (
38
43
  _validate_changesheet,
@@ -42,6 +47,7 @@ from nmdc_runtime.api.core.metadata import (
42
47
  )
43
48
  from nmdc_runtime.api.core.util import dotted_path_for, hash_from_str, json_clean, now
44
49
  from nmdc_runtime.api.endpoints.util import persist_content_and_get_drs_object
50
+ from nmdc_runtime.api.endpoints.find import find_study_by_id
45
51
  from nmdc_runtime.api.models.job import Job, JobOperationMetadata
46
52
  from nmdc_runtime.api.models.metadata import ChangesheetIn
47
53
  from nmdc_runtime.api.models.operation import (
@@ -55,6 +61,11 @@ from nmdc_runtime.api.models.run import (
55
61
  _add_run_complete_event,
56
62
  )
57
63
  from nmdc_runtime.api.models.util import ResultT
64
+ from nmdc_runtime.site.export.ncbi_xml import NCBISubmissionXML
65
+ from nmdc_runtime.site.export.ncbi_xml_utils import (
66
+ fetch_data_objects_from_biosamples,
67
+ fetch_omics_processing_from_biosamples,
68
+ )
58
69
  from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
59
70
  from nmdc_runtime.site.resources import (
60
71
  NmdcPortalApiClient,
@@ -86,7 +97,6 @@ from nmdc_schema import nmdc
86
97
  from pydantic import BaseModel
87
98
  from pymongo.database import Database as MongoDatabase
88
99
  from starlette import status
89
- from terminusdb_client.woqlquery import WOQLQuery as WQ
90
100
  from toolz import assoc, dissoc, get_in, valfilter, identity
91
101
 
92
102
 
@@ -111,14 +121,6 @@ def log_env(context):
111
121
  context.log.info("\n".join(out))
112
122
 
113
123
 
114
- @op(required_resource_keys={"terminus"})
115
- def list_databases(context) -> List[String]:
116
- client = context.resources.terminus.client
117
- list_ = client.list_databases()
118
- context.log.info(f"databases: {list_}")
119
- return list_
120
-
121
-
122
124
  @op(required_resource_keys={"mongo"})
123
125
  def mongo_stats(context) -> List[str]:
124
126
  db = context.resources.mongo.db
@@ -127,41 +129,6 @@ def mongo_stats(context) -> List[str]:
127
129
  return collection_names
128
130
 
129
131
 
130
- @op(required_resource_keys={"terminus"})
131
- def update_schema(context):
132
- with tempfile.TemporaryDirectory() as tmpdirname:
133
- try:
134
- context.log.info("shallow-cloning nmdc-schema repo")
135
- subprocess.check_output(
136
- "git clone https://github.com/microbiomedata/nmdc-schema.git"
137
- f" --branch main --single-branch {tmpdirname}/nmdc-schema",
138
- shell=True,
139
- )
140
- context.log.info("generating TerminusDB JSON-LD from NMDC LinkML")
141
- subprocess.check_output(
142
- f"gen-terminusdb {tmpdirname}/nmdc-schema/src/schema/nmdc.yaml"
143
- f" > {tmpdirname}/nmdc.terminus.json",
144
- shell=True,
145
- )
146
- except subprocess.CalledProcessError as e:
147
- if e.stdout:
148
- context.log.debug(e.stdout.decode())
149
- if e.stderr:
150
- context.log.error(e.stderr.decode())
151
- context.log.debug(str(e.returncode))
152
- raise e
153
-
154
- with open(f"{tmpdirname}/nmdc.terminus.json") as f:
155
- woql_dict = json.load(f)
156
-
157
- context.log.info("Updating terminus schema via WOQLQuery")
158
- rv = WQ(query=woql_dict).execute(
159
- context.resources.terminus.client, "update schema via WOQL"
160
- )
161
- context.log.info(str(rv))
162
- return rv
163
-
164
-
165
132
  @op(
166
133
  required_resource_keys={"mongo", "runtime_api_site_client"},
167
134
  retry_policy=RetryPolicy(max_retries=2),
@@ -768,6 +735,33 @@ def export_json_to_drs(
768
735
  return ["/objects/" + drs_object["id"]]
769
736
 
770
737
 
738
+ @op(
739
+ description="NCBI Submission XML file rendered in a Dagster Asset",
740
+ out=Out(description="XML content rendered through Dagit UI"),
741
+ )
742
+ def ncbi_submission_xml_asset(context: OpExecutionContext, data: str):
743
+ filename = "ncbi_submission.xml"
744
+ file_path = os.path.join(context.instance.storage_directory(), filename)
745
+
746
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
747
+
748
+ with open(file_path, "w") as f:
749
+ f.write(data)
750
+
751
+ context.log_event(
752
+ AssetMaterialization(
753
+ asset_key="ncbi_submission_xml",
754
+ description="NCBI Submission XML Data",
755
+ metadata={
756
+ "file_path": MetadataValue.path(file_path),
757
+ "xml": MetadataValue.text(data),
758
+ },
759
+ )
760
+ )
761
+
762
+ return Output(data)
763
+
764
+
771
765
  def unique_field_values(docs: List[Dict[str, Any]], field: str):
772
766
  return {doc[field] for doc in docs if field in doc}
773
767
 
@@ -977,3 +971,85 @@ def site_code_mapping() -> dict:
977
971
  raise Exception(
978
972
  f"Failed to fetch site data from {endpoint}. Status code: {response.status_code}, Content: {response.content}"
979
973
  )
974
+
975
+
976
+ @op(config_schema={"nmdc_study_id": str}, required_resource_keys={"mongo"})
977
+ def get_ncbi_export_pipeline_study(context: OpExecutionContext) -> Any:
978
+ nmdc_study = find_study_by_id(
979
+ context.op_config["nmdc_study_id"], context.resources.mongo.db
980
+ )
981
+ return nmdc_study
982
+
983
+
984
+ @op(
985
+ config_schema={
986
+ "nmdc_ncbi_attribute_mapping_file_url": str,
987
+ "ncbi_submission_metadata": Field(
988
+ Permissive(
989
+ {
990
+ "organization": String,
991
+ }
992
+ ),
993
+ is_required=True,
994
+ description="General metadata about the NCBI submission.",
995
+ ),
996
+ "ncbi_biosample_metadata": Field(
997
+ Permissive(
998
+ {
999
+ "organism_name": String,
1000
+ }
1001
+ ),
1002
+ is_required=True,
1003
+ description="Metadata for one or many NCBI BioSample in the Submission.",
1004
+ ),
1005
+ },
1006
+ out=Out(Dict),
1007
+ )
1008
+ def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
1009
+ nmdc_ncbi_attribute_mapping_file_url = context.op_config[
1010
+ "nmdc_ncbi_attribute_mapping_file_url"
1011
+ ]
1012
+ ncbi_submission_metadata = context.op_config.get("ncbi_submission_metadata", {})
1013
+ ncbi_biosample_metadata = context.op_config.get("ncbi_biosample_metadata", {})
1014
+
1015
+ return {
1016
+ "nmdc_ncbi_attribute_mapping_file_url": nmdc_ncbi_attribute_mapping_file_url,
1017
+ "ncbi_submission_metadata": ncbi_submission_metadata,
1018
+ "ncbi_biosample_metadata": ncbi_biosample_metadata,
1019
+ }
1020
+
1021
+
1022
+ @op(required_resource_keys={"mongo"})
1023
+ def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: list):
1024
+ mdb = context.resources.mongo.db
1025
+ alldocs_collection = mdb["alldocs"]
1026
+ biosample_data_objects = fetch_data_objects_from_biosamples(
1027
+ alldocs_collection, biosamples
1028
+ )
1029
+ return biosample_data_objects
1030
+
1031
+
1032
+ @op(required_resource_keys={"mongo"})
1033
+ def get_omics_processing_from_biosamples(context: OpExecutionContext, biosamples: list):
1034
+ mdb = context.resources.mongo.db
1035
+ alldocs_collection = mdb["alldocs"]
1036
+ biosample_omics_processing = fetch_omics_processing_from_biosamples(
1037
+ alldocs_collection, biosamples
1038
+ )
1039
+ return biosample_omics_processing
1040
+
1041
+
1042
+ @op
1043
+ def ncbi_submission_xml_from_nmdc_study(
1044
+ context: OpExecutionContext,
1045
+ nmdc_study: Any,
1046
+ ncbi_exporter_metadata: dict,
1047
+ biosamples: list,
1048
+ omics_processing_records: list,
1049
+ data_objects: list,
1050
+ ) -> str:
1051
+ ncbi_exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
1052
+ ncbi_xml = ncbi_exporter.get_submission_xml(
1053
+ biosamples, omics_processing_records, data_objects
1054
+ )
1055
+ return ncbi_xml
@@ -42,6 +42,7 @@ from nmdc_runtime.site.graphs import (
42
42
  ingest_neon_soil_metadata,
43
43
  ingest_neon_benthic_metadata,
44
44
  ingest_neon_surface_water_metadata,
45
+ nmdc_study_to_ncbi_submission_export,
45
46
  )
46
47
  from nmdc_runtime.site.resources import (
47
48
  get_mongo,
@@ -50,7 +51,6 @@ from nmdc_runtime.site.resources import (
50
51
  nmdc_portal_api_client_resource,
51
52
  gold_api_client_resource,
52
53
  neon_api_client_resource,
53
- terminus_resource,
54
54
  mongo_resource,
55
55
  )
56
56
  from nmdc_runtime.site.resources import (
@@ -68,7 +68,6 @@ resource_defs = {
68
68
  "nmdc_portal_api_client": nmdc_portal_api_client_resource,
69
69
  "gold_api_client": gold_api_client_resource,
70
70
  "neon_api_client": neon_api_client_resource,
71
- "terminus": terminus_resource,
72
71
  "mongo": mongo_resource,
73
72
  }
74
73
 
@@ -515,8 +514,8 @@ def biosample_submission_ingest():
515
514
  "nmdc_portal_api_client": {
516
515
  "config": {
517
516
  "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
518
- "session_cookie": {
519
- "env": "NMDC_PORTAL_API_SESSION_COOKIE"
517
+ "refresh_token": {
518
+ "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
520
519
  },
521
520
  }
522
521
  }
@@ -555,8 +554,8 @@ def biosample_submission_ingest():
555
554
  "nmdc_portal_api_client": {
556
555
  "config": {
557
556
  "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
558
- "session_cookie": {
559
- "env": "NMDC_PORTAL_API_SESSION_COOKIE"
557
+ "refresh_token": {
558
+ "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
560
559
  },
561
560
  }
562
561
  }
@@ -852,6 +851,57 @@ def biosample_submission_ingest():
852
851
  ]
853
852
 
854
853
 
854
+ @repository
855
+ def biosample_export():
856
+ normal_resources = run_config_frozen__normal_env["resources"]
857
+ return [
858
+ nmdc_study_to_ncbi_submission_export.to_job(
859
+ resource_defs=resource_defs,
860
+ config={
861
+ "resources": merge(
862
+ unfreeze(normal_resources),
863
+ {
864
+ "mongo": {
865
+ "config": {
866
+ "host": {"env": "MONGO_HOST"},
867
+ "username": {"env": "MONGO_USERNAME"},
868
+ "password": {"env": "MONGO_PASSWORD"},
869
+ "dbname": {"env": "MONGO_DBNAME"},
870
+ },
871
+ },
872
+ "runtime_api_site_client": {
873
+ "config": {
874
+ "base_url": {"env": "API_HOST"},
875
+ "client_id": {"env": "API_SITE_CLIENT_ID"},
876
+ "client_secret": {"env": "API_SITE_CLIENT_SECRET"},
877
+ "site_id": {"env": "API_SITE_ID"},
878
+ },
879
+ },
880
+ },
881
+ ),
882
+ "ops": {
883
+ "get_ncbi_export_pipeline_study": {
884
+ "config": {
885
+ "nmdc_study_id": "",
886
+ }
887
+ },
888
+ "get_ncbi_export_pipeline_inputs": {
889
+ "config": {
890
+ "nmdc_ncbi_attribute_mapping_file_url": "",
891
+ "ncbi_submission_metadata": {
892
+ "organization": "",
893
+ },
894
+ "ncbi_biosample_metadata": {
895
+ "organism_name": "",
896
+ },
897
+ }
898
+ },
899
+ },
900
+ },
901
+ ),
902
+ ]
903
+
904
+
855
905
  # @repository
856
906
  # def validation():
857
907
  # graph_jobs = [validate_jgi_job, validate_gold_job, validate_emsl_job]
@@ -19,7 +19,6 @@ from frozendict import frozendict
19
19
  from linkml_runtime.dumpers import json_dumper
20
20
  from pydantic import BaseModel, AnyUrl
21
21
  from pymongo import MongoClient, ReplaceOne, InsertOne
22
- from terminusdb_client import WOQLClient
23
22
  from toolz import get_in
24
23
  from toolz import merge
25
24
 
@@ -372,16 +371,37 @@ def gold_api_client_resource(context: InitResourceContext):
372
371
 
373
372
  @dataclass
374
373
  class NmdcPortalApiClient:
374
+
375
375
  base_url: str
376
- # Using a cookie for authentication is not ideal and should be replaced
377
- # when this API has an another authentication method
378
- session_cookie: str
376
+ refresh_token: str
377
+ access_token: Optional[str] = None
378
+ access_token_expires_at: Optional[datetime] = None
379
+
380
+ def _request(self, method: str, endpoint: str, **kwargs):
381
+ r"""
382
+ Submits a request to the specified API endpoint;
383
+ after refreshing the access token, if necessary.
384
+ """
385
+ if self.access_token is None or datetime.now() > self.access_token_expires_at:
386
+ refresh_response = requests.post(
387
+ f"{self.base_url}/auth/refresh",
388
+ json={"refresh_token": self.refresh_token},
389
+ )
390
+ refresh_response.raise_for_status()
391
+ refresh_body = refresh_response.json()
392
+ self.access_token_expires_at = datetime.now() + timedelta(
393
+ seconds=refresh_body["expires_in"]
394
+ )
395
+ self.access_token = refresh_body["access_token"]
379
396
 
380
- def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
381
- response = requests.get(
382
- f"{self.base_url}/api/metadata_submission/{id}",
383
- cookies={"session": self.session_cookie},
397
+ headers = kwargs.get("headers", {})
398
+ headers["Authorization"] = f"Bearer {self.access_token}"
399
+ return requests.request(
400
+ method, f"{self.base_url}{endpoint}", **kwargs, headers=headers
384
401
  )
402
+
403
+ def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
404
+ response = self._request("GET", f"/api/metadata_submission/{id}")
385
405
  response.raise_for_status()
386
406
  return response.json()
387
407
 
@@ -389,13 +409,13 @@ class NmdcPortalApiClient:
389
409
  @resource(
390
410
  config_schema={
391
411
  "base_url": StringSource,
392
- "session_cookie": StringSource,
412
+ "refresh_token": StringSource,
393
413
  }
394
414
  )
395
415
  def nmdc_portal_api_client_resource(context: InitResourceContext):
396
416
  return NmdcPortalApiClient(
397
417
  base_url=context.resource_config["base_url"],
398
- session_cookie=context.resource_config["session_cookie"],
418
+ refresh_token=context.resource_config["refresh_token"],
399
419
  )
400
420
 
401
421
 
@@ -512,33 +532,3 @@ def get_mongo(run_config: frozendict):
512
532
  )
513
533
  )
514
534
  return mongo_resource(resource_context)
515
-
516
-
517
- class TerminusDB:
518
- def __init__(self, server_url, user, key, account, dbid):
519
- self.client = WOQLClient(server_url=server_url)
520
- self.client.connect(user=user, key=key, account=account)
521
- db_info = self.client.get_database(dbid=dbid, account=account)
522
- if db_info is None:
523
- self.client.create_database(dbid=dbid, accountid=account, label=dbid)
524
- self.client.create_graph(graph_type="inference", graph_id="main")
525
- self.client.connect(user=user, key=key, account=account, db=dbid)
526
-
527
-
528
- @resource(
529
- config_schema={
530
- "server_url": StringSource,
531
- "user": StringSource,
532
- "key": StringSource,
533
- "account": StringSource,
534
- "dbid": StringSource,
535
- }
536
- )
537
- def terminus_resource(context):
538
- return TerminusDB(
539
- server_url=context.resource_config["server_url"],
540
- user=context.resource_config["user"],
541
- key=context.resource_config["key"],
542
- account=context.resource_config["account"],
543
- dbid=context.resource_config["dbid"],
544
- )
@@ -13,6 +13,9 @@ from toolz import get_in, groupby, concat, valmap, dissoc
13
13
  from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
14
14
 
15
15
 
16
+ BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
17
+
18
+
16
19
  @lru_cache
17
20
  def _get_schema_view():
18
21
  """Return a SchemaView instance representing the NMDC schema"""
@@ -98,7 +101,9 @@ class SubmissionPortalTranslator(Translator):
98
101
  self.study_pi_image_url = study_pi_image_url
99
102
  self.study_funding_sources = study_funding_sources
100
103
 
101
- self.biosample_extras = group_dicts_by_key("source_mat_id", biosample_extras)
104
+ self.biosample_extras = group_dicts_by_key(
105
+ BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
106
+ )
102
107
  self.biosample_extras_slot_mapping = group_dicts_by_key(
103
108
  "subject_id", biosample_extras_slot_mapping
104
109
  )
@@ -521,7 +526,7 @@ class SubmissionPortalTranslator(Translator):
521
526
  :param default_env_package: Default value for `env_package` slot
522
527
  :return: nmdc:Biosample
523
528
  """
524
- source_mat_id = sample_data[0].get("source_mat_id", "").strip()
529
+ biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
525
530
  slots = {
526
531
  "id": nmdc_biosample_id,
527
532
  "part_of": nmdc_study_id,
@@ -533,7 +538,7 @@ class SubmissionPortalTranslator(Translator):
533
538
  slots.update(transformed_tab)
534
539
 
535
540
  if self.biosample_extras:
536
- raw_extras = self.biosample_extras.get(source_mat_id)
541
+ raw_extras = self.biosample_extras.get(biosample_key)
537
542
  if raw_extras:
538
543
  transformed_extras = self._transform_dict_for_class(
539
544
  raw_extras, "Biosample", self.biosample_extras_slot_mapping
@@ -564,7 +569,9 @@ class SubmissionPortalTranslator(Translator):
564
569
 
565
570
  sample_data = metadata_submission_data.get("sampleData", {})
566
571
  package_name = metadata_submission_data["packageName"]
567
- sample_data_by_id = groupby("source_mat_id", concat(sample_data.values()))
572
+ sample_data_by_id = groupby(
573
+ BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
574
+ )
568
575
  nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
569
576
  sample_data_to_nmdc_biosample_ids = dict(
570
577
  zip(sample_data_by_id.keys(), nmdc_biosample_ids)
@@ -583,15 +590,15 @@ class SubmissionPortalTranslator(Translator):
583
590
 
584
591
  if self.omics_processing_mapping:
585
592
  # If there is data from an OmicsProcessing mapping file, process it now. This part
586
- # assumes that there is a column in that file with the header __biosample_source_mat_id
593
+ # assumes that there is a column in that file with the header __biosample_samp_name
587
594
  # that can be used to join with the sample data from the submission portal. The
588
- # biosample identified by that `source_mat_id` will be referenced in the `has_input`
595
+ # biosample identified by that `samp_name` will be referenced in the `has_input`
589
596
  # slot of the OmicsProcessing object. If a DataObject mapping file was also provided,
590
597
  # those objects will also be generated and referenced in the `has_output` slot of the
591
- # OmicsProcessing object. By keying off of the `source_mat_id` slot of the submission's
598
+ # OmicsProcessing object. By keying off of the `samp_name` slot of the submission's
592
599
  # sample data there is an implicit 1:1 relationship between Biosample objects and
593
600
  # OmicsProcessing objects generated here.
594
- join_key = "__biosample_source_mat_id"
601
+ join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}"
595
602
  database.omics_processing_set = []
596
603
  database.data_object_set = []
597
604
  data_objects_by_sample_data_id = {}
@@ -617,7 +624,7 @@ class SubmissionPortalTranslator(Translator):
617
624
  or sample_data_id not in sample_data_to_nmdc_biosample_ids
618
625
  ):
619
626
  logging.warning(
620
- f"Unrecognized biosample source_mat_id: {sample_data_id}"
627
+ f"Unrecognized biosample {BIOSAMPLE_UNIQUE_KEY_SLOT}: {sample_data_id}"
621
628
  )
622
629
  continue
623
630
  nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id]
nmdc_runtime/util.py CHANGED
@@ -16,7 +16,7 @@ import fastjsonschema
16
16
  import requests
17
17
  from frozendict import frozendict
18
18
  from jsonschema.validators import Draft7Validator
19
- from nmdc_schema.nmdc_schema_accepting_legacy_ids import Database as NMDCDatabase
19
+ from nmdc_schema.nmdc import Database as NMDCDatabase
20
20
  from nmdc_schema.get_nmdc_view import ViewGetter
21
21
  from pydantic import Field, BaseModel
22
22
  from pymongo.database import Database as MongoDatabase
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nmdc_runtime
3
- Version: 1.6.0
3
+ Version: 1.7.0
4
4
  Summary: A runtime system for NMDC data management and orchestration
5
5
  Home-page: https://github.com/microbiomedata/nmdc-runtime
6
6
  Author: Donny Winston
@@ -77,18 +77,15 @@ The runtime features:
77
77
  - `schedules` trigger recurring pipeline runs based on time
78
78
  - `sensors` trigger pipeline runs based on external state
79
79
  - Each `pipeline` can declare dependencies on any runtime `resources` or additional
80
- configuration. There are TerminusDB and MongoDB `resources` defined, as well as `preset`
80
+ configuration. There are MongoDB `resources` defined, as well as `preset`
81
81
  configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
82
82
  look to a set of known environment variables to load resources configurations, depending on
83
83
  the `mode`.
84
-
85
- 2. A [TerminusDB](https://terminusdb.com/) database supporting revision control of schema-validated
86
- data.
87
84
 
88
- 3. A MongoDB database supporting write-once, high-throughput internal
85
+ 2. A MongoDB database supporting write-once, high-throughput internal
89
86
  data storage by the nmdc-runtime FastAPI instance.
90
87
 
91
- 4. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
88
+ 3. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
92
89
  database, as a hub for data management and workflow automation.
93
90
 
94
91
  ## Local Development
@@ -1,7 +1,7 @@
1
1
  nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
3
3
  nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- nmdc_runtime/util.py,sha256=o74ZKOmSD79brPFAcQFsYpA6wh9287m0hDhDlIpn9VM,19872
4
+ nmdc_runtime/util.py,sha256=3mHVEUdMOv73XgT6NTuzMuMCL5Gs6NJ4Mk0bkgQQaQU,19844
5
5
  nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -35,10 +35,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
35
35
  nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
37
37
  nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- nmdc_runtime/site/graphs.py,sha256=mOWZvT2Rk4X96RmVAvHQwur-FhNuMWAko3jjRLGygEE,11455
39
- nmdc_runtime/site/ops.py,sha256=YjaH2zqzd01cRcqV0E93RoaWt8T4ExESx4SSszmczZ8,33620
40
- nmdc_runtime/site/repository.py,sha256=QI9Gcjr68-DT2MPwOx87Vkxcwp3ZIOVaFZ9uCO13w9U,35502
41
- nmdc_runtime/site/resources.py,sha256=pQSwg1dRpL_D91gYLzzaOIDZ3qa69rPqSlsq5dS9i_M,17783
38
+ nmdc_runtime/site/graphs.py,sha256=_vCyQnICis4OQGH91i1ZwpvHYcXOG6Nfg04f5DVdy2M,12040
39
+ nmdc_runtime/site/ops.py,sha256=G6X3YgSmDNxOnsMEByLUMfB0peY4o21o0_Ig3V7v6M4,35835
40
+ nmdc_runtime/site/repository.py,sha256=-dOk9BEnLSrmAN6bZoIu_WnFSqriIpO0c5P76PuHW1M,37472
41
+ nmdc_runtime/site/resources.py,sha256=ZSH1yvA-li0R7Abc22_v0XLbjBYf5igETr2G01J3hnc,17557
42
42
  nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
43
43
  nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
@@ -50,13 +50,11 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
50
50
  nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
51
51
  nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
52
52
  nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- nmdc_runtime/site/export/study_metadata.py,sha256=d3q6RV93B0BA64ZkfCPEHdlrC1P8pm9Sj6SxRDcpB7A,4027
53
+ nmdc_runtime/site/export/ncbi_xml.py,sha256=Z2qsaGIBvY2OdOkf8kJEZl1T_8R_YzhAlXxJ1gMQwnk,16946
54
+ nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=CqrtjwzmUbZXEW8aD-KpnCV_PlXVH-Gqp309nw3vbeo,6464
55
+ nmdc_runtime/site/export/study_metadata.py,sha256=WRU0F1ksWfNX3k9LD91Pn2DuLA-IOpGvYPJd6DnguEs,4819
54
56
  nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
57
  nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
56
- nmdc_runtime/site/terminusdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- nmdc_runtime/site/terminusdb/generate.py,sha256=Z3c06LDx3TGw4pvPRO97caQvzc8SuhGmPIr_d5b_E9I,6144
58
- nmdc_runtime/site/terminusdb/ingest.py,sha256=WE_V4vRRnlL6hIBU1TDSUheYOBWS9d5g6FHPS64jzvM,1245
59
- nmdc_runtime/site/terminusdb/schema.py,sha256=3e39rHUSZsNbN_F0SHHNsvcEGRWtYa6O9KNj3cH3tUs,77129
60
58
  nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
59
  nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
62
60
  nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
@@ -66,7 +64,7 @@ nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=e_7tXFrP0PpdhqUC
66
64
  nmdc_runtime/site/translation/neon_soil_translator.py,sha256=cJJ_QPva5G5SIT_7DjCSsqbDvgbiKGqUYrxK3nx7_Lw,37634
67
65
  nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=6LaFwBnVx6TN9v1D-G6LFrDxY0TK05AvMklx0E1tTeQ,26590
68
66
  nmdc_runtime/site/translation/neon_utils.py,sha256=mdxJVPb3zbD4DiKW3Fwgk22kjczKMwkcozvy7fwteTE,5203
69
- nmdc_runtime/site/translation/submission_portal_translator.py,sha256=lHcrfPR5wk3BcZ0Uw5zUyWu5XRVikgOzdzSb5nFVS9I,27964
67
+ nmdc_runtime/site/translation/submission_portal_translator.py,sha256=KiVO1vohhrJGfwzLJOumRfyHjcbYfswBIBvkYIdFxv8,28097
70
68
  nmdc_runtime/site/translation/translator.py,sha256=xM9dM-nTgSWwu5HFoUVNHf8kqk9iiH4PgWdSx4OKxEk,601
71
69
  nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
72
70
  nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -74,9 +72,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
74
72
  nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
75
73
  nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
76
74
  nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
77
- nmdc_runtime-1.6.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
78
- nmdc_runtime-1.6.0.dist-info/METADATA,sha256=hKgDLZfx14AX3IWIi3C9vHa9YAP-agU7tsmKZ_kg8JY,7424
79
- nmdc_runtime-1.6.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
80
- nmdc_runtime-1.6.0.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
81
- nmdc_runtime-1.6.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
82
- nmdc_runtime-1.6.0.dist-info/RECORD,,
75
+ nmdc_runtime-1.7.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
76
+ nmdc_runtime-1.7.0.dist-info/METADATA,sha256=FnoXHNgR6o5PEe6XhqRGdqOjbIX_ry-SKY5uMtZJQXY,7302
77
+ nmdc_runtime-1.7.0.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
78
+ nmdc_runtime-1.7.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
79
+ nmdc_runtime-1.7.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
80
+ nmdc_runtime-1.7.0.dist-info/RECORD,,