nmdc-runtime 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/site/export/ncbi_xml.py +0 -1
- nmdc_runtime/site/export/ncbi_xml_utils.py +0 -25
- nmdc_runtime/site/graphs.py +11 -0
- nmdc_runtime/site/ops.py +48 -11
- nmdc_runtime/site/repository.py +0 -4
- nmdc_runtime/site/translation/gold_translator.py +11 -0
- nmdc_runtime/site/translation/submission_portal_translator.py +269 -51
- nmdc_runtime/site/util.py +8 -1
- {nmdc_runtime-2.5.0.dist-info → nmdc_runtime-2.7.0.dist-info}/METADATA +3 -3
- {nmdc_runtime-2.5.0.dist-info → nmdc_runtime-2.7.0.dist-info}/RECORD +14 -14
- {nmdc_runtime-2.5.0.dist-info → nmdc_runtime-2.7.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-2.5.0.dist-info → nmdc_runtime-2.7.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.5.0.dist-info → nmdc_runtime-2.7.0.dist-info}/licenses/LICENSE +0 -0
- {nmdc_runtime-2.5.0.dist-info → nmdc_runtime-2.7.0.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,6 @@ import xml.dom.minidom
|
|
|
7
7
|
from typing import Any, List, Union
|
|
8
8
|
from urllib.parse import urlparse
|
|
9
9
|
from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
10
|
-
get_instruments,
|
|
11
10
|
handle_controlled_identified_term_value,
|
|
12
11
|
handle_controlled_term_value,
|
|
13
12
|
handle_geolocation_value,
|
|
@@ -24,31 +24,6 @@ def get_classname_from_typecode(doc_id):
|
|
|
24
24
|
return class_map.get(typecode)
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def get_instruments(instrument_set_collection):
|
|
28
|
-
# dictionary to capture a list of all instruments
|
|
29
|
-
# Structure of dict:
|
|
30
|
-
# {"instrument_id": {"vendor": "vendor_name", "model": "model_name"}}
|
|
31
|
-
all_instruments = {}
|
|
32
|
-
|
|
33
|
-
try:
|
|
34
|
-
query = {"type": "nmdc:Instrument"}
|
|
35
|
-
cursor = instrument_set_collection.find(query)
|
|
36
|
-
|
|
37
|
-
for document in cursor:
|
|
38
|
-
instrument_id = document.get("id")
|
|
39
|
-
vendor = document.get("vendor")
|
|
40
|
-
model = document.get("model")
|
|
41
|
-
|
|
42
|
-
if not instrument_id or not vendor or not model:
|
|
43
|
-
continue
|
|
44
|
-
|
|
45
|
-
all_instruments[instrument_id] = {"vendor": vendor, "model": model}
|
|
46
|
-
|
|
47
|
-
return all_instruments
|
|
48
|
-
except Exception as e:
|
|
49
|
-
raise RuntimeError(f"An error occurred while fetching instrument data: {e}")
|
|
50
|
-
|
|
51
|
-
|
|
52
27
|
def fetch_data_objects_from_biosamples(
|
|
53
28
|
all_docs_collection: Collection,
|
|
54
29
|
data_object_set: Collection,
|
nmdc_runtime/site/graphs.py
CHANGED
|
@@ -61,6 +61,8 @@ from nmdc_runtime.site.ops import (
|
|
|
61
61
|
get_database_updater_inputs,
|
|
62
62
|
post_submission_portal_biosample_ingest_record_stitching_filename,
|
|
63
63
|
generate_data_generation_set_post_biosample_ingest,
|
|
64
|
+
get_instrument_ids_by_model,
|
|
65
|
+
log_database_ids,
|
|
64
66
|
)
|
|
65
67
|
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
|
|
66
68
|
|
|
@@ -181,6 +183,7 @@ def translate_metadata_submission_to_nmdc_schema_database():
|
|
|
181
183
|
biosample_extras_slot_mapping = get_csv_rows_from_url(
|
|
182
184
|
biosample_extras_slot_mapping_file_url
|
|
183
185
|
)
|
|
186
|
+
instrument_mapping = get_instrument_ids_by_model()
|
|
184
187
|
|
|
185
188
|
database = translate_portal_submission_to_nmdc_schema_database(
|
|
186
189
|
metadata_submission,
|
|
@@ -188,10 +191,13 @@ def translate_metadata_submission_to_nmdc_schema_database():
|
|
|
188
191
|
data_object_mapping=data_object_mapping,
|
|
189
192
|
biosample_extras=biosample_extras,
|
|
190
193
|
biosample_extras_slot_mapping=biosample_extras_slot_mapping,
|
|
194
|
+
instrument_mapping=instrument_mapping,
|
|
191
195
|
)
|
|
192
196
|
|
|
193
197
|
validate_metadata(database)
|
|
194
198
|
|
|
199
|
+
log_database_ids(database)
|
|
200
|
+
|
|
195
201
|
database_dict = nmdc_schema_object_to_dict(database)
|
|
196
202
|
filename = nmdc_schema_database_export_filename(metadata_submission)
|
|
197
203
|
outputs = export_json_to_drs(database_dict, filename)
|
|
@@ -217,6 +223,7 @@ def ingest_metadata_submission():
|
|
|
217
223
|
biosample_extras_slot_mapping = get_csv_rows_from_url(
|
|
218
224
|
biosample_extras_slot_mapping_file_url
|
|
219
225
|
)
|
|
226
|
+
instrument_mapping = get_instrument_ids_by_model()
|
|
220
227
|
|
|
221
228
|
database = translate_portal_submission_to_nmdc_schema_database(
|
|
222
229
|
metadata_submission,
|
|
@@ -224,7 +231,11 @@ def ingest_metadata_submission():
|
|
|
224
231
|
data_object_mapping=data_object_mapping,
|
|
225
232
|
biosample_extras=biosample_extras,
|
|
226
233
|
biosample_extras_slot_mapping=biosample_extras_slot_mapping,
|
|
234
|
+
instrument_mapping=instrument_mapping,
|
|
227
235
|
)
|
|
236
|
+
|
|
237
|
+
log_database_ids(database)
|
|
238
|
+
|
|
228
239
|
run_id = submit_metadata_to_db(database)
|
|
229
240
|
poll_for_run_completion(run_id)
|
|
230
241
|
|
nmdc_runtime/site/ops.py
CHANGED
|
@@ -7,6 +7,7 @@ import tempfile
|
|
|
7
7
|
from collections import defaultdict
|
|
8
8
|
from datetime import datetime, timezone
|
|
9
9
|
from io import BytesIO, StringIO
|
|
10
|
+
from pprint import pformat
|
|
10
11
|
from toolz.dicttoolz import keyfilter
|
|
11
12
|
from typing import Tuple
|
|
12
13
|
from zipfile import ZipFile
|
|
@@ -38,7 +39,7 @@ from dagster import (
|
|
|
38
39
|
Bool,
|
|
39
40
|
)
|
|
40
41
|
from gridfs import GridFS
|
|
41
|
-
from linkml_runtime.
|
|
42
|
+
from linkml_runtime.utils.dictutils import as_simple_dict
|
|
42
43
|
from linkml_runtime.utils.yamlutils import YAMLRoot
|
|
43
44
|
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
44
45
|
from nmdc_runtime.api.core.idgen import generate_one_id
|
|
@@ -69,7 +70,6 @@ from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
|
69
70
|
fetch_data_objects_from_biosamples,
|
|
70
71
|
fetch_nucleotide_sequencing_from_biosamples,
|
|
71
72
|
fetch_library_preparation_from_biosamples,
|
|
72
|
-
get_instruments,
|
|
73
73
|
)
|
|
74
74
|
from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
|
|
75
75
|
from nmdc_runtime.site.resources import (
|
|
@@ -96,6 +96,7 @@ from nmdc_runtime.site.util import (
|
|
|
96
96
|
run_and_log,
|
|
97
97
|
schema_collection_has_index_on_id,
|
|
98
98
|
nmdc_study_id_to_filename,
|
|
99
|
+
get_instruments_by_id,
|
|
99
100
|
)
|
|
100
101
|
from nmdc_runtime.util import (
|
|
101
102
|
drs_object_in_for,
|
|
@@ -720,9 +721,8 @@ def translate_portal_submission_to_nmdc_schema_database(
|
|
|
720
721
|
metadata_submission: Dict[str, Any],
|
|
721
722
|
nucleotide_sequencing_mapping: List,
|
|
722
723
|
data_object_mapping: List,
|
|
724
|
+
instrument_mapping: Dict[str, str],
|
|
723
725
|
study_category: Optional[str],
|
|
724
|
-
study_doi_category: Optional[str],
|
|
725
|
-
study_doi_provider: Optional[str],
|
|
726
726
|
study_pi_image_url: Optional[str],
|
|
727
727
|
biosample_extras: Optional[list[dict]],
|
|
728
728
|
biosample_extras_slot_mapping: Optional[list[dict]],
|
|
@@ -739,11 +739,10 @@ def translate_portal_submission_to_nmdc_schema_database(
|
|
|
739
739
|
data_object_mapping=data_object_mapping,
|
|
740
740
|
id_minter=id_minter,
|
|
741
741
|
study_category=study_category,
|
|
742
|
-
study_doi_category=study_doi_category,
|
|
743
|
-
study_doi_provider=study_doi_provider,
|
|
744
742
|
study_pi_image_url=study_pi_image_url,
|
|
745
743
|
biosample_extras=biosample_extras,
|
|
746
744
|
biosample_extras_slot_mapping=biosample_extras_slot_mapping,
|
|
745
|
+
illumina_instrument_mapping=instrument_mapping,
|
|
747
746
|
)
|
|
748
747
|
database = translator.get_database()
|
|
749
748
|
return database
|
|
@@ -761,7 +760,7 @@ def nmdc_schema_database_export_filename(study: Dict[str, Any]) -> str:
|
|
|
761
760
|
|
|
762
761
|
@op
|
|
763
762
|
def nmdc_schema_object_to_dict(object: YAMLRoot) -> Dict[str, Any]:
|
|
764
|
-
return
|
|
763
|
+
return as_simple_dict(object)
|
|
765
764
|
|
|
766
765
|
|
|
767
766
|
@op(required_resource_keys={"mongo"}, config_schema={"username": str})
|
|
@@ -1227,11 +1226,26 @@ def get_library_preparation_from_biosamples(
|
|
|
1227
1226
|
|
|
1228
1227
|
|
|
1229
1228
|
@op(required_resource_keys={"mongo"})
|
|
1230
|
-
def get_all_instruments(context: OpExecutionContext):
|
|
1229
|
+
def get_all_instruments(context: OpExecutionContext) -> dict[str, dict]:
|
|
1231
1230
|
mdb = context.resources.mongo.db
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1231
|
+
return get_instruments_by_id(mdb)
|
|
1232
|
+
|
|
1233
|
+
|
|
1234
|
+
@op(required_resource_keys={"mongo"})
|
|
1235
|
+
def get_instrument_ids_by_model(context: OpExecutionContext) -> dict[str, str]:
|
|
1236
|
+
mdb = context.resources.mongo.db
|
|
1237
|
+
instruments_by_id = get_instruments_by_id(mdb)
|
|
1238
|
+
instruments_by_model: dict[str, str] = {}
|
|
1239
|
+
for inst_id, instrument in instruments_by_id.items():
|
|
1240
|
+
model = instrument.get("model")
|
|
1241
|
+
if model is None:
|
|
1242
|
+
context.log.warning(f"Instrument {inst_id} has no model.")
|
|
1243
|
+
continue
|
|
1244
|
+
if model in instruments_by_model:
|
|
1245
|
+
context.log.warning(f"Instrument model {model} is not unique.")
|
|
1246
|
+
instruments_by_model[model] = inst_id
|
|
1247
|
+
context.log.info("Instrument models: %s", pformat(instruments_by_model))
|
|
1248
|
+
return instruments_by_model
|
|
1235
1249
|
|
|
1236
1250
|
|
|
1237
1251
|
@op
|
|
@@ -1345,3 +1359,26 @@ def generate_biosample_set_for_nmdc_study_from_gold(
|
|
|
1345
1359
|
database = database_updater.generate_biosample_set_from_gold_api_for_study()
|
|
1346
1360
|
|
|
1347
1361
|
return database
|
|
1362
|
+
|
|
1363
|
+
|
|
1364
|
+
@op
|
|
1365
|
+
def log_database_ids(
|
|
1366
|
+
context: OpExecutionContext,
|
|
1367
|
+
database: nmdc.Database,
|
|
1368
|
+
) -> None:
|
|
1369
|
+
"""Log the IDs of the database."""
|
|
1370
|
+
database_dict = as_simple_dict(database)
|
|
1371
|
+
message = ""
|
|
1372
|
+
for collection_name, collection in database_dict.items():
|
|
1373
|
+
if not isinstance(collection, list):
|
|
1374
|
+
continue
|
|
1375
|
+
message += f"{collection_name} ({len(collection)}):\n"
|
|
1376
|
+
if len(collection) < 10:
|
|
1377
|
+
message += "\n".join(f" {doc['id']}" for doc in collection)
|
|
1378
|
+
else:
|
|
1379
|
+
message += "\n".join(f" {doc['id']}" for doc in collection[:4])
|
|
1380
|
+
message += f"\n ... {len(collection) - 8} more\n"
|
|
1381
|
+
message += "\n".join(f" {doc['id']}" for doc in collection[-4:])
|
|
1382
|
+
message += "\n"
|
|
1383
|
+
if message:
|
|
1384
|
+
context.log.info(message)
|
nmdc_runtime/site/repository.py
CHANGED
|
@@ -553,8 +553,6 @@ def biosample_submission_ingest():
|
|
|
553
553
|
"translate_portal_submission_to_nmdc_schema_database": {
|
|
554
554
|
"inputs": {
|
|
555
555
|
"study_category": "research_study",
|
|
556
|
-
"study_doi_category": None,
|
|
557
|
-
"study_doi_provider": None,
|
|
558
556
|
"study_pi_image_url": None,
|
|
559
557
|
}
|
|
560
558
|
},
|
|
@@ -591,8 +589,6 @@ def biosample_submission_ingest():
|
|
|
591
589
|
"translate_portal_submission_to_nmdc_schema_database": {
|
|
592
590
|
"inputs": {
|
|
593
591
|
"study_category": None,
|
|
594
|
-
"study_doi_category": None,
|
|
595
|
-
"study_doi_provider": None,
|
|
596
592
|
"study_pi_image_url": None,
|
|
597
593
|
}
|
|
598
594
|
},
|
|
@@ -639,6 +639,16 @@ class GoldStudyTranslator(Translator):
|
|
|
639
639
|
:return: nmdc:NucleotideSequencing object
|
|
640
640
|
"""
|
|
641
641
|
gold_project_id = gold_project["projectGoldId"]
|
|
642
|
+
ncbi_bioproject_identifier = gold_project.get("ncbiBioProjectAccession")
|
|
643
|
+
insdc_bioproject_identifiers = []
|
|
644
|
+
if ncbi_bioproject_identifier:
|
|
645
|
+
insdc_bioproject_identifiers.append(
|
|
646
|
+
self._ensure_curie(
|
|
647
|
+
ncbi_bioproject_identifier,
|
|
648
|
+
default_prefix="bioproject",
|
|
649
|
+
)
|
|
650
|
+
)
|
|
651
|
+
|
|
642
652
|
return nmdc.NucleotideSequencing(
|
|
643
653
|
id=nmdc_nucleotide_sequencing_id,
|
|
644
654
|
name=gold_project.get("projectName"),
|
|
@@ -650,6 +660,7 @@ class GoldStudyTranslator(Translator):
|
|
|
650
660
|
has_input=nmdc_biosample_id,
|
|
651
661
|
add_date=gold_project.get("addDate"),
|
|
652
662
|
mod_date=self._get_mod_date(gold_project),
|
|
663
|
+
insdc_bioproject_identifiers=insdc_bioproject_identifiers,
|
|
653
664
|
principal_investigator=self._get_pi(gold_project),
|
|
654
665
|
processing_institution=self._get_processing_institution(gold_project),
|
|
655
666
|
instrument_used=self._get_instrument(gold_project),
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
+
from collections import namedtuple
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
from enum import Enum
|
|
5
6
|
from functools import lru_cache
|
|
6
7
|
from importlib import resources
|
|
7
|
-
from typing import Any, List, Optional, Union
|
|
8
|
+
from typing import Any, List, Optional, Union, Tuple
|
|
9
|
+
from urllib.parse import urlparse
|
|
8
10
|
|
|
9
11
|
from linkml_runtime import SchemaView
|
|
10
12
|
from linkml_runtime.linkml_model import SlotDefinition
|
|
@@ -13,8 +15,38 @@ from toolz import concat, dissoc, get_in, groupby, valmap
|
|
|
13
15
|
|
|
14
16
|
from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
|
|
15
17
|
|
|
18
|
+
|
|
19
|
+
DataUrlSet = namedtuple("DataUrlSet", ["url", "md5_checksum"])
|
|
20
|
+
|
|
21
|
+
READ_1 = DataUrlSet("read_1_url", "read_1_md5_checksum")
|
|
22
|
+
READ_2 = DataUrlSet("read_2_url", "read_2_md5_checksum")
|
|
23
|
+
INTERLEAVED = DataUrlSet("interleaved_url", "interleaved_md5_checksum")
|
|
24
|
+
|
|
25
|
+
DATA_URL_SETS: list[DataUrlSet] = [READ_1, READ_2, INTERLEAVED]
|
|
26
|
+
|
|
16
27
|
BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
|
|
17
28
|
|
|
29
|
+
TAB_NAME_KEY = "__tab_name"
|
|
30
|
+
METAGENOME = nmdc.NucleotideSequencingEnum(nmdc.NucleotideSequencingEnum.metagenome)
|
|
31
|
+
METATRANSCRIPTOME = nmdc.NucleotideSequencingEnum(
|
|
32
|
+
nmdc.NucleotideSequencingEnum.metatranscriptome
|
|
33
|
+
)
|
|
34
|
+
TAB_NAME_TO_ANALYTE_CATEGORY: dict[str, nmdc.NucleotideSequencingEnum] = {
|
|
35
|
+
"metagenome_sequencing_non_interleaved_data": METAGENOME,
|
|
36
|
+
"metagenome_sequencing_interleaved_data": METAGENOME,
|
|
37
|
+
"metatranscriptome_sequencing_non_interleaved_data": METATRANSCRIPTOME,
|
|
38
|
+
"metatranscriptome_sequencing_interleaved_data": METATRANSCRIPTOME,
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
DATA_URL_SET_AND_ANALYTE_TO_DATA_OBJECT_TYPE: dict[tuple[DataUrlSet, str], str] = {
|
|
42
|
+
(READ_1, str(METAGENOME)): "Metagenome Raw Read 1",
|
|
43
|
+
(READ_2, str(METAGENOME)): "Metagenome Raw Read 2",
|
|
44
|
+
(INTERLEAVED, str(METAGENOME)): "Metagenome Raw Reads",
|
|
45
|
+
(READ_1, str(METATRANSCRIPTOME)): "Metatranscriptome Raw Read 1",
|
|
46
|
+
(READ_2, str(METATRANSCRIPTOME)): "Metatranscriptome Raw Read 2",
|
|
47
|
+
(INTERLEAVED, str(METATRANSCRIPTOME)): "Metatranscriptome Raw Reads",
|
|
48
|
+
}
|
|
49
|
+
|
|
18
50
|
|
|
19
51
|
class EnvironmentPackage(Enum):
|
|
20
52
|
r"""
|
|
@@ -75,6 +107,18 @@ def group_dicts_by_key(key: str, seq: Optional[list[dict]]) -> Optional[dict]:
|
|
|
75
107
|
return grouped
|
|
76
108
|
|
|
77
109
|
|
|
110
|
+
def split_strip(string: str | None, sep: str) -> list[str] | None:
|
|
111
|
+
"""Split a string by a separator and strip whitespace from each part.
|
|
112
|
+
|
|
113
|
+
:param string: string to split
|
|
114
|
+
:param sep: separator to split by
|
|
115
|
+
:return: list of stripped strings
|
|
116
|
+
"""
|
|
117
|
+
if string is None:
|
|
118
|
+
return None
|
|
119
|
+
return [s.strip() for s in string.split(sep)]
|
|
120
|
+
|
|
121
|
+
|
|
78
122
|
class SubmissionPortalTranslator(Translator):
|
|
79
123
|
"""A Translator subclass for handling submission portal entries
|
|
80
124
|
|
|
@@ -86,17 +130,15 @@ class SubmissionPortalTranslator(Translator):
|
|
|
86
130
|
|
|
87
131
|
def __init__(
|
|
88
132
|
self,
|
|
89
|
-
metadata_submission: JSON_OBJECT =
|
|
133
|
+
metadata_submission: Optional[JSON_OBJECT] = None,
|
|
90
134
|
*args,
|
|
91
135
|
nucleotide_sequencing_mapping: Optional[list] = None,
|
|
92
136
|
data_object_mapping: Optional[list] = None,
|
|
137
|
+
illumina_instrument_mapping: Optional[dict[str, str]] = None,
|
|
93
138
|
# Additional study-level metadata not captured by the submission portal currently
|
|
94
139
|
# See: https://github.com/microbiomedata/submission-schema/issues/162
|
|
95
|
-
study_doi_category: Optional[str] = None,
|
|
96
|
-
study_doi_provider: Optional[str] = None,
|
|
97
140
|
study_category: Optional[str] = None,
|
|
98
141
|
study_pi_image_url: Optional[str] = None,
|
|
99
|
-
study_funding_sources: Optional[list[str]] = None,
|
|
100
142
|
# Additional biosample-level metadata with optional column mapping information not captured
|
|
101
143
|
# by the submission portal currently.
|
|
102
144
|
# See: https://github.com/microbiomedata/submission-schema/issues/162
|
|
@@ -106,23 +148,17 @@ class SubmissionPortalTranslator(Translator):
|
|
|
106
148
|
) -> None:
|
|
107
149
|
super().__init__(*args, **kwargs)
|
|
108
150
|
|
|
109
|
-
self.metadata_submission = metadata_submission
|
|
151
|
+
self.metadata_submission: JSON_OBJECT = metadata_submission or {}
|
|
110
152
|
self.nucleotide_sequencing_mapping = nucleotide_sequencing_mapping
|
|
111
153
|
self.data_object_mapping = data_object_mapping
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
nmdc.DoiCategoryEnum(study_doi_category)
|
|
115
|
-
if study_doi_category
|
|
116
|
-
else nmdc.DoiCategoryEnum.dataset_doi
|
|
117
|
-
)
|
|
118
|
-
self.study_doi_provider = (
|
|
119
|
-
nmdc.DoiProviderEnum(study_doi_provider) if study_doi_provider else None
|
|
154
|
+
self.illumina_instrument_mapping: dict[str, str] = (
|
|
155
|
+
illumina_instrument_mapping or {}
|
|
120
156
|
)
|
|
157
|
+
|
|
121
158
|
self.study_category = (
|
|
122
159
|
nmdc.StudyCategoryEnum(study_category) if study_category else None
|
|
123
160
|
)
|
|
124
161
|
self.study_pi_image_url = study_pi_image_url
|
|
125
|
-
self.study_funding_sources = study_funding_sources
|
|
126
162
|
|
|
127
163
|
self.biosample_extras = group_dicts_by_key(
|
|
128
164
|
BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
|
|
@@ -153,28 +189,6 @@ class SubmissionPortalTranslator(Translator):
|
|
|
153
189
|
type=nmdc.PersonValue.class_class_curie,
|
|
154
190
|
)
|
|
155
191
|
|
|
156
|
-
def _get_doi(self, metadata_submission: JSON_OBJECT) -> Union[List[nmdc.Doi], None]:
|
|
157
|
-
"""Get DOI information from the context form data
|
|
158
|
-
|
|
159
|
-
:param metadata_submission: submission portal entry
|
|
160
|
-
:return: list of strings or None
|
|
161
|
-
"""
|
|
162
|
-
dataset_doi = get_in(["contextForm", "datasetDoi"], metadata_submission)
|
|
163
|
-
if not dataset_doi:
|
|
164
|
-
return None
|
|
165
|
-
|
|
166
|
-
if not dataset_doi.startswith("doi:"):
|
|
167
|
-
dataset_doi = f"doi:{dataset_doi}"
|
|
168
|
-
|
|
169
|
-
return [
|
|
170
|
-
nmdc.Doi(
|
|
171
|
-
doi_value=dataset_doi,
|
|
172
|
-
doi_provider=self.study_doi_provider,
|
|
173
|
-
doi_category=self.study_doi_category,
|
|
174
|
-
type="nmdc:Doi",
|
|
175
|
-
)
|
|
176
|
-
]
|
|
177
|
-
|
|
178
192
|
def _get_has_credit_associations(
|
|
179
193
|
self, metadata_submission: JSON_OBJECT
|
|
180
194
|
) -> Union[List[nmdc.CreditAssociation], None]:
|
|
@@ -203,21 +217,34 @@ class SubmissionPortalTranslator(Translator):
|
|
|
203
217
|
def _get_gold_study_identifiers(
|
|
204
218
|
self, metadata_submission: JSON_OBJECT
|
|
205
219
|
) -> Union[List[str], None]:
|
|
206
|
-
"""Construct a GOLD CURIE from the
|
|
220
|
+
"""Construct a GOLD CURIE from the study form data
|
|
207
221
|
|
|
208
222
|
:param metadata_submission: submission portal entry
|
|
209
223
|
:return: GOLD CURIE
|
|
210
224
|
"""
|
|
211
|
-
gold_study_id = get_in(["
|
|
225
|
+
gold_study_id = get_in(["studyForm", "GOLDStudyId"], metadata_submission)
|
|
212
226
|
if not gold_study_id:
|
|
213
227
|
return None
|
|
214
228
|
|
|
215
229
|
return [self._ensure_curie(gold_study_id, default_prefix="gold")]
|
|
216
230
|
|
|
231
|
+
def _get_ncbi_bioproject_identifiers(
|
|
232
|
+
self, metadata_submission: JSON_OBJECT
|
|
233
|
+
) -> Union[List[str], None]:
|
|
234
|
+
"""Construct a NCBI Bioproject CURIE from the study form data"""
|
|
235
|
+
|
|
236
|
+
ncbi_bioproject_id = get_in(
|
|
237
|
+
["studyForm", "NCBIBioProjectId"], metadata_submission
|
|
238
|
+
)
|
|
239
|
+
if not ncbi_bioproject_id:
|
|
240
|
+
return None
|
|
241
|
+
|
|
242
|
+
return [self._ensure_curie(ncbi_bioproject_id, default_prefix="bioproject")]
|
|
243
|
+
|
|
217
244
|
def _get_jgi_study_identifiers(
|
|
218
245
|
self, metadata_submission: JSON_OBJECT
|
|
219
246
|
) -> Union[List[str], None]:
|
|
220
|
-
"""Construct a JGI proposal CURIE from the multiomics
|
|
247
|
+
"""Construct a JGI proposal CURIE from the multiomics form data
|
|
221
248
|
|
|
222
249
|
:param metadata_submission: submission portal entry
|
|
223
250
|
:return: JGI proposal CURIE
|
|
@@ -228,6 +255,20 @@ class SubmissionPortalTranslator(Translator):
|
|
|
228
255
|
|
|
229
256
|
return [self._ensure_curie(jgi_study_id, default_prefix="jgi.proposal")]
|
|
230
257
|
|
|
258
|
+
def _get_emsl_project_identifiers(
|
|
259
|
+
self, metadata_submission: JSON_OBJECT
|
|
260
|
+
) -> Union[List[str], None]:
|
|
261
|
+
"""Construct an EMSL project CURIE from the multiomics form data
|
|
262
|
+
|
|
263
|
+
:param metadata_submission: submission portal entry
|
|
264
|
+
:return: EMSL project CURIE
|
|
265
|
+
"""
|
|
266
|
+
emsl_project_id = get_in(["multiOmicsForm", "studyNumber"], metadata_submission)
|
|
267
|
+
if not emsl_project_id:
|
|
268
|
+
return None
|
|
269
|
+
|
|
270
|
+
return [self._ensure_curie(emsl_project_id, default_prefix="emsl.project")]
|
|
271
|
+
|
|
231
272
|
def _get_quantity_value(
|
|
232
273
|
self, raw_value: Optional[str], unit: Optional[str] = None
|
|
233
274
|
) -> Union[nmdc.QuantityValue, None]:
|
|
@@ -434,6 +475,75 @@ class SubmissionPortalTranslator(Translator):
|
|
|
434
475
|
|
|
435
476
|
return value
|
|
436
477
|
|
|
478
|
+
def _get_data_objects_from_fields(
|
|
479
|
+
self,
|
|
480
|
+
sample_data: JSON_OBJECT,
|
|
481
|
+
*,
|
|
482
|
+
url_field_name: str,
|
|
483
|
+
md5_checksum_field_name: str,
|
|
484
|
+
nucleotide_sequencing_id: str,
|
|
485
|
+
data_object_type: nmdc.FileTypeEnum,
|
|
486
|
+
) -> Tuple[List[nmdc.DataObject], nmdc.Manifest | None]:
|
|
487
|
+
"""Get DataObject instances based on the URLs and MD5 checksums in the given fields.
|
|
488
|
+
|
|
489
|
+
If the field provides multiple URLs, multiple DataObject instances will be created and a
|
|
490
|
+
Manifest will be created and provided in the second return value.
|
|
491
|
+
|
|
492
|
+
:param sample_data: sample data
|
|
493
|
+
:param url_field_name: field name for the URL
|
|
494
|
+
:param md5_checksum_field_name: field name for the MD5 checksum
|
|
495
|
+
:param nucleotide_sequencing_id: ID for the nmdc:NucleotideSequencing object that generated the data object(s)
|
|
496
|
+
:param data_object_type: FileTypeEnum representing the type of the data object
|
|
497
|
+
:return: tuple of (list of nmdc.DataObject instances, nmdc.Manifest or None)
|
|
498
|
+
"""
|
|
499
|
+
data_objects: List[nmdc.DataObject] = []
|
|
500
|
+
urls = split_strip(sample_data.get(url_field_name), ";")
|
|
501
|
+
if not urls:
|
|
502
|
+
return data_objects, None
|
|
503
|
+
|
|
504
|
+
md5_checksums = split_strip(sample_data.get(md5_checksum_field_name), ";")
|
|
505
|
+
if md5_checksums and len(urls) != len(md5_checksums):
|
|
506
|
+
raise ValueError(
|
|
507
|
+
f"{url_field_name} and {md5_checksum_field_name} must have the same number of values"
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
data_object_ids = self._id_minter("nmdc:DataObject", len(urls))
|
|
511
|
+
manifest: nmdc.Manifest | None = None
|
|
512
|
+
if len(urls) > 1:
|
|
513
|
+
manifest_id = self._id_minter("nmdc:Manifest", 1)[0]
|
|
514
|
+
manifest = nmdc.Manifest(
|
|
515
|
+
id=manifest_id,
|
|
516
|
+
manifest_category=nmdc.ManifestCategoryEnum(
|
|
517
|
+
nmdc.ManifestCategoryEnum.poolable_replicates
|
|
518
|
+
),
|
|
519
|
+
type="nmdc:Manifest",
|
|
520
|
+
)
|
|
521
|
+
|
|
522
|
+
for i, url in enumerate(urls):
|
|
523
|
+
data_object_id = data_object_ids[i]
|
|
524
|
+
parsed_url = urlparse(url)
|
|
525
|
+
possible_filename = parsed_url.path.rsplit("/", 1)[-1]
|
|
526
|
+
data_object_slots = {
|
|
527
|
+
"id": data_object_id,
|
|
528
|
+
"name": possible_filename,
|
|
529
|
+
"description": f"{data_object_type} for {nucleotide_sequencing_id}",
|
|
530
|
+
"type": "nmdc:DataObject",
|
|
531
|
+
"url": url,
|
|
532
|
+
"md5_checksum": md5_checksums[i] if md5_checksums else None,
|
|
533
|
+
"in_manifest": [manifest.id] if manifest else None,
|
|
534
|
+
"data_category": nmdc.DataCategoryEnum(
|
|
535
|
+
nmdc.DataCategoryEnum.instrument_data
|
|
536
|
+
),
|
|
537
|
+
"data_object_type": data_object_type,
|
|
538
|
+
"was_generated_by": nucleotide_sequencing_id,
|
|
539
|
+
}
|
|
540
|
+
data_object_slots.update(
|
|
541
|
+
self._transform_dict_for_class(sample_data, "DataObject")
|
|
542
|
+
)
|
|
543
|
+
data_objects.append(nmdc.DataObject(**data_object_slots))
|
|
544
|
+
|
|
545
|
+
return data_objects, manifest
|
|
546
|
+
|
|
437
547
|
def _translate_study(
|
|
438
548
|
self, metadata_submission: JSON_OBJECT, nmdc_study_id: str
|
|
439
549
|
) -> nmdc.Study:
|
|
@@ -448,18 +558,17 @@ class SubmissionPortalTranslator(Translator):
|
|
|
448
558
|
"""
|
|
449
559
|
return nmdc.Study(
|
|
450
560
|
alternative_names=self._get_from(
|
|
451
|
-
metadata_submission, ["
|
|
561
|
+
metadata_submission, ["studyForm", "alternativeNames"]
|
|
452
562
|
),
|
|
453
|
-
associated_dois=self._get_doi(metadata_submission),
|
|
454
563
|
description=self._get_from(
|
|
455
564
|
metadata_submission, ["studyForm", "description"]
|
|
456
565
|
),
|
|
457
566
|
funding_sources=self._get_from(
|
|
458
567
|
metadata_submission, ["studyForm", "fundingSources"]
|
|
459
568
|
),
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
569
|
+
emsl_project_identifiers=self._get_emsl_project_identifiers(
|
|
570
|
+
metadata_submission
|
|
571
|
+
),
|
|
463
572
|
gold_study_identifiers=self._get_gold_study_identifiers(
|
|
464
573
|
metadata_submission
|
|
465
574
|
),
|
|
@@ -467,8 +576,8 @@ class SubmissionPortalTranslator(Translator):
|
|
|
467
576
|
metadata_submission
|
|
468
577
|
),
|
|
469
578
|
id=nmdc_study_id,
|
|
470
|
-
insdc_bioproject_identifiers=self.
|
|
471
|
-
metadata_submission
|
|
579
|
+
insdc_bioproject_identifiers=self._get_ncbi_bioproject_identifiers(
|
|
580
|
+
metadata_submission
|
|
472
581
|
),
|
|
473
582
|
jgi_portal_study_identifiers=self._get_jgi_study_identifiers(
|
|
474
583
|
metadata_submission
|
|
@@ -555,7 +664,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
555
664
|
if slot_definition.multivalued:
|
|
556
665
|
value_list = value
|
|
557
666
|
if isinstance(value, str):
|
|
558
|
-
value_list =
|
|
667
|
+
value_list = split_strip(value, "|")
|
|
559
668
|
transformed_value = [
|
|
560
669
|
self._transform_value_for_slot(item, slot_definition, unit)
|
|
561
670
|
for item in value_list
|
|
@@ -629,16 +738,18 @@ class SubmissionPortalTranslator(Translator):
|
|
|
629
738
|
:return: nmdc:Database object
|
|
630
739
|
"""
|
|
631
740
|
database = nmdc.Database()
|
|
632
|
-
|
|
633
|
-
nmdc_study_id = self._id_minter("nmdc:Study")[0]
|
|
634
|
-
|
|
635
741
|
metadata_submission_data = self.metadata_submission.get(
|
|
636
742
|
"metadata_submission", {}
|
|
637
743
|
)
|
|
744
|
+
|
|
745
|
+
# Generate one Study instance based on the metadata submission
|
|
746
|
+
nmdc_study_id = self._id_minter("nmdc:Study")[0]
|
|
638
747
|
database.study_set = [
|
|
639
748
|
self._translate_study(metadata_submission_data, nmdc_study_id)
|
|
640
749
|
]
|
|
641
750
|
|
|
751
|
+
# Automatically populate the `env_package` field in the sample data based on which
|
|
752
|
+
# environmental data tab the sample data came from.
|
|
642
753
|
sample_data = metadata_submission_data.get("sampleData", {})
|
|
643
754
|
for key in sample_data.keys():
|
|
644
755
|
env = key.removesuffix("_data").upper()
|
|
@@ -647,8 +758,16 @@ class SubmissionPortalTranslator(Translator):
|
|
|
647
758
|
for sample in sample_data[key]:
|
|
648
759
|
sample["env_package"] = package_name
|
|
649
760
|
except KeyError:
|
|
761
|
+
# This is expected when processing rows from tabs like the JGI/EMSL tabs or external
|
|
762
|
+
# sequencing data tabs.
|
|
650
763
|
pass
|
|
651
764
|
|
|
765
|
+
# Before regrouping the data by sample name, record which tab each object came from
|
|
766
|
+
for tab_name in sample_data.keys():
|
|
767
|
+
for tab in sample_data[tab_name]:
|
|
768
|
+
tab[TAB_NAME_KEY] = tab_name
|
|
769
|
+
|
|
770
|
+
# Reorganize the sample data by sample name and generate a unique NMDC ID for each
|
|
652
771
|
sample_data_by_id = groupby(
|
|
653
772
|
BIOSAMPLE_UNIQUE_KEY_SLOT,
|
|
654
773
|
concat(sample_data.values()),
|
|
@@ -658,6 +777,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
658
777
|
zip(sample_data_by_id.keys(), nmdc_biosample_ids)
|
|
659
778
|
)
|
|
660
779
|
|
|
780
|
+
# Translate the sample data into nmdc:Biosample objects
|
|
661
781
|
database.biosample_set = [
|
|
662
782
|
self._translate_biosample(
|
|
663
783
|
sample_data,
|
|
@@ -668,6 +788,104 @@ class SubmissionPortalTranslator(Translator):
|
|
|
668
788
|
if sample_data
|
|
669
789
|
]
|
|
670
790
|
|
|
791
|
+
# This section handles the translation of information in the external sequencing tabs into
|
|
792
|
+
# various NMDC objects.
|
|
793
|
+
database.data_generation_set = []
|
|
794
|
+
database.data_object_set = []
|
|
795
|
+
database.instrument_set = []
|
|
796
|
+
database.manifest_set = []
|
|
797
|
+
today = datetime.now().strftime("%Y-%m-%d")
|
|
798
|
+
for sample_data_id, sample_data in sample_data_by_id.items():
|
|
799
|
+
for tab in sample_data:
|
|
800
|
+
tab_name = tab.get(TAB_NAME_KEY)
|
|
801
|
+
analyte_category = TAB_NAME_TO_ANALYTE_CATEGORY.get(tab_name)
|
|
802
|
+
if not analyte_category:
|
|
803
|
+
# If the tab name cannot be mapped to an analyte category, that means we're
|
|
804
|
+
# not in an external sequencing data tab (e.g. this is an environmental data
|
|
805
|
+
# tab or a JGI/EMSL tab). Skip this tab.
|
|
806
|
+
continue
|
|
807
|
+
|
|
808
|
+
# Start by generating one NucleotideSequencing instance with a has_input
|
|
809
|
+
# relationship to the current Biosample instance.
|
|
810
|
+
nucleotide_sequencing_id = self._id_minter(
|
|
811
|
+
"nmdc:NucleotideSequencing", 1
|
|
812
|
+
)[0]
|
|
813
|
+
nucleotide_sequencing_slots = {
|
|
814
|
+
"id": nucleotide_sequencing_id,
|
|
815
|
+
"has_input": sample_data_to_nmdc_biosample_ids[sample_data_id],
|
|
816
|
+
"has_output": [],
|
|
817
|
+
"associated_studies": [nmdc_study_id],
|
|
818
|
+
"add_date": today,
|
|
819
|
+
"mod_date": today,
|
|
820
|
+
"analyte_category": analyte_category,
|
|
821
|
+
"type": "nmdc:NucleotideSequencing",
|
|
822
|
+
}
|
|
823
|
+
# If the protocol_link column was filled in, expand it into an nmdc:Protocol object
|
|
824
|
+
if "protocol_link" in tab:
|
|
825
|
+
protocol_link = tab.pop("protocol_link")
|
|
826
|
+
nucleotide_sequencing_slots["protocol_link"] = nmdc.Protocol(
|
|
827
|
+
url=protocol_link,
|
|
828
|
+
type="nmdc:Protocol",
|
|
829
|
+
)
|
|
830
|
+
# If model column was filled in, expand it into an nmdc:Instrument object. This is
|
|
831
|
+
# done by first checking the provided instrument mapping to see if the model is
|
|
832
|
+
# already present. If it is not, a new instrument object is created and added to the
|
|
833
|
+
# instrument_set. Currently, we only accept sequencing data in the submission portal
|
|
834
|
+
# that was generated by Illumina instruments, so the vendor is hardcoded here.
|
|
835
|
+
if "model" in tab:
|
|
836
|
+
model = tab.pop("model")
|
|
837
|
+
if model not in self.illumina_instrument_mapping:
|
|
838
|
+
# If the model is not already in the mapping, create a new record for it
|
|
839
|
+
nmdc_instrument_id = self._id_minter("nmdc:Instrument", 1)[0]
|
|
840
|
+
database.instrument_set.append(
|
|
841
|
+
nmdc.Instrument(
|
|
842
|
+
id=nmdc_instrument_id,
|
|
843
|
+
vendor=nmdc.InstrumentVendorEnum(
|
|
844
|
+
nmdc.InstrumentVendorEnum.illumina
|
|
845
|
+
),
|
|
846
|
+
model=nmdc.InstrumentModelEnum(model),
|
|
847
|
+
type="nmdc:Instrument",
|
|
848
|
+
)
|
|
849
|
+
)
|
|
850
|
+
self.illumina_instrument_mapping[model] = nmdc_instrument_id
|
|
851
|
+
nucleotide_sequencing_slots["instrument_used"] = (
|
|
852
|
+
self.illumina_instrument_mapping[model]
|
|
853
|
+
)
|
|
854
|
+
# Process the remaining columns according to the NucleotideSequencing class
|
|
855
|
+
# definition
|
|
856
|
+
nucleotide_sequencing_slots.update(
|
|
857
|
+
self._transform_dict_for_class(tab, "NucleotideSequencing")
|
|
858
|
+
)
|
|
859
|
+
nucleotide_sequencing = nmdc.NucleotideSequencing(
|
|
860
|
+
**nucleotide_sequencing_slots
|
|
861
|
+
)
|
|
862
|
+
database.data_generation_set.append(nucleotide_sequencing)
|
|
863
|
+
|
|
864
|
+
# Iterate over the columns that contain URLs and MD5 checksums and translate them
|
|
865
|
+
# into DataObject instances. Each of these DataObject instances will be connected
|
|
866
|
+
# to the NucleotideSequencing instance via the has_output/was_generated_by
|
|
867
|
+
# relationships.
|
|
868
|
+
for data_url in DATA_URL_SETS:
|
|
869
|
+
data_object_type = DATA_URL_SET_AND_ANALYTE_TO_DATA_OBJECT_TYPE[
|
|
870
|
+
(data_url, str(analyte_category))
|
|
871
|
+
]
|
|
872
|
+
data_objects, manifest = self._get_data_objects_from_fields(
|
|
873
|
+
tab,
|
|
874
|
+
url_field_name=data_url.url,
|
|
875
|
+
md5_checksum_field_name=data_url.md5_checksum,
|
|
876
|
+
nucleotide_sequencing_id=nucleotide_sequencing_id,
|
|
877
|
+
data_object_type=nmdc.FileTypeEnum(data_object_type),
|
|
878
|
+
)
|
|
879
|
+
if manifest:
|
|
880
|
+
database.manifest_set.append(manifest)
|
|
881
|
+
for data_object in data_objects:
|
|
882
|
+
nucleotide_sequencing.has_output.append(data_object.id)
|
|
883
|
+
database.data_object_set.append(data_object)
|
|
884
|
+
|
|
885
|
+
# This is the older way of handling attaching NucleotideSequencing and DataObject instances
|
|
886
|
+
# to the Biosample instances. This should now mainly be handled by the external sequencing
|
|
887
|
+
# data tabs in the submission portal. This code is being left in place for now in case it is
|
|
888
|
+
# needed in the future.
|
|
671
889
|
if self.nucleotide_sequencing_mapping:
|
|
672
890
|
# If there is data from a NucleotideSequencing mapping file, process it now. This part
|
|
673
891
|
# assumes that there is a column in that file with the header __biosample_samp_name
|
nmdc_runtime/site/util.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
|
|
3
|
-
from dagster import op
|
|
4
3
|
from functools import lru_cache
|
|
5
4
|
from pymongo.database import Database as MongoDatabase
|
|
6
5
|
from subprocess import Popen, PIPE, STDOUT, CalledProcessError
|
|
6
|
+
from toolz import groupby
|
|
7
7
|
|
|
8
8
|
from nmdc_runtime.api.db.mongo import get_collection_names_from_schema
|
|
9
9
|
from nmdc_runtime.site.resources import mongo_resource
|
|
@@ -52,3 +52,10 @@ def get_basename(filename: str) -> str:
|
|
|
52
52
|
|
|
53
53
|
def nmdc_study_id_to_filename(nmdc_study_id: str) -> str:
|
|
54
54
|
return nmdc_study_id.replace(":", "_").replace("-", "_")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_instruments_by_id(mdb: MongoDatabase) -> dict[str, dict]:
|
|
58
|
+
"""Get all documents from the instrument_set collection in a dict keyed by id."""
|
|
59
|
+
return {
|
|
60
|
+
instrument["id"]: instrument for instrument in mdb["instrument_set"].find({})
|
|
61
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nmdc_runtime
|
|
3
|
-
Version: 2.5.0
|
|
3
|
+
Version: 2.7.0
|
|
4
4
|
Summary: A runtime system for NMDC data management and orchestration
|
|
5
5
|
Home-page: https://github.com/microbiomedata/nmdc-runtime
|
|
6
6
|
Author: Donny Winston
|
|
@@ -106,10 +106,10 @@ docker compose version
|
|
|
106
106
|
docker info
|
|
107
107
|
```
|
|
108
108
|
|
|
109
|
-
Ensure the permissions of
|
|
109
|
+
Ensure the permissions of `./.docker/mongoKeyFile` are such that only the file's owner can read or write the file.
|
|
110
110
|
|
|
111
111
|
```shell
|
|
112
|
-
chmod 600
|
|
112
|
+
chmod 600 ./.docker/mongoKeyFile
|
|
113
113
|
```
|
|
114
114
|
|
|
115
115
|
Ensure you have a `.env` file for the Docker services to source from. You may copy `.env.example` to
|
|
@@ -36,11 +36,11 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
|
|
|
36
36
|
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
|
|
38
38
|
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
nmdc_runtime/site/graphs.py,sha256=
|
|
40
|
-
nmdc_runtime/site/ops.py,sha256=
|
|
41
|
-
nmdc_runtime/site/repository.py,sha256=
|
|
39
|
+
nmdc_runtime/site/graphs.py,sha256=DoKK6B6xkSwRcY5PVVo6jV_IA4HI5qL8xW9_n94jVfQ,15990
|
|
40
|
+
nmdc_runtime/site/ops.py,sha256=atZNkU5mzRRqTnaW39fvq7gVO2sKSH8ztVOp8_dOLbU,48048
|
|
41
|
+
nmdc_runtime/site/repository.py,sha256=nHu1skayyTjJWwGEf5eToX02cgBNTG_kdSluzJZ6rJc,43695
|
|
42
42
|
nmdc_runtime/site/resources.py,sha256=sqtRWb4ewU61U-JZTphsC4wBvYT5B0wj33WU70vjq_k,19677
|
|
43
|
-
nmdc_runtime/site/util.py,sha256
|
|
43
|
+
nmdc_runtime/site/util.py,sha256=h70UJCT9g-I63EJn0drZjv1iaQ8LHJTbG29R9kqJ04c,1821
|
|
44
44
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
|
|
46
46
|
nmdc_runtime/site/backup/nmdcdb_mongoexport.py,sha256=XIFI_AI3zl0dFr-ELOEmwvT41MyRKBGFaAT3RcamTNE,4166
|
|
@@ -51,8 +51,8 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
51
51
|
nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
|
|
52
52
|
nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
|
|
53
53
|
nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
nmdc_runtime/site/export/ncbi_xml.py,sha256=
|
|
55
|
-
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=
|
|
54
|
+
nmdc_runtime/site/export/ncbi_xml.py,sha256=4RqaT6qs1LDSiDDfF-JNZL5gOel8m65oCOelfr0blXs,26209
|
|
55
|
+
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=X35zbkxBxEyCnA9peY9YBAa_0oeoWy3DQEXoAXmc6vg,10100
|
|
56
56
|
nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ0Rut4AtYc0tXA,4844
|
|
57
57
|
nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
58
|
nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
|
|
@@ -61,13 +61,13 @@ nmdc_runtime/site/repair/database_updater.py,sha256=eTNAPtgAc_xQodADBfgomwow9-14
|
|
|
61
61
|
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
62
|
nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
|
|
63
63
|
nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
|
|
64
|
-
nmdc_runtime/site/translation/gold_translator.py,sha256=
|
|
64
|
+
nmdc_runtime/site/translation/gold_translator.py,sha256=HGbWeuxppqlVfU8F5oKTYIDoC6qaftugJeWFIALB9XE,32720
|
|
65
65
|
nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
|
|
66
66
|
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=VxN7yCziQE-ZP9mtrzqI-yaS9taEgTy0EnIEattYeKo,23727
|
|
67
67
|
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=Rol0g67nVBGSBySUzpfdW4Fwes7bKtvnlv2g5cB0aTI,38550
|
|
68
68
|
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=k06eULMTYx0sQ00UlyeNJvCJMcX-neClnES1G6zpPKg,30517
|
|
69
69
|
nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
|
|
70
|
-
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=
|
|
70
|
+
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=UEeqlkz_YGqcnx8vomFysetOlXxDu23q0Ryr93SZy78,41684
|
|
71
71
|
nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
|
|
72
72
|
nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
|
|
73
73
|
nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -75,9 +75,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
|
|
|
75
75
|
nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
|
|
76
76
|
nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
|
|
77
77
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
78
|
-
nmdc_runtime-2.
|
|
79
|
-
nmdc_runtime-2.
|
|
80
|
-
nmdc_runtime-2.
|
|
81
|
-
nmdc_runtime-2.
|
|
82
|
-
nmdc_runtime-2.
|
|
83
|
-
nmdc_runtime-2.
|
|
78
|
+
nmdc_runtime-2.7.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
79
|
+
nmdc_runtime-2.7.0.dist-info/METADATA,sha256=YgD6NKMOIO2FpMKIy7EWaGDTE_XkEM15ZXG2AhgMFFk,8155
|
|
80
|
+
nmdc_runtime-2.7.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
81
|
+
nmdc_runtime-2.7.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
|
|
82
|
+
nmdc_runtime-2.7.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
83
|
+
nmdc_runtime-2.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|