nmdc-runtime 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +167 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/mongo.py +435 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +502 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +270 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +796 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +425 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +7 -8
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +633 -13
- nmdc_runtime/site/export/ncbi_xml_utils.py +115 -1
- nmdc_runtime/site/graphs.py +8 -22
- nmdc_runtime/site/ops.py +147 -181
- nmdc_runtime/site/repository.py +2 -112
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/gold_translator.py +4 -12
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +84 -68
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +90 -48
- nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
- nmdc_runtime-2.11.0.dist-info/RECORD +128 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.9.0.dist-info/METADATA +0 -214
- nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
- nmdc_runtime-2.9.0.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
## author: Bill Duncan
|
|
2
|
-
## summary: Contains methods for saving or loading NMDC data into a resource.
|
|
3
|
-
|
|
4
|
-
import json
|
|
5
|
-
import jq
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def save_json(json_data, file_path: str):
    """Write ``json_data`` to ``file_path`` as JSON indented by two spaces.

    The payload is serialized *before* the target file is opened. Opening
    with mode ``"w"`` truncates the file immediately, so serializing first
    means a payload that cannot be encoded raises without destroying an
    existing file or leaving a half-written one behind.

    Args:
        json_data: Any object accepted by ``json.dumps``.
        file_path: Path of the file to create or overwrite.

    Returns:
        The ``json_data`` object that was passed in, unchanged.

    Raises:
        TypeError: If ``json_data`` contains a value JSON cannot encode.
        OSError: If ``file_path`` cannot be opened for writing.
    """
    serialized = json.dumps(json_data, indent=2)
    with open(file_path, "w") as out_file:
        out_file.write(serialized)
    return json_data
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def get_json_from_file(file_path: str, replace_single_quote=False):
    """Load and return the JSON document stored in ``file_path``.

    The file is read as UTF-8 text and parsed as-is first. Only when that
    fails and ``replace_single_quote`` is true is every ``'`` rewritten to
    ``"`` and the parse retried. Rewriting unconditionally would corrupt
    valid JSON whose strings contain apostrophes (e.g. ``"O'Brien"``).

    Args:
        file_path: Path of the file to read.
        replace_single_quote: When true, fall back to treating single quotes
            as double quotes if the text is not valid JSON on its own.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the text cannot be parsed (after the
            single-quote fallback, when that is enabled).
        OSError: If ``file_path`` cannot be opened for reading.
    """
    with open(file_path, "r", encoding="utf-8") as in_file:
        text = in_file.read()

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        if not replace_single_quote:
            raise
        # Fallback for single-quoted, Python-repr-style pseudo-JSON.
        return json.loads(text.replace("'", '"'))
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def get_json(file_path="", replace_single_quote=False):
    """Load JSON from ``file_path``, or return ``None`` when no path is given.

    Args:
        file_path: Path of the JSON file; an empty value means "nothing to load".
        replace_single_quote: Forwarded unchanged to ``get_json_from_file``.

    Returns:
        Whatever ``get_json_from_file`` returns, or ``None`` for an empty path.
    """
    if len(file_path) == 0:
        # No source specified, so there is nothing to read.
        return None
    return get_json_from_file(file_path, replace_single_quote)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def save_nmdc_dict_as_json_to_file(nmdc_dict: dict, file_path: str):
    """Write ``nmdc_dict`` to ``file_path`` as JSON and return the JSON text.

    The dictionary is serialized exactly once, before the file is opened:
    the same string is written to disk and returned. This avoids encoding
    the data twice and avoids truncating an existing file when the
    dictionary turns out not to be serializable.

    Args:
        nmdc_dict: The data to serialize.
        file_path: Path of the file to create or overwrite.

    Returns:
        The JSON text (indented by two spaces) that was written to the file.

    Raises:
        TypeError: If ``nmdc_dict`` contains a value JSON cannot encode.
        OSError: If ``file_path`` cannot be opened for writing.
    """
    serialized = json.dumps(nmdc_dict, indent=2)
    with open(file_path, "w") as f:
        f.write(serialized)
    return serialized
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def save_nmdc_dict(nmdc_dict: dict, file_path="", data_format="json"):
    """Save ``nmdc_dict`` to ``file_path`` in the requested format.

    Only the ``"json"`` format is handled. An empty ``file_path`` or any
    other ``data_format`` results in no write and a ``None`` return value.

    Args:
        nmdc_dict: The data to save.
        file_path: Destination path; empty means "do not save".
        data_format: Name of the output format.

    Returns:
        The result of ``save_nmdc_dict_as_json_to_file`` when a JSON file is
        written, otherwise ``None``.
    """
    if len(file_path) == 0:
        return None
    if data_format != "json":
        return None
    return save_nmdc_dict_as_json_to_file(nmdc_dict, file_path)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _select_records_by_id(records, ids):
    """Return the items of ``records`` whose ``.id`` equals one of ``ids``."""
    # Compare ids as strings and drop repeats (keeping first-seen order): the
    # jq comparison below emits a record once per matching id, so a repeated
    # id would put the same record into the result more than once.
    unique_ids = list(dict.fromkeys(str(i) for i in ids))
    if not unique_ids:
        # With no ids the program would read `select( .id == ())`, which jq
        # is expected to reject; nothing could match in any case.
        return []
    # json.dumps yields a correctly escaped string literal, so ids containing
    # quotes or backslashes cannot break (or alter) the jq program.
    id_literals = ", ".join(json.dumps(i) for i in unique_ids)
    return (
        jq.compile(f".[] | select( .id == ({id_literals}))")
        .input(records)
        .all()
    )  # all() returns a list


def make_nmdc_example_database(
    gold_study_file="output/nmdc_etl/gold_study.json",
    gold_omics_processing_file="output/nmdc_etl/gold_omics_processing.json",
    gold_biosample_file="output/nmdc_etl/gold_biosample.json",
    jgi_fastq_data_object_file="output/nmdc_etl/jgi_fastq_data_objects.json",
    output_file="output/nmdc_example-database.json",
):
    """Build a small example database from four JSON files and save it.

    Up to three study ids are taken from the ``part_of`` values of the omics
    processing records. For each of them the matching study record and the
    first omics processing record that references it are collected, followed
    by the biosamples (``has_input``) and data objects (``has_output``)
    referenced by those omics processing records. The result is written to
    ``output_file`` via ``save_json``.

    The study ids are sorted before the first three are taken so that the
    same inputs always produce the same example database.

    Args:
        gold_study_file: Path of the study JSON file.
        gold_omics_processing_file: Path of the omics processing JSON file.
        gold_biosample_file: Path of the biosample JSON file.
        jgi_fastq_data_object_file: Path of the data object JSON file.
        output_file: Path the example database is written to.

    Returns:
        None.
    """
    ## load json files
    biosample_json = get_json(gold_biosample_file)
    projects_json = get_json(gold_omics_processing_file)
    study_json = get_json(gold_study_file)
    data_objects_json = get_json(jgi_fastq_data_object_file)

    ## get the distinct omics processing study ids, and choose the first 3 studies
    distinct_study_ids = set(
        jq.compile(".[] | .part_of[]").input(projects_json).all()
    )  # all() returns a list
    # Set iteration order is arbitrary, so sort before slicing.
    study_ids = sorted(distinct_study_ids, key=str)[:3]

    ## build a test set of studies from the study ids
    study_test = _select_records_by_id(study_json, study_ids)

    ## build a test set of projects from the study ids
    ## note: the jq query only selects first omics found for a given study id
    projects_test = []
    for study_id in study_ids:
        study_id_literal = json.dumps(str(study_id))
        first_match = (
            jq.compile(f"[.[] | select( .part_of[]? | . == {study_id_literal})][0]")
            .input(projects_json)
            .all()
        )
        # The program wraps its matches in an array and indexes [0], so it
        # produces exactly one value per study id.
        projects_test.extend(first_match)

    ## get the biosample ids from omics processing and build biosample test set
    biosample_ids = (
        jq.compile(".[] | .has_input[]?").input(projects_test).all()
    )  # all() returns a list
    biosample_test = _select_records_by_id(biosample_json, biosample_ids)

    ## get a list of data object ids and build data objects test set
    data_objects_ids = (
        jq.compile(".[] | .has_output[]?").input(projects_test).all()
    )  # all() returns a list
    data_objects_test = _select_records_by_id(data_objects_json, data_objects_ids)

    ## compile into database object
    database = {
        "study_set": [*study_test],
        "omics_processing_set": [*projects_test],
        "biosample_set": [*biosample_test],
        "data_object_set": [*data_objects_test],
    }

    save_json(database, output_file)
|