nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +167 -0
- nmdc_runtime/api/analytics.py +22 -2
- nmdc_runtime/api/core/idgen.py +36 -6
- nmdc_runtime/api/db/mongo.py +0 -12
- nmdc_runtime/api/endpoints/find.py +65 -225
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
- nmdc_runtime/api/endpoints/objects.py +4 -11
- nmdc_runtime/api/endpoints/operations.py +0 -27
- nmdc_runtime/api/endpoints/queries.py +22 -0
- nmdc_runtime/api/endpoints/sites.py +0 -24
- nmdc_runtime/api/endpoints/util.py +57 -35
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +84 -60
- nmdc_runtime/api/models/util.py +12 -5
- nmdc_runtime/api/openapi.py +116 -180
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/minter/adapters/repository.py +21 -0
- nmdc_runtime/minter/domain/model.py +20 -0
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +632 -11
- nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
- nmdc_runtime/site/graphs.py +7 -0
- nmdc_runtime/site/ops.py +92 -34
- nmdc_runtime/site/repository.py +2 -0
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +87 -1
- nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/RECORD +47 -57
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
- nmdc_runtime/api/endpoints/ids.py +0 -192
- nmdc_runtime/client/__init__.py +0 -0
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/__init__.py +0 -0
- nmdc_runtime/core/db/Database.py +0 -13
- nmdc_runtime/core/db/__init__.py +0 -0
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/__init__.py +0 -0
- nmdc_runtime/domain/users/__init__.py +0 -0
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/models/user.py +0 -1
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -33
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -825
- nmdc_runtime/lib/nmdc_etl_class.py +0 -396
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/__init__.py +0 -0
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
- nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
from http import HTTPStatus
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class CustomException(Exception):
|
|
5
|
-
code = HTTPStatus.BAD_GATEWAY
|
|
6
|
-
error_code = HTTPStatus.BAD_GATEWAY
|
|
7
|
-
message = HTTPStatus.BAD_GATEWAY.description
|
|
8
|
-
|
|
9
|
-
def __init__(self, message=None):
|
|
10
|
-
if message:
|
|
11
|
-
self.message = message
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class BadRequestException(CustomException):
|
|
15
|
-
code = HTTPStatus.BAD_REQUEST
|
|
16
|
-
error_code = HTTPStatus.BAD_REQUEST
|
|
17
|
-
message = HTTPStatus.BAD_REQUEST.description
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class NotFoundException(CustomException):
|
|
21
|
-
code = HTTPStatus.NOT_FOUND
|
|
22
|
-
error_code = HTTPStatus.NOT_FOUND
|
|
23
|
-
message = HTTPStatus.NOT_FOUND.description
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class ForbiddenException(CustomException):
|
|
27
|
-
code = HTTPStatus.FORBIDDEN
|
|
28
|
-
error_code = HTTPStatus.FORBIDDEN
|
|
29
|
-
message = HTTPStatus.FORBIDDEN.description
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class UnauthorizedException(CustomException):
|
|
33
|
-
code = HTTPStatus.UNAUTHORIZED
|
|
34
|
-
error_code = HTTPStatus.UNAUTHORIZED
|
|
35
|
-
message = HTTPStatus.UNAUTHORIZED.description
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class UnprocessableEntity(CustomException):
|
|
39
|
-
code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
40
|
-
error_code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
41
|
-
message = HTTPStatus.UNPROCESSABLE_ENTITY.description
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class DuplicateValueException(CustomException):
|
|
45
|
-
code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
46
|
-
error_code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
47
|
-
message = HTTPStatus.UNPROCESSABLE_ENTITY.description
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from nmdc_runtime.core.exceptions import CustomException
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class DecodeTokenException(CustomException):
|
|
5
|
-
code = 400
|
|
6
|
-
error_code = 10000
|
|
7
|
-
message = "token decode error"
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class ExpiredTokenException(CustomException):
|
|
11
|
-
code = 400
|
|
12
|
-
error_code = 10001
|
|
13
|
-
message = "expired token"
|
nmdc_runtime/domain/__init__.py
DELETED
|
File without changes
|
|
File without changes
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
from abc import ABC
|
|
3
|
-
|
|
4
|
-
from abc import abstractmethod
|
|
5
|
-
|
|
6
|
-
from nmdc_runtime.domain.users.userSchema import UserAuth, UserUpdate, UserOut
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class IUserQueries(ABC):
|
|
10
|
-
@abstractmethod
|
|
11
|
-
async def create(self, user: UserAuth) -> UserOut:
|
|
12
|
-
"""Create new user"""
|
|
13
|
-
raise NotImplementedError
|
|
14
|
-
|
|
15
|
-
@abstractmethod
|
|
16
|
-
async def update(self, user: UserUpdate) -> UserOut:
|
|
17
|
-
"""Update user data"""
|
|
18
|
-
raise NotImplementedError
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from typing import Optional, List
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from pydantic import BaseModel, EmailStr
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class UserBase(BaseModel):
|
|
8
|
-
username: Optional[str] = None
|
|
9
|
-
email: Optional[str] = None
|
|
10
|
-
full_name: Optional[str] = None
|
|
11
|
-
site_admin: Optional[List[str]] = []
|
|
12
|
-
disabled: Optional[bool] = False
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class UserAuth(UserBase):
|
|
16
|
-
"""User register and login auth"""
|
|
17
|
-
|
|
18
|
-
username: str
|
|
19
|
-
password: str
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# Properties to receive via API on update
|
|
23
|
-
class UserUpdate(UserBase):
|
|
24
|
-
"""Updatable user fields"""
|
|
25
|
-
|
|
26
|
-
email: Optional[EmailStr] = None
|
|
27
|
-
|
|
28
|
-
# User information
|
|
29
|
-
full_name: Optional[str] = None
|
|
30
|
-
password: Optional[str] = None
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class UserOut(UserUpdate):
|
|
34
|
-
"""User fields pushed to the client"""
|
|
35
|
-
|
|
36
|
-
email: EmailStr
|
|
37
|
-
disabled: Optional[bool] = False
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
|
|
3
|
-
from nmdc_runtime.domain.users.userSchema import UserAuth, UserUpdate, UserOut
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class UserService:
|
|
7
|
-
def __init__(self, user_queries: Any) -> None:
|
|
8
|
-
self.__user_queries = user_queries
|
|
9
|
-
|
|
10
|
-
async def create_user(self, user: UserAuth) -> UserOut:
|
|
11
|
-
return await self.__user_queries.create(user)
|
|
12
|
-
|
|
13
|
-
async def update_user(self, username: str, new_user: UserUpdate) -> UserOut:
|
|
14
|
-
pass
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
nmdc_runtime/lib/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
## author: Bill Duncan
|
|
2
|
-
## summary: Contains methods for extracting data for the NMDC ETL pipeline.
|
|
3
|
-
|
|
4
|
-
## system level modules
|
|
5
|
-
import pandas as pds
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def extract_table(merged_df, table_name):
|
|
9
|
-
df = unpivot_dataframe(merged_df[merged_df.nmdc_data_source == table_name])
|
|
10
|
-
return df
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def unpivot_dataframe(
|
|
14
|
-
df,
|
|
15
|
-
index="nmdc_record_id",
|
|
16
|
-
columns="attribute",
|
|
17
|
-
value="value",
|
|
18
|
-
splice=["nmdc_record_id", "attribute", "value"],
|
|
19
|
-
):
|
|
20
|
-
## reshape eav structure to row-column structure
|
|
21
|
-
## see: https://www.journaldev.com/33398/pandas-melt-unmelt-pivot-function
|
|
22
|
-
if len(splice) > 0:
|
|
23
|
-
df = df[splice].pivot(index=index, columns=columns)
|
|
24
|
-
else:
|
|
25
|
-
df = df.pivot(index=index, columns=columns)
|
|
26
|
-
|
|
27
|
-
if len(df) > 0:
|
|
28
|
-
df = df[value].reset_index() # drop value hierarchical index
|
|
29
|
-
if len(df) > 0:
|
|
30
|
-
df = df.where(pds.notnull(df), None) # replace an NaN values with None
|
|
31
|
-
df.columns.name = None # remove column name attribute
|
|
32
|
-
|
|
33
|
-
return df
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
## author: Bill Duncan
|
|
2
|
-
## summary: Contains methods for saving or loading NMDC data into a resource.
|
|
3
|
-
|
|
4
|
-
import json
|
|
5
|
-
import jq
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def save_json(json_data, file_path: str):
|
|
9
|
-
## save json with changed data types
|
|
10
|
-
with open(file_path, "w") as out_file:
|
|
11
|
-
json.dump(json_data, out_file, indent=2)
|
|
12
|
-
return json_data
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def get_json_from_file(file_path: str, replace_single_quote=False):
|
|
16
|
-
## load json
|
|
17
|
-
with open(file_path, "r") as in_file:
|
|
18
|
-
if replace_single_quote: # json
|
|
19
|
-
text = in_file.read()
|
|
20
|
-
json_data = json.loads(text.replace("'", '"'))
|
|
21
|
-
else:
|
|
22
|
-
json_data = json.load(in_file)
|
|
23
|
-
return json_data
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def get_json(file_path="", replace_single_quote=False):
|
|
27
|
-
if len(file_path) > 0:
|
|
28
|
-
return get_json_from_file(file_path, replace_single_quote)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def save_nmdc_dict_as_json_to_file(nmdc_dict: dict, file_path: str):
|
|
32
|
-
with open(file_path, "w") as f:
|
|
33
|
-
json.dump(nmdc_dict, f, indent=2)
|
|
34
|
-
return json.dumps(nmdc_dict, indent=2)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def save_nmdc_dict(nmdc_dict: dict, file_path="", data_format="json"):
|
|
38
|
-
if len(file_path) > 0:
|
|
39
|
-
if "json" == data_format:
|
|
40
|
-
return save_nmdc_dict_as_json_to_file(nmdc_dict, file_path)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def make_nmdc_example_database(
|
|
44
|
-
gold_study_file="output/nmdc_etl/gold_study.json",
|
|
45
|
-
gold_omics_processing_file="output/nmdc_etl/gold_omics_processing.json",
|
|
46
|
-
gold_biosample_file="output/nmdc_etl/gold_biosample.json",
|
|
47
|
-
jgi_fastq_data_object_file="output/nmdc_etl/jgi_fastq_data_objects.json",
|
|
48
|
-
output_file="output/nmdc_example-database.json",
|
|
49
|
-
):
|
|
50
|
-
## load json files
|
|
51
|
-
biosample_json = get_json(gold_biosample_file)
|
|
52
|
-
projects_json = get_json(gold_omics_processing_file)
|
|
53
|
-
study_json = get_json(gold_study_file)
|
|
54
|
-
data_objects_json = get_json(jgi_fastq_data_object_file)
|
|
55
|
-
|
|
56
|
-
## get a list of distinct omics processing study ids, and choose the first 3 studies
|
|
57
|
-
study_ids = set(
|
|
58
|
-
jq.compile(".[] | .part_of[]").input(projects_json).all()
|
|
59
|
-
) # all returns a list
|
|
60
|
-
study_ids = list(study_ids)[0:3]
|
|
61
|
-
# study_ids =
|
|
62
|
-
|
|
63
|
-
## build a test set of studies from the study ids
|
|
64
|
-
study_test = (
|
|
65
|
-
jq.compile(
|
|
66
|
-
".[] | select( .id == ("
|
|
67
|
-
+ ", ".join('"{0}"'.format(id) for id in study_ids)
|
|
68
|
-
+ "))"
|
|
69
|
-
)
|
|
70
|
-
.input(study_json)
|
|
71
|
-
.all()
|
|
72
|
-
) # all() returns a list
|
|
73
|
-
|
|
74
|
-
## build a test set of projects from the study ids
|
|
75
|
-
## note: the jq query only selects first omics found for a given study id
|
|
76
|
-
projects_test = []
|
|
77
|
-
for id in study_ids:
|
|
78
|
-
j = (
|
|
79
|
-
jq.compile(f'[.[] | select( .part_of[]? | . == "{id}")][0]')
|
|
80
|
-
.input(projects_json)
|
|
81
|
-
.all()
|
|
82
|
-
)
|
|
83
|
-
projects_test.append(*j)
|
|
84
|
-
|
|
85
|
-
## get list of unique biossample ids from omics processing and build biosample test set
|
|
86
|
-
biosample_ids = (
|
|
87
|
-
jq.compile(".[] | .has_input[]?").input(projects_test).all()
|
|
88
|
-
) # all() returns a list
|
|
89
|
-
biosample_test = (
|
|
90
|
-
jq.compile(
|
|
91
|
-
".[] | select( .id == ("
|
|
92
|
-
+ ", ".join('"{0}"'.format(id) for id in biosample_ids)
|
|
93
|
-
+ "))"
|
|
94
|
-
)
|
|
95
|
-
.input(biosample_json)
|
|
96
|
-
.all()
|
|
97
|
-
) # all() returns a list
|
|
98
|
-
|
|
99
|
-
## get a list of data object ids and build data objects test set
|
|
100
|
-
data_objects_ids = (
|
|
101
|
-
jq.compile(".[] | .has_output[]?").input(projects_test).all()
|
|
102
|
-
) # all() returns a list
|
|
103
|
-
data_objects_test = (
|
|
104
|
-
jq.compile(
|
|
105
|
-
".[] | select( .id == ("
|
|
106
|
-
+ ", ".join('"{0}"'.format(id) for id in data_objects_ids)
|
|
107
|
-
+ "))"
|
|
108
|
-
)
|
|
109
|
-
.input(data_objects_json)
|
|
110
|
-
.all()
|
|
111
|
-
) # all() returns a list
|
|
112
|
-
|
|
113
|
-
## compile into database object
|
|
114
|
-
database = {
|
|
115
|
-
"study_set": [*study_test],
|
|
116
|
-
"omics_processing_set": [*projects_test],
|
|
117
|
-
"biosample_set": [*biosample_test],
|
|
118
|
-
"data_object_set": [*data_objects_test],
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
save_json(database, output_file)
|