nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (77)
  1. nmdc_runtime/Dockerfile +167 -0
  2. nmdc_runtime/api/analytics.py +22 -2
  3. nmdc_runtime/api/core/idgen.py +36 -6
  4. nmdc_runtime/api/db/mongo.py +0 -12
  5. nmdc_runtime/api/endpoints/find.py +65 -225
  6. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  7. nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
  8. nmdc_runtime/api/endpoints/objects.py +4 -11
  9. nmdc_runtime/api/endpoints/operations.py +0 -27
  10. nmdc_runtime/api/endpoints/queries.py +22 -0
  11. nmdc_runtime/api/endpoints/sites.py +0 -24
  12. nmdc_runtime/api/endpoints/util.py +57 -35
  13. nmdc_runtime/api/entrypoint.sh +7 -0
  14. nmdc_runtime/api/main.py +84 -60
  15. nmdc_runtime/api/models/util.py +12 -5
  16. nmdc_runtime/api/openapi.py +116 -180
  17. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  18. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  19. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  20. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  21. nmdc_runtime/minter/adapters/repository.py +21 -0
  22. nmdc_runtime/minter/domain/model.py +20 -0
  23. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  24. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  25. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  26. nmdc_runtime/site/dagster.yaml +53 -0
  27. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  28. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  29. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  30. nmdc_runtime/site/export/ncbi_xml.py +632 -11
  31. nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
  32. nmdc_runtime/site/graphs.py +7 -0
  33. nmdc_runtime/site/ops.py +92 -34
  34. nmdc_runtime/site/repository.py +2 -0
  35. nmdc_runtime/site/resources.py +16 -3
  36. nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
  37. nmdc_runtime/site/workspace.yaml +13 -0
  38. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  39. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  40. nmdc_runtime/static/README.md +5 -0
  41. nmdc_runtime/static/favicon.ico +0 -0
  42. nmdc_runtime/util.py +87 -1
  43. nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
  44. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/RECORD +47 -57
  45. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
  46. nmdc_runtime/api/endpoints/ids.py +0 -192
  47. nmdc_runtime/client/__init__.py +0 -0
  48. nmdc_runtime/containers.py +0 -14
  49. nmdc_runtime/core/__init__.py +0 -0
  50. nmdc_runtime/core/db/Database.py +0 -13
  51. nmdc_runtime/core/db/__init__.py +0 -0
  52. nmdc_runtime/core/exceptions/__init__.py +0 -23
  53. nmdc_runtime/core/exceptions/base.py +0 -47
  54. nmdc_runtime/core/exceptions/token.py +0 -13
  55. nmdc_runtime/domain/__init__.py +0 -0
  56. nmdc_runtime/domain/users/__init__.py +0 -0
  57. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  58. nmdc_runtime/domain/users/userSchema.py +0 -37
  59. nmdc_runtime/domain/users/userService.py +0 -14
  60. nmdc_runtime/infrastructure/__init__.py +0 -0
  61. nmdc_runtime/infrastructure/database/__init__.py +0 -0
  62. nmdc_runtime/infrastructure/database/db.py +0 -3
  63. nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
  64. nmdc_runtime/infrastructure/database/models/user.py +0 -1
  65. nmdc_runtime/lib/__init__.py +0 -1
  66. nmdc_runtime/lib/extract_nmdc_data.py +0 -33
  67. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  68. nmdc_runtime/lib/nmdc_dataframes.py +0 -825
  69. nmdc_runtime/lib/nmdc_etl_class.py +0 -396
  70. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  71. nmdc_runtime/site/drsobjects/__init__.py +0 -0
  72. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  73. nmdc_runtime/site/drsobjects/registration.py +0 -131
  74. nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
  75. nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
  76. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
  77. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,47 +0,0 @@
1
- from http import HTTPStatus
2
-
3
-
4
- class CustomException(Exception):
5
- code = HTTPStatus.BAD_GATEWAY
6
- error_code = HTTPStatus.BAD_GATEWAY
7
- message = HTTPStatus.BAD_GATEWAY.description
8
-
9
- def __init__(self, message=None):
10
- if message:
11
- self.message = message
12
-
13
-
14
- class BadRequestException(CustomException):
15
- code = HTTPStatus.BAD_REQUEST
16
- error_code = HTTPStatus.BAD_REQUEST
17
- message = HTTPStatus.BAD_REQUEST.description
18
-
19
-
20
- class NotFoundException(CustomException):
21
- code = HTTPStatus.NOT_FOUND
22
- error_code = HTTPStatus.NOT_FOUND
23
- message = HTTPStatus.NOT_FOUND.description
24
-
25
-
26
- class ForbiddenException(CustomException):
27
- code = HTTPStatus.FORBIDDEN
28
- error_code = HTTPStatus.FORBIDDEN
29
- message = HTTPStatus.FORBIDDEN.description
30
-
31
-
32
- class UnauthorizedException(CustomException):
33
- code = HTTPStatus.UNAUTHORIZED
34
- error_code = HTTPStatus.UNAUTHORIZED
35
- message = HTTPStatus.UNAUTHORIZED.description
36
-
37
-
38
- class UnprocessableEntity(CustomException):
39
- code = HTTPStatus.UNPROCESSABLE_ENTITY
40
- error_code = HTTPStatus.UNPROCESSABLE_ENTITY
41
- message = HTTPStatus.UNPROCESSABLE_ENTITY.description
42
-
43
-
44
- class DuplicateValueException(CustomException):
45
- code = HTTPStatus.UNPROCESSABLE_ENTITY
46
- error_code = HTTPStatus.UNPROCESSABLE_ENTITY
47
- message = HTTPStatus.UNPROCESSABLE_ENTITY.description
@@ -1,13 +0,0 @@
1
- from nmdc_runtime.core.exceptions import CustomException
2
-
3
-
4
- class DecodeTokenException(CustomException):
5
- code = 400
6
- error_code = 10000
7
- message = "token decode error"
8
-
9
-
10
- class ExpiredTokenException(CustomException):
11
- code = 400
12
- error_code = 10001
13
- message = "expired token"
File without changes
File without changes
@@ -1,18 +0,0 @@
1
- from __future__ import annotations
2
- from abc import ABC
3
-
4
- from abc import abstractmethod
5
-
6
- from nmdc_runtime.domain.users.userSchema import UserAuth, UserUpdate, UserOut
7
-
8
-
9
- class IUserQueries(ABC):
10
- @abstractmethod
11
- async def create(self, user: UserAuth) -> UserOut:
12
- """Create new user"""
13
- raise NotImplementedError
14
-
15
- @abstractmethod
16
- async def update(self, user: UserUpdate) -> UserOut:
17
- """Update user data"""
18
- raise NotImplementedError
@@ -1,37 +0,0 @@
1
- from typing import Optional, List
2
-
3
-
4
- from pydantic import BaseModel, EmailStr
5
-
6
-
7
- class UserBase(BaseModel):
8
- username: Optional[str] = None
9
- email: Optional[str] = None
10
- full_name: Optional[str] = None
11
- site_admin: Optional[List[str]] = []
12
- disabled: Optional[bool] = False
13
-
14
-
15
- class UserAuth(UserBase):
16
- """User register and login auth"""
17
-
18
- username: str
19
- password: str
20
-
21
-
22
- # Properties to receive via API on update
23
- class UserUpdate(UserBase):
24
- """Updatable user fields"""
25
-
26
- email: Optional[EmailStr] = None
27
-
28
- # User information
29
- full_name: Optional[str] = None
30
- password: Optional[str] = None
31
-
32
-
33
- class UserOut(UserUpdate):
34
- """User fields pushed to the client"""
35
-
36
- email: EmailStr
37
- disabled: Optional[bool] = False
@@ -1,14 +0,0 @@
1
- from typing import Any
2
-
3
- from nmdc_runtime.domain.users.userSchema import UserAuth, UserUpdate, UserOut
4
-
5
-
6
- class UserService:
7
- def __init__(self, user_queries: Any) -> None:
8
- self.__user_queries = user_queries
9
-
10
- async def create_user(self, user: UserAuth) -> UserOut:
11
- return await self.__user_queries.create(user)
12
-
13
- async def update_user(self, username: str, new_user: UserUpdate) -> UserOut:
14
- pass
File without changes
File without changes
@@ -1,3 +0,0 @@
1
- """
2
- Database initialization
3
- """
File without changes
@@ -1 +0,0 @@
1
- from __future__ import annotations
@@ -1 +0,0 @@
1
-
@@ -1,33 +0,0 @@
1
- ## author: Bill Duncan
2
- ## summary: Contains methods for extracting data for the NMDC ETL pipeline.
3
-
4
- ## system level modules
5
- import pandas as pds
6
-
7
-
8
- def extract_table(merged_df, table_name):
9
- df = unpivot_dataframe(merged_df[merged_df.nmdc_data_source == table_name])
10
- return df
11
-
12
-
13
- def unpivot_dataframe(
14
- df,
15
- index="nmdc_record_id",
16
- columns="attribute",
17
- value="value",
18
- splice=["nmdc_record_id", "attribute", "value"],
19
- ):
20
- ## reshape eav structure to row-column structure
21
- ## see: https://www.journaldev.com/33398/pandas-melt-unmelt-pivot-function
22
- if len(splice) > 0:
23
- df = df[splice].pivot(index=index, columns=columns)
24
- else:
25
- df = df.pivot(index=index, columns=columns)
26
-
27
- if len(df) > 0:
28
- df = df[value].reset_index() # drop value hierarchical index
29
- if len(df) > 0:
30
- df = df.where(pds.notnull(df), None) # replace an NaN values with None
31
- df.columns.name = None # remove column name attribute
32
-
33
- return df
@@ -1,121 +0,0 @@
1
- ## author: Bill Duncan
2
- ## summary: Contains methods for saving or loading NMDC data into a resource.
3
-
4
- import json
5
- import jq
6
-
7
-
8
- def save_json(json_data, file_path: str):
9
- ## save json with changed data types
10
- with open(file_path, "w") as out_file:
11
- json.dump(json_data, out_file, indent=2)
12
- return json_data
13
-
14
-
15
- def get_json_from_file(file_path: str, replace_single_quote=False):
16
- ## load json
17
- with open(file_path, "r") as in_file:
18
- if replace_single_quote: # json
19
- text = in_file.read()
20
- json_data = json.loads(text.replace("'", '"'))
21
- else:
22
- json_data = json.load(in_file)
23
- return json_data
24
-
25
-
26
- def get_json(file_path="", replace_single_quote=False):
27
- if len(file_path) > 0:
28
- return get_json_from_file(file_path, replace_single_quote)
29
-
30
-
31
- def save_nmdc_dict_as_json_to_file(nmdc_dict: dict, file_path: str):
32
- with open(file_path, "w") as f:
33
- json.dump(nmdc_dict, f, indent=2)
34
- return json.dumps(nmdc_dict, indent=2)
35
-
36
-
37
- def save_nmdc_dict(nmdc_dict: dict, file_path="", data_format="json"):
38
- if len(file_path) > 0:
39
- if "json" == data_format:
40
- return save_nmdc_dict_as_json_to_file(nmdc_dict, file_path)
41
-
42
-
43
- def make_nmdc_example_database(
44
- gold_study_file="output/nmdc_etl/gold_study.json",
45
- gold_omics_processing_file="output/nmdc_etl/gold_omics_processing.json",
46
- gold_biosample_file="output/nmdc_etl/gold_biosample.json",
47
- jgi_fastq_data_object_file="output/nmdc_etl/jgi_fastq_data_objects.json",
48
- output_file="output/nmdc_example-database.json",
49
- ):
50
- ## load json files
51
- biosample_json = get_json(gold_biosample_file)
52
- projects_json = get_json(gold_omics_processing_file)
53
- study_json = get_json(gold_study_file)
54
- data_objects_json = get_json(jgi_fastq_data_object_file)
55
-
56
- ## get a list of distinct omics processing study ids, and choose the first 3 studies
57
- study_ids = set(
58
- jq.compile(".[] | .part_of[]").input(projects_json).all()
59
- ) # all returns a list
60
- study_ids = list(study_ids)[0:3]
61
- # study_ids =
62
-
63
- ## build a test set of studies from the study ids
64
- study_test = (
65
- jq.compile(
66
- ".[] | select( .id == ("
67
- + ", ".join('"{0}"'.format(id) for id in study_ids)
68
- + "))"
69
- )
70
- .input(study_json)
71
- .all()
72
- ) # all() returns a list
73
-
74
- ## build a test set of projects from the study ids
75
- ## note: the jq query only selects first omics found for a given study id
76
- projects_test = []
77
- for id in study_ids:
78
- j = (
79
- jq.compile(f'[.[] | select( .part_of[]? | . == "{id}")][0]')
80
- .input(projects_json)
81
- .all()
82
- )
83
- projects_test.append(*j)
84
-
85
- ## get list of unique biossample ids from omics processing and build biosample test set
86
- biosample_ids = (
87
- jq.compile(".[] | .has_input[]?").input(projects_test).all()
88
- ) # all() returns a list
89
- biosample_test = (
90
- jq.compile(
91
- ".[] | select( .id == ("
92
- + ", ".join('"{0}"'.format(id) for id in biosample_ids)
93
- + "))"
94
- )
95
- .input(biosample_json)
96
- .all()
97
- ) # all() returns a list
98
-
99
- ## get a list of data object ids and build data objects test set
100
- data_objects_ids = (
101
- jq.compile(".[] | .has_output[]?").input(projects_test).all()
102
- ) # all() returns a list
103
- data_objects_test = (
104
- jq.compile(
105
- ".[] | select( .id == ("
106
- + ", ".join('"{0}"'.format(id) for id in data_objects_ids)
107
- + "))"
108
- )
109
- .input(data_objects_json)
110
- .all()
111
- ) # all() returns a list
112
-
113
- ## compile into database object
114
- database = {
115
- "study_set": [*study_test],
116
- "omics_processing_set": [*projects_test],
117
- "biosample_set": [*biosample_test],
118
- "data_object_set": [*data_objects_test],
119
- }
120
-
121
- save_json(database, output_file)