nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,207 @@
1
+ import logging
2
+ from typing import List, Optional, Union
3
+
4
+ import pymongo.database
5
+ from fastapi import Depends, HTTPException, status
6
+ from jose import jwt
7
+ from pydantic import BaseModel
8
+ from jose.exceptions import ExpiredSignatureError, JWTClaimsError, JWTError
9
+
10
+ from nmdc_runtime.api.core.auth import (
11
+ verify_password,
12
+ SECRET_KEY,
13
+ ALGORITHM,
14
+ oauth2_scheme,
15
+ credentials_exception,
16
+ TokenData,
17
+ bearer_scheme,
18
+ )
19
+
20
+ from nmdc_runtime.api.models.site import get_site
21
+
22
+ from nmdc_runtime.api.db.mongo import get_mongo_db
23
+
24
+
25
# Base user record: the non-credential fields shared by `UserIn` and `UserInDB`.
class User(BaseModel):
    username: str
    email: Optional[str] = None
    full_name: Optional[str] = None
    # NOTE(review): presumably the ids of the sites this user administers — confirm against callers.
    site_admin: Optional[List[str]] = []
    # `get_current_active_user` rejects any user for whom this flag is truthy.
    disabled: Optional[bool] = False
31
+
32
+
33
# A `User` plus a `password` field.
# NOTE(review): presumably the plaintext password as submitted by a client — confirm.
class UserIn(User):
    password: str
35
+
36
+
37
# A `User` plus the `hashed_password` that `authenticate_user` passes to `verify_password`.
class UserInDB(User):
    hashed_password: str
39
+
40
+
41
def get_user(mdb, username: str) -> Optional[UserInDB]:
    r"""
    Looks up a user by username.

    Queries the `users` collection of `mdb` for a document whose `username`
    field equals the specified value and wraps that document in a `UserInDB`.
    Returns `None` when no such document exists.
    """

    document = mdb.users.find_one({"username": username})
    return None if document is None else UserInDB(**document)
49
+
50
+
51
def authenticate_user(mdb, username: str, password: str) -> Union[UserInDB, bool]:
    r"""
    Checks a username/password combination.

    Returns the matching user when the username exists and the password
    verifies against that user's stored hash; returns `False` otherwise.
    """

    candidate = get_user(mdb, username)
    # The password is only checked once a user has actually been found.
    if candidate and verify_password(password, candidate.hashed_password):
        return candidate
    return False
62
+
63
+
64
async def get_current_user(
    token: str = Depends(oauth2_scheme),
    bearer_credentials: str = Depends(bearer_scheme),
    mdb: pymongo.database.Database = Depends(get_mongo_db),
) -> UserInDB:
    r"""
    Resolves the provided access token to a user.

    The token's `sub` claim must be a string of the form `user:<username>` or
    `client:<client_id>`. A `user:` subject is looked up via `get_user`; a
    `client:` subject is turned into an ephemeral "user" (whose username is the
    site client's `client_id`) via `get_client_user`.

    Raises an HTTP 401 `HTTPException` when the token is missing, has been
    invalidated, has expired, fails to decode, carries a malformed subject,
    or does not resolve to a user.

    Note: `bearer_credentials` is not read in the body of this function.

    Reference: The following web page contains information about JWT claims:
               https://auth0.com/docs/secure/tokens/json-web-tokens/json-web-token-claims
    """

    def unauthorized(detail: str) -> HTTPException:
        # All failures share the same status code and header; only the
        # (actionable, but not sensitive) detail message differs.
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    invalid_detail = "Access token is invalid. Please log in again."

    # Reject a missing token, then a token that has been explicitly invalidated.
    if token is None:
        raise unauthorized("Access token is invalid or missing. Please log in again.")
    if mdb.invalidated_tokens.find_one({"_id": token}):
        raise unauthorized("Access token has been invalidated. Please log in again.")

    # Verify the JWT's signature and obtain its payload.
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except ExpiredSignatureError as error:
        logging.exception(error)
        raise unauthorized("Access token has expired. Please log in again.")
    except (JWTClaimsError, JWTError, AttributeError) as error:
        logging.exception(error)
        raise unauthorized(invalid_detail)

    # Split the subject into its prefix and the name that follows it.
    subject: Optional[str] = payload.get("sub", None)
    if not isinstance(subject, str):
        logging.warning("The subject is not a string.")
        raise unauthorized(invalid_detail)
    subject_prefix = next(
        (prefix for prefix in ("user:", "client:") if subject.startswith(prefix)),
        None,
    )
    if subject_prefix is None:
        logging.warning("The subject contains an invalid prefix.")
        raise unauthorized(invalid_detail)
    username = subject.removeprefix(subject_prefix)
    if username == "":
        logging.warning("The subject contains nothing after the prefix.")
        raise unauthorized(invalid_detail)
    token_data = TokenData(subject=username)

    # Coerce a "client" into a "user"
    # TODO: consolidate the client/user distinction.
    if not isinstance(token_data.subject, str):
        logging.warning("The subject is not a string.")
        raise unauthorized(invalid_detail)
    resolvers = {
        "user:": lambda name: get_user(mdb, username=name),
        # construct a user from the client_id
        "client:": lambda name: get_client_user(mdb, client_id=name),
    }
    resolve = resolvers.get(subject_prefix)
    if resolve is None:
        # Note: The prefix was already validated above, so this case is not expected to occur.
        logging.warning("The subject prefix is not something we recognize.")
        user = None
    else:
        user = resolve(token_data.subject)

    if user is None:
        logging.warning(
            f"Failed to resolve token subject '{token_data.subject}' to a user."
        )
        raise unauthorized(invalid_detail)
    return user
172
+
173
+
174
def get_client_user(mdb, client_id: str) -> UserInDB:
    r"""
    Returns an ephemeral "user" whose username is the specified `client_id`
    and whose hashed password is the hashed secret of that client.

    The site is obtained via `get_site(mdb, client_id)`, and the client is then
    selected from that site's `clients` by matching `id`.

    Raises `credentials_exception` if no site is found, or if the site has no
    client whose `id` equals `client_id`.
    """

    # Get the site associated with the identified client.
    site = get_site(mdb, client_id)
    if site is None:
        raise credentials_exception

    # Get the client, itself, via the site. A default of `None` is passed to
    # `next()` because, without one, an unmatched `client_id` would raise
    # `StopIteration` and the check below could never trigger.
    client = next(
        (candidate for candidate in site.clients if candidate.id == client_id),
        None,
    )
    if client is None:
        raise credentials_exception

    # Make an ephemeral "user" whose username matches the client's `id`.
    return UserInDB(username=client.id, hashed_password=client.hashed_secret)
196
+
197
+
198
async def get_current_active_user(
    current_user: UserInDB = Depends(get_current_user),
) -> UserInDB:
    r"""
    Passes the current user through unless their account is disabled.

    Raises an HTTP 400 `HTTPException` ("Inactive user") when the user's
    `disabled` flag is truthy.
    """

    if not current_user.disabled:
        return current_user
    raise HTTPException(status_code=400, detail="Inactive user")
@@ -0,0 +1,260 @@
1
+ from typing import TypeVar, List, Optional, Generic, Annotated
2
+
3
+ from pydantic import model_validator, Field, BaseModel
4
+
5
+ ResultT = TypeVar("ResultT")
6
+
7
+
8
# Generic response envelope for "list" endpoints: one page of `ResultT` items
# plus an optional token for requesting the next page.
class ListResponse(BaseModel, Generic[ResultT]):
    resources: List[ResultT]
    # NOTE(review): presumably left as `None` when there are no further pages — confirm against the endpoints.
    next_page_token: Optional[str] = None
11
+
12
+
13
class ListRequest(BaseModel):
    r"""
    An encapsulation of a set of parameters accepted by API endpoints related to listing things.

    Note: This class was documented after the `FindRequest` class was documented. You can refer to the documentation of
          the latter class for additional context about the usage of Pydantic's `Field` constructor in this class.
    """

    # MongoDB-style query document, supplied as a string.
    filter: Optional[str] = Field(
        default=None,
        title="Filter",
        description="""The criteria by which you want to filter the resources, in the same format as the [`query`
parameter](https://www.mongodb.com/docs/manual/reference/method/db.collection.find/#std-label-method-find-query)
of MongoDB's `db.collection.find()` method.\n\n_Example:_
`{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}`""",
        examples=[
            r'{"ecosystem_type": "Freshwater"}',
            r'{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}',
        ],
    )
    # TODO: Document the following things about this type hint and `Field` definition:
    #       (a) why the type here is `int` as opposed to `PerPageRange` (`FindRequest` uses the latter),
    #       (b) why the default value here is 20 as opposed to 25 (the default value in `FindRequest`), and
    #       (c) why there is no upper limit on the value (the `PerPageRange` type has an upper limit of 2000).
    #
    # Note: If the HTTP request lacks a value for this parameter, Pydantic will fall back to the default value specified here.
    max_page_size: int = Field(
        default=20,
        title="Resources per page",
        description="How many resources you want _each page_ to contain, formatted as a positive integer.",
        examples=[20],
    )
    # Bookmark taken from the `next_page_token` field of a previous response.
    page_token: Optional[str] = Field(
        default=None,
        title="Next page token",
        description="""A bookmark you can use to fetch the _next_ page of resources. You can get this from the
`next_page_token` field in a previous response from this endpoint.\n\n_Example_:
`nmdc:sys0zr0fbt71`""",
        examples=[
            "nmdc:sys0zr0fbt71",
        ],
    )
    # TODO: Document the endpoint's behavior when a projection includes a _nested_ field identifier (i.e. `foo.bar`),
    #       and ensure the endpoint doesn't break when the projection includes field descriptors that contain commas.
    projection: Optional[str] = Field(
        default=None,
        title="Projection",
        description="""Comma-delimited list of the names of the fields you want the resources in the response to
include. Note: In addition to those fields, the response will also include the `id`
field.\n\n_Example_: `name, ecosystem_type`""",
        examples=[
            "name, ecosystem_type",
        ],
    )
67
+
68
+
69
# Integer constrained to the range 1..2000 (greater than 0, at most 2,000);
# used as the type of `FindRequest.per_page`.
PerPageRange = Annotated[int, Field(gt=0, le=2_000)]
70
+
71
+
72
class FindRequest(BaseModel):
    r"""
    An encapsulation of a set of parameters accepted by API endpoints related to finding things.

    Notes:
    - The "Query Parameter Models" section of the FastAPI docs says that this way of encapsulating
      a set of query parameter definitions in a Pydantic model — so that Swagger UI displays a given
      parameter's _description_ — was introduced in FastAPI 0.115.0.
      Reference: https://fastapi.tiangolo.com/tutorial/query-param-models/
    - While Swagger UI does show the parameter's _description_, specifically, it does not currently show the
      parameter's _title_ or example value(s). The approach shown in the "Classes as Dependencies" section
      of the FastAPI docs (i.e. https://fastapi.tiangolo.com/tutorial/dependencies/classes-as-dependencies/)
      does result in Swagger UI showing those additional things, but the approach involves not inheriting
      from Pydantic's `BaseModel` class and involves defining an `__init__` method for the class. That is
      further than I want to take these classes from their existing selves at this point. To compensate
      for that, I have included examples _within_ some of the descriptions.
      Reference: https://github.com/fastapi/fastapi/issues/318#issuecomment-507043221
    - The "Fields" section of the Pydantic docs says:
      > "The `Field` function is used to customize and add metadata to fields of models."
      References: https://docs.pydantic.dev/latest/concepts/fields/
    """

    filter: Optional[str] = Field(
        default=None,
        title="Filter",
        description="""The criteria by which you want to filter the resources, formatted as a comma-separated list of
`attribute:value` pairs. The `value` can include a comparison operator (e.g. `>=`). If the attribute
is of type _string_ and you append `.search` to its name, the server will perform a full-text
search.\n\n_Example:_ `ecosystem_category:Plants, lat_lon.latitude:>35.0`""",
        examples=[
            "ecosystem_category:Plants",
            "ecosystem_category:Plants, lat_lon.latitude:>35.0",
        ],
    )
    search: Optional[str] = Field(
        default=None,
        title="Search",
        description="N/A _(not implemented yet)_",
    )
    sort: Optional[str] = Field(
        default=None,
        title="Sort",
        description="""How you want the resources to be ordered in the response, formatted as a comma-separated list of
`attribute:value` pairs. Each `attribute` is the name of a field you want the resources to be
ordered by, and each `value` is the direction you want the values in that field to be ordered
(i.e. `asc` or no value for _ascending_ order, and `desc` for _descending_ order).\n\n_Example:_
`depth.has_numeric_value:desc, ecosystem_type`""",
        examples=[
            "depth.has_numeric_value:desc",
            "depth.has_numeric_value:desc, ecosystem_type",
        ],
    )
    page: Optional[int] = Field(
        default=None,
        title="Page number",
        description="""_Which page_ of resources you want to retrieve, when using page number-based pagination.
This is the page number formatted as an integer ≥ 1.
**Limitation:** When using _page number_-based pagination, only the first 10,000 resources
are accessible. You can access resources beyond that by using _cursor_-based pagination.""",
        examples=[1],
    )
    per_page: PerPageRange = Field(
        default=25,
        title="Resources per page",
        description="How many resources you want _each page_ to contain, formatted as a positive integer ≤ 2000.",
        examples=[25],
    )
    cursor: Optional[str] = Field(
        default=None,
        title="Cursor",
        description="""A bookmark you can use to fetch the _next_ page of resources, when using cursor-based pagination.
To begin using cursor-based pagination, set the `cursor` parameter to `*`. The response's `meta` object will
include a `next_cursor` field, whose value can be used as the `cursor` parameter in a subsequent
request.\n\n_Example_: `nmdc:sys0zr0fbt71`""",
        examples=[
            "*",
            "nmdc:sys0zr0fbt71",
        ],
    )
    group_by: Optional[str] = Field(
        default=None,
        title="Group by",
        description="N/A _(not implemented yet)_",
    )
    fields: Optional[str] = Field(
        default=None,
        title="Fields",
        description="""The fields you want the resources to include in the response, formatted as a comma-separated list
of field names. This can be used to reduce the size and complexity of the response.\n\n_Example:_
`name, ess_dive_datasets`""",
        examples=[
            "name",
            "name, ess_dive_datasets",
        ],
    )

    # Reference: https://docs.pydantic.dev/latest/concepts/validators/#model-validators
    @model_validator(mode="before")
    @classmethod
    def set_page_if_cursor_unset(cls, values):
        # Enforces that page-based and cursor-based pagination are mutually
        # exclusive, and falls back to page 1 when neither was requested.
        #
        # A "before" validator receives the raw input, which is not guaranteed
        # to be a dict. Anything else is handed back untouched so that Pydantic's
        # own validation reports the problem.
        if not isinstance(values, dict):
            return values
        page, cursor = values.get("page"), values.get("cursor")
        if page is not None and cursor is not None:
            raise ValueError("cannot use cursor- and page-based pagination together")
        if page is None and cursor is None:
            values["page"] = 1
        return values
177
+
178
+
179
# Response envelope for "find" endpoints. All three fields are required and
# loosely typed (plain dicts), so their inner structure is not validated here.
class FindResponse(BaseModel):
    # NOTE(review): presumably carries pagination info such as `next_cursor`
    # (mentioned in the description of `FindRequest.cursor`) — confirm.
    meta: dict
    results: List[dict]
    group_by: List[dict]
183
+
184
+
185
class DeleteResponse(BaseModel):
    r"""
    Response model for "delete" operations. It summarizes the result of the
    operation and it lists identifiers of the documents that were deleted.
    """

    # The only required field; every list below defaults to empty.
    message: str = Field(
        description="Success message describing the deletion operation"
    )
    deleted_workflow_execution_ids: List[str] = Field(
        # Note: `default_factory=list` sets this to an empty list by default.
        default_factory=list,
        description="The `id`s of the `WorkflowExecution`s that were deleted",
    )
    deleted_data_object_ids: List[str] = Field(
        default_factory=list,
        description="The `id`s of the `DataObject`s that were deleted",
    )
    # Unlike the other lists, these are MongoDB `ObjectId`s (as strings), not schema `id`s.
    deleted_functional_annotation_agg_oids: List[str] = Field(
        default_factory=list,
        description="The internal MongoDB `ObjectId`s of the `FunctionalAnnotationAggMember`s that were deleted",
    )
    deleted_job_ids: List[str] = Field(
        default_factory=list,
        description="The `id`s of the `jobs` documents that were deleted",
    )
210
+ )
211
+
212
+
213
# Mapping from collection name to the set of attribute paths for which an
# index is wanted in that collection.
#
# Note: For MongoDB, a single collection can have no more than 64 indexes
# Note: Each collection has a unique index set on "id" elsewhere.
entity_attributes_to_index = {
    "biosample_set": {
        "alternative_identifiers",
        "collection_date.has_raw_value",
        "ecosystem",
        "ecosystem_category",
        "ecosystem_subtype",
        "ecosystem_type",
        "env_broad_scale.has_raw_value",
        "env_local_scale.has_raw_value",
        "env_medium.has_raw_value",
        # Note: if `lat_lon` was GeoJSON, i.e. {type,coordinates}, MongoDB has a "2dsphere" index
        "lat_lon.latitude",
        "lat_lon.longitude",
        "specific_ecosystem",
    },
    "study_set": {
        "has_credit_associations.applied_roles",
        "has_credit_associations.applies_to_person.name",
        "has_credit_associations.applies_to_person.orcid",
    },
    "data_object_set": {"data_object_type", "file_size_bytes", "md5_checksum", "url"},
    # TODO: Refrain from ensuring indexes exist in the `omics_processing_set` collection,
    #       since that collection was deleted as part of the "Berkeley schema" refactor.
    #       Reference: https://microbiomedata.github.io/nmdc-schema/v10-vs-v11-retrospective/#slots-removed-from-database
    "omics_processing_set": {
        "alternative_identifiers",
        "has_input",
        "has_output",
        "instrument_name",
    },
    "functional_annotation_agg": {"was_generated_by"},
    "workflow_execution_set": {"has_input", "has_output"},
    # Note: The `jobs` collection is not described by the NMDC schema.
    "jobs": {"config.activity_id"},
}
@@ -0,0 +1,122 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional
3
+ from enum import Enum
4
+ import datetime
5
+
6
+
7
class WorkflowFileStagingCollectionName(str, Enum):
    """The name of a MongoDB collection related to workflow file staging."""

    # Members are also `str` instances (via the `str` mixin), so they compare
    # equal to their plain-string values.
    JGI_SEQUENCING_PROJECTS = "wf_file_staging.jgi_sequencing_projects"
11
+
12
+
13
# String-valued enumeration of Globus task statuses; each member's value is
# identical to its name.
# NOTE(review): which of these values Globus itself reports is not visible here — confirm against the Globus API.
class GlobusTaskStatus(str, Enum):
    ACTIVE = "ACTIVE"
    INACTIVE = "INACTIVE"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    PENDING = "PENDING"
    IN_PROGRESS = "IN_PROGRESS"
    COMPLETED = "COMPLETED"
21
+
22
+
23
# String-valued enumeration of JDP file statuses; each member's value is
# identical to its name.
# NOTE(review): presumably the allowed values of `JGISample.jdp_file_status`
# (whose example is "RESTORED"), but that field is typed as plain `str` — confirm.
class JDPFileStatus(str, Enum):
    RESTORED = "RESTORED"
    PURGED = "PURGED"
    READY = "READY"
    EXPIRED = "EXPIRED"
28
+
29
+
30
class GlobusTask(BaseModel):
    """
    Represents a Globus file transfer configuration.
    """

    # Both fields are required (`...` marks a field with no default).
    task_id: str = Field(
        ..., description="ID from Globus of the task", examples=["Some task id"]
    )
    # NOTE(review): typed as plain `str`, so the `GlobusTaskStatus` enum defined
    # in this module is not enforced here — confirm whether that is intentional.
    task_status: str = Field(
        ..., description="Status of the Globus task.", examples=["Some status"]
    )
41
+
42
+
43
class JGISample(BaseModel):
    """
    Represents a JGI Sample for workflow file staging. Information from JDP, Gold, and Globus is gathered on these records.
    """

    # Every field is required except `md5sum` and `update_date`, which default to `None`.
    jdp_file_id: str = Field(
        ...,
        description="JGI Data Portal File ID",
        examples=["6011bc6e117e5d4b9d2b2073"],
    )
    ap_gold_id: str = Field(
        ..., description="Gold Analysis Project ID", examples=["Ga0307276"]
    )
    gold_study_id: str = Field(..., description="Gold Study ID", examples=["Gs0135149"])
    its_ap_id: str = Field(
        ..., description="ITS Analysis Project ID from the JDP", examples=["1196479.0"]
    )
    sequencing_project_name: str = Field(
        ...,
        description="Sequencing project name. This relates to a record in the `/wf_staging_file/sequencing_project` endpoints.",
        examples=["Some Project Name"],
    )
    gold_biosample_id: str = Field(
        ..., description="Gold Biosample ID", examples=["Gb0191643"]
    )
    gold_seq_id: str = Field(..., description="Gold Sequence ID", examples=["1196479"])
    file_name: str = Field(..., description="File Name", examples=["filename.tar.gz"])
    # NOTE(review): typed as plain `str`; the `JDPFileStatus` enum in this module
    # is not enforced here — confirm whether that is intentional.
    jdp_file_status: str = Field(
        ...,
        description="File staging status. Grabbed from the JDP file restoration endpoint.",
        examples=["RESTORED"],
    )
    # NOTE(review): typed as plain `str`; the `GlobusTaskStatus` enum in this module
    # is not enforced here — confirm whether that is intentional.
    globus_file_status: str = Field(
        ...,
        description="File staging status. Received from Globus when the file state is queried.",
        examples=["ACTIVE"],
    )
    jdp_file_size: int = Field(
        ..., description="File size in bytes from JDP.", examples=[123456]
    )
    md5sum: Optional[str] = Field(
        None, description="MD5 Sum", examples=["D43F2404CA13E22594E5C8B04D3BBB81"]
    )
    jgi_ap_id: str = Field(
        ..., description="JGI Analysis Project ID", examples=["1196479"]
    )
    create_date: datetime.datetime = Field(
        ..., description="Creation Date", examples=["2023-01-01T00:00:00Z"]
    )
    update_date: Optional[datetime.datetime] = Field(
        None, description="Update Date", examples=["2023-01-01T00:00:00Z"]
    )
    request_id: int = Field(
        ...,
        description="Request ID from the JGI data portal after a request to have the files restored from tape is submitted.",
        examples=[1],
    )
100
+
101
+
102
class JGISequencingProject(BaseModel):
    """
    A representation of a JGI sequencing project and its associated metadata.
    """

    # All four fields are required strings (`...` marks a field with no default).
    # `JGISample.sequencing_project_name` is described as relating to a
    # sequencing project record; presumably it matches this name — confirm.
    sequencing_project_name: str = Field(
        ...,
        description="Name of the sequencing project that we can refer to while staging files.",
        examples=["Human Genome Project"],
    )
    sequencing_project_description: str = Field(
        ...,
        description="Detailed description of the sequencing project",
        examples=["A project to sequence the human genome."],
    )
    jgi_proposal_id: str = Field(
        ..., description="JGI proposal ID", examples=["503568"]
    )
    nmdc_study_id: str = Field(
        ..., description="NMDC study ID", examples=["nmdc:sty-11-28tm5d36"]
    )
@@ -0,0 +1,15 @@
1
+ import datetime
2
+ from typing import Optional, List
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
# Optional descriptive fields of a workflow; extended by `Workflow`, which adds
# a required `id`. Every field here defaults to `None`.
class WorkflowBase(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
    # NOTE(review): presumably ids of the capabilities associated with this workflow — confirm.
    capability_ids: Optional[List[str]] = None
11
+
12
+
13
# A `WorkflowBase` with a required `id` and an optional creation timestamp.
class Workflow(WorkflowBase):
    id: str
    created_at: Optional[datetime.datetime] = None