nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,246 @@
1
+ import json
2
+ import logging
3
+ from typing import Optional, Any, Dict, List, Union, TypedDict
4
+
5
+ import bson
6
+ import bson.json_util
7
+ from pydantic import (
8
+ model_validator,
9
+ Field,
10
+ BaseModel,
11
+ PositiveInt,
12
+ NonNegativeInt,
13
+ field_validator,
14
+ WrapSerializer,
15
+ )
16
+ from toolz import assoc, assoc_in
17
+ from typing_extensions import Annotated
18
+
19
+ from nmdc_runtime.api.core.util import pick
20
+
21
+
22
def bson_to_json(doc: Any, handler) -> dict:
    """Convert `doc` into plain JSON-compatible data by round-tripping through `bson.json_util`.

    This lets values such as mongo ObjectIds serialize as JSON. `handler` is part of the
    signature but is not called here.
    """
    bson_aware_json = bson.json_util.dumps(doc)
    return json.loads(bson_aware_json)
25
+
26
+
27
# A string-keyed document whose serialization is delegated to `bson_to_json`.
Document = Annotated[Dict[str, Any], WrapSerializer(bson_to_json)]

# Integer constrained to the values 0 and 1.
OneOrZero = Annotated[int, Field(ge=0, le=1)]
# Integer constrained to exactly 1.
One = Annotated[int, Field(ge=1, le=1)]
# Integer constrained to exactly -1.
MinusOne = Annotated[int, Field(ge=-1, le=-1)]
# Either 1 or -1 (used below for `sort` and `hint` specifications).
OneOrMinusOne = Union[One, MinusOne]
33
+
34
+
35
# Base model for the command models in this module; contributes the optional `comment` field.
class CommandBase(BaseModel):
    comment: Optional[Any] = None
37
+
38
+
39
# Model of a `collStats` command.
class CollStatsCommand(CommandBase):
    collStats: str  # presumably the name of the target collection — TODO confirm
    scale: Optional[int] = 1  # defaults to 1
42
+
43
+
44
# Model of a `count` command.
class CountCommand(CommandBase):
    count: str  # presumably the name of the target collection — TODO confirm
    query: Optional[Document] = None  # optional query document; omitted by default
47
+
48
+
49
# Model of a `find` command.
class FindCommand(CommandBase):
    find: str  # presumably the name of the target collection — TODO confirm
    filter: Optional[Document] = None
    # Maps a field name to 0 or 1.
    projection: Optional[Dict[str, OneOrZero]] = None
    allowPartialResults: Optional[bool] = True
    batchSize: Optional[PositiveInt] = 101
    # Maps a field name to 1 or -1.
    sort: Optional[Dict[str, OneOrMinusOne]] = None
    limit: Optional[NonNegativeInt] = None
57
+
58
+
59
# Model of an `aggregate` command. Pipelines containing `$out` or `$merge` stages are rejected,
# and a default `cursor` of `{"batchSize": 25}` is supplied when the input dict has none.
class AggregateCommand(CommandBase):
    aggregate: str
    pipeline: List[Document]
    allowDiskUse: Optional[bool] = False
    cursor: Optional[Document] = None

    @field_validator("pipeline")
    @classmethod
    def disallow_invalid_pipeline_stages(
        cls, pipeline: List[Document]
    ) -> List[Document]:
        """Return `pipeline` unchanged, or raise if any stage has a denied top-level key.

        Raises:
            ValueError: if a stage document has `$out` or `$merge` among its keys.
        """
        deny_list = ["$out", "$merge"]

        if any(
            key in deny_list for pipeline_stage in pipeline for key in pipeline_stage
        ):
            # The stage name is `$out` (lowercase); the message names it as the client must spell it.
            raise ValueError("$out and $merge pipeline stages are not allowed.")

        return pipeline

    @model_validator(mode="before")
    @classmethod
    def ensure_default_value_for_cursor(cls, data: Any) -> Any:
        """Add a default `cursor` to dict input that lacks one; pass other input through as-is.

        `assoc` returns a new dict, so the caller's `data` is not mutated.
        """
        if isinstance(data, dict) and "cursor" not in data:
            return assoc(data, "cursor", {"batchSize": 25})
        return data
85
+
86
+
87
# Model of a `getMore` command.
class GetMoreCommand(CommandBase):
    # Note: No `collection` field. See `QueryContinuation` for how inter-API-request "sessions" are modeled.
    getMore: str  # Note: runtime uses a `str` id, not an `int` like mongo's native session cursors.
    batchSize: Optional[PositiveInt] = None
91
+
92
+
93
# Base model for command responses; every response carries an `ok` flag that is 0 or 1.
class CommandResponse(BaseModel):
    ok: OneOrZero
95
+
96
+
97
# Response model paired with `CollStatsCommand` (see `command_response_for`).
# Only `avgObjSize` is optional; all other fields are required.
class CollStatsCommandResponse(CommandResponse):
    ns: str
    size: float
    count: float
    avgObjSize: Optional[float] = None
    storageSize: float
    totalIndexSize: float
    totalSize: float
    scaleFactor: float
106
+
107
+
108
# Response model paired with `CountCommand` (see `command_response_for`).
class CountCommandResponse(CommandResponse):
    n: NonNegativeInt  # non-negative integer; presumably the resulting count — TODO confirm
110
+
111
+
112
class CommandResponseCursor(BaseModel):
    # Note: No `ns` field, `id` is a `str`, and `partialResultsReturned` aliased to `queriedShardsUnavailable` to be
    # less confusing to Runtime API clients. See `QueryContinuation` for how inter-API-request "sessions" are modeled.
    batch: List[Document]
    partialResultsReturned: Optional[bool] = Field(
        None, alias="queriedShardsUnavailable"
    )
    id: Optional[str] = None

    @field_validator("id", mode="before")
    @classmethod
    def coerce_int_to_str(cls, value: Any) -> Any:
        """Turn an integer `id` into its string form before validation; leave other values alone."""
        return str(value) if isinstance(value, int) else value
128
+
129
+
130
# Response model for commands that yield a cursor (see `command_response_for`).
class CursorYieldingCommandResponse(CommandResponse):
    cursor: CommandResponseCursor

    @classmethod
    def slimmed(cls, cmd_response) -> Optional["CursorYieldingCommandResponse"]:
        """Create a new response object that retains only the `_id` for each cursor batch document.

        Returns `None` (after logging a warning) if any batch document lacks an `_id` key.
        """
        # Dump by alias: `cls(**dump)` below validates aliased fields by their alias, so a dump keyed by
        # field name would silently drop `partialResultsReturned` (alias `queriedShardsUnavailable`).
        dump: dict = cmd_response.model_dump(exclude_unset=True, by_alias=True)

        # If any dictionary in this batch lacks an `_id` key, log a warning and return `None`.
        id_list = [pick(["_id"], batch_doc) for batch_doc in dump["cursor"]["batch"]]
        if any("_id" not in doc for doc in id_list):
            logging.warning("Some documents in the batch lack an `_id` field.")
            return None

        # `assoc_in` returns a new nested dict with the batch replaced by the `_id`-only documents.
        dump = assoc_in(
            dump,
            ["cursor", "batch"],
            id_list,
        )
        return cls(**dump)
150
+
151
+
152
# One entry of a `DeleteCommand`'s `deletes` list.
class DeleteStatement(BaseModel):
    q: Document
    # `limit` is required: https://www.mongodb.com/docs/manual/reference/command/delete/#std-label-deletes-array-limit
    limit: OneOrZero
    # Maps a field name to 1 or -1.
    hint: Optional[Dict[str, OneOrMinusOne]] = None
157
+
158
+
159
# Model of a `delete` command carrying a list of delete statements.
class DeleteCommand(CommandBase):
    delete: str  # presumably the name of the target collection — TODO confirm
    deletes: List[DeleteStatement]
162
+
163
+
164
# Response model paired with `DeleteCommand` (see `command_response_for`).
class DeleteCommandResponse(CommandResponse):
    ok: OneOrZero  # redeclared with the same type as on `CommandResponse`
    n: NonNegativeInt
    writeErrors: Optional[List[Document]] = None
168
+
169
+
170
# Custom types for the `delete_specs` derived from `DeleteStatement`s.
# Each spec is a dict with exactly the keys `filter` and `limit`.
DeleteSpec = TypedDict("DeleteSpec", {"filter": Document, "limit": OneOrZero})
DeleteSpecs = List[DeleteSpec]
173
+
174
+
175
# One entry of an `UpdateCommand`'s `updates` list.
# If `multi==True` all documents that meet the query criteria will be updated.
# Else only a single document that meets the query criteria will be updated.
class UpdateStatement(BaseModel):
    q: Document
    u: Document
    upsert: bool = False
    multi: bool = False
    # Maps a field name to 1 or -1.
    hint: Optional[Dict[str, OneOrMinusOne]] = None
183
+
184
+
185
# Custom types for the `update_specs` derived from `UpdateStatement`s.
# NOTE(review): the keys (`filter`, `limit`) are identical to `DeleteSpec`, although `UpdateStatement`
# has no `limit` field — confirm against the code that builds `update_specs`.
UpdateSpec = TypedDict("UpdateSpec", {"filter": Document, "limit": OneOrZero})
UpdateSpecs = List[UpdateSpec]
188
+
189
+
190
# Model of an `update` command carrying a list of update statements.
class UpdateCommand(CommandBase):
    update: str  # presumably the name of the target collection — TODO confirm
    updates: List[UpdateStatement]
193
+
194
+
195
# One entry of `UpdateCommandResponse.upserted`.
class DocumentUpserted(BaseModel):
    index: NonNegativeInt
    # NOTE(review): Pydantic v2 treats underscore-prefixed names as private attributes rather than
    # model fields, so `_id` may never be populated from input or included in dumps — confirm.
    _id: bson.ObjectId
198
+
199
+
200
# Response model paired with `UpdateCommand` (see `command_response_for`).
class UpdateCommandResponse(CommandResponse):
    ok: OneOrZero  # redeclared with the same type as on `CommandResponse`
    n: NonNegativeInt
    nModified: NonNegativeInt
    upserted: Optional[List[DocumentUpserted]] = None
    writeErrors: Optional[List[Document]] = None
206
+
207
+
208
# Commands that start a query: `find` and `aggregate`.
QueryCmd = Union[FindCommand, AggregateCommand]

# Commands mapped to `CursorYieldingCommandResponse` by `command_response_for`.
CursorYieldingCommand = Union[
    QueryCmd,
    GetMoreCommand,
]


# Every command model defined in this module.
Cmd = Union[
    CursorYieldingCommand,
    CollStatsCommand,
    CountCommand,
    DeleteCommand,
    UpdateCommand,
]

# Every concrete command response model defined in this module.
CommandResponseOptions = Union[
    CursorYieldingCommandResponse,
    CollStatsCommandResponse,
    CountCommandResponse,
    DeleteCommandResponse,
    UpdateCommandResponse,
]
231
+
232
+
233
def command_response_for(type_: type) -> Optional[type]:
    r"""
    Return the response model class that corresponds to the command model class `type_`.

    Subclasses of any member of `CursorYieldingCommand` (`FindCommand`, `AggregateCommand`,
    `GetMoreCommand`) map to `CursorYieldingCommandResponse`. The remaining command classes
    are looked up by exact class, and `None` is returned when there is no match.
    """
    from typing import get_args

    # Check against the explicit tuple of union members rather than the `Union` alias itself;
    # `issubclass` only accepts `typing.Union` aliases on Python 3.10 and later.
    if issubclass(type_, get_args(CursorYieldingCommand)):
        return CursorYieldingCommandResponse

    d = {
        CollStatsCommand: CollStatsCommandResponse,
        CountCommand: CountCommandResponse,
        DeleteCommand: DeleteCommandResponse,
        UpdateCommand: UpdateCommandResponse,
    }
    return d.get(type_)
@@ -0,0 +1,111 @@
1
+ """
2
+ A *query continuation* is a means to effectively resume a query, i.e. a `find` or `aggregate` MongoDB database command.
3
+
4
+ A *query continuation* document represents a *continuation* (cf. <https://en.wikipedia.org/wiki/Continuation>) for a
5
+ query and uses a stored value ("cursor") for MongoDB's guaranteed unique-valued document field, `_id`,
6
+ such that the documents returned by the command are guaranteed to be sorted in ascending order by `_id`.
7
+
8
+ In this way, an API client may retrieve all documents defined by a `find` or `aggregate` command over multiple HTTP
9
+ requests. One can think of this process as akin to pagination; however, with "cursor-based" pagination, there are no
10
+ guarantees wrt a fixed "page size".
11
+
12
+ """
13
+
14
+ import datetime
15
+ import logging
16
+ import json
17
+
18
+ from pydantic import BaseModel, Field
19
+ from pymongo.database import Database as MongoDatabase
20
+
21
+ from nmdc_runtime.api.core.idgen import generate_one_id
22
+ from nmdc_runtime.api.core.util import now
23
+ from nmdc_runtime.api.db.mongo import get_mongo_db
24
+ from nmdc_runtime.api.models.query import (
25
+ CommandResponse,
26
+ QueryCmd,
27
+ )
28
+
29
# Name of the mongo collection in which query continuations are stored.
COLLECTION_NAME_FOR_QUERY_CONTINUATIONS = "_runtime.query_continuations"

# Note: the database handle is obtained, and the index below is created, when this module is imported.
_mdb: MongoDatabase = get_mongo_db()
_qc_collection = _mdb[COLLECTION_NAME_FOR_QUERY_CONTINUATIONS]

# Ensure one-hour TTL on `_runtime.query_continuations` documents via TTL Index.
# Reference: https://www.mongodb.com/docs/manual/core/index-ttl/
_qc_collection.create_index({"last_modified": 1}, expireAfterSeconds=3600)
37
+
38
+
39
def not_empty(lst: list) -> bool:
    """Return `True` when `lst` contains at least one element, else `False`."""
    return len(lst) != 0
41
+
42
+
43
class QueryContinuation(BaseModel):
    """A query that has not completed, and that may be resumed, using `cursor` to modify `query_cmd`.

    This model is intended to represent the state of a logical "session" to "page" through a query's results
    over several HTTP requests, and may be discarded after fetching all "batches" of documents.

    Thus, a mongo collection tracking query continuations may be reasonably given e.g. a so-called "TTL Index"
    for the `last_modified` field, assuming that `last_modified` is updated each time `query` is updated.
    """

    # Populated from (and, in `dump_qc`, dumped as) the `_id` key.
    id: str = Field(..., alias="_id")
    # The `find` or `aggregate` command being continued.
    query_cmd: QueryCmd
    # In `create_qc`, this is the JSON-encoded `_id` of the last document in the response batch.
    cursor: str
    # The TTL index on the continuations collection is defined on this field.
    last_modified: datetime.datetime
57
+
58
+
59
class QueryContinuationError(Exception):
    """Error concerning a query continuation, e.g. no stored continuation has a requested id.

    The human-readable explanation is available both as `detail` and via `str(error)`.
    """

    def __init__(self, detail: str):
        # Forward the detail to `Exception` so that `str(error)` and `error.args` are populated.
        super().__init__(detail)
        self.detail = detail

    def __repr__(self):
        return f"{self.__class__.__name__}: {self.detail}"
65
+
66
+
67
def dump_qc(m: BaseModel):
    """Dump the model `m` using field aliases and leaving out fields that were never set."""
    dump_options = {"by_alias": True, "exclude_unset": True}
    return m.model_dump(**dump_options)
69
+
70
+
71
def create_qc(query_cmd: QueryCmd, cmd_response: CommandResponse) -> QueryContinuation:
    """Build a query continuation from a command and its response, store it, and return it.

    The continuation's `cursor` is the JSON-encoded `_id` of the last document in the
    response's cursor batch.
    """

    logging.info(f"cmd_response: {cmd_response}")
    final_batch_doc = cmd_response.cursor.batch[-1]
    last_id = json.dumps(final_batch_doc["_id"])
    logging.info(f"Last document ID for query continuation: {last_id}")

    continuation = QueryContinuation(
        _id=generate_one_id(_mdb, "query_continuation"),
        query_cmd=query_cmd,
        cursor=last_id,
        last_modified=now(),
    )
    serialized = dump_qc(continuation)
    _qc_collection.insert_one(serialized)
    return continuation
85
+
86
+
87
def get_qc_by__id(_id: str) -> QueryContinuation:
    r"""
    Returns the `QueryContinuation` having the specified `_id` value, raising an exception
    if the corresponding document does not exist in the database.

    This function never returns `None`; a missing document results in a `QueryContinuationError`.
    """
    doc = _qc_collection.find_one({"_id": _id})
    if doc is None:
        raise QueryContinuationError(f"cannot find cc with id {_id}")
    return QueryContinuation(**doc)
96
+
97
+
98
def get_last_doc__id_for_qc(query_continuation: QueryContinuation) -> str:
    """
    Return the JSON-decoded value of the continuation's `cursor`, i.e. the stored last document `_id`.
    """
    stored_cursor = query_continuation.cursor
    logging.info(f"Cursor for last doc query continuation: {stored_cursor}")
    return json.loads(stored_cursor)
105
+
106
+
107
def get_initial_query_for_qc(query_continuation: QueryContinuation) -> QueryCmd:
    """
    Return the query command stored on the given `QueryContinuation`.
    """
    initial_cmd = query_continuation.query_cmd
    return initial_cmd
@@ -0,0 +1,161 @@
1
+ from enum import Enum
2
+ import os
3
+ from functools import lru_cache
4
+ from typing import List, Optional
5
+
6
+ from dagster_graphql import DagsterGraphQLClient
7
+ from pydantic import BaseModel
8
+ from pymongo.database import Database as MongoDatabase
9
+ from toolz import merge
10
+
11
+ from nmdc_runtime.api.core.idgen import generate_one_id
12
+ from nmdc_runtime.api.core.util import now, now_str, raise404_if_none, pick
13
+ from nmdc_runtime.api.models.user import User
14
+
15
# Base URLs pointing at the `main` branch of the nmdc-runtime repository.
PRODUCER_URL_BASE_DEFAULT = (
    "https://github.com/microbiomedata/nmdc-runtime/tree/main/nmdc_runtime/"
)
SCHEMA_URL_BASE_DEFAULT = (
    "https://github.com/microbiomedata/nmdc-runtime/tree/main/nmdc_runtime/"
)

# The URLs actually used: the base with "/main/" replaced by "/v0-0-1/", plus a suffix.
PRODUCER_URL = PRODUCER_URL_BASE_DEFAULT.replace("/main/", "/v0-0-1/") + "producer"
SCHEMA_URL = SCHEMA_URL_BASE_DEFAULT.replace("/main/", "/v0-0-1/") + "schema.json"
24
+
25
+
26
# Base model supplying the `producer` and `schemaURL` fields shared by job and run models.
class OpenLineageBase(BaseModel):
    producer: str
    schemaURL: str
29
+
30
+
31
# User-supplied specification of a run to request (consumed by `_add_run_requested_event`).
class RunUserSpec(BaseModel):
    job_id: str  # looked up against `mdb.workflows` by its `id` field
    run_config: dict = {}  # stored under the run's `nmdcRuntime_runConfig` facet
    inputs: List[str] = []
35
+
36
+
37
# Summary of a job: its `id` and `description`, plus the inherited `producer` and `schemaURL`.
class JobSummary(OpenLineageBase):
    id: str
    description: str
40
+
41
+
42
# A run, identified by `id`, with optional `facets`.
class Run(BaseModel):
    id: str
    facets: Optional[dict] = None
45
+
46
+
47
# The kinds of run event; each member's value is its own name as a string.
class RunEventType(str, Enum):
    REQUESTED = "REQUESTED"
    STARTED = "STARTED"
    FAIL = "FAIL"
    COMPLETE = "COMPLETE"
52
+
53
+
54
# Summary of a run: status, start information, inputs/outputs and the associated job.
class RunSummary(OpenLineageBase):
    id: str
    status: RunEventType
    started_at_time: str
    was_started_by: str
    inputs: List[str]
    outputs: List[str]
    job: JobSummary
62
+
63
+
64
# A single event in the life of a run; the `_add_run_*_event` helpers insert these into `mdb.run_events`.
class RunEvent(OpenLineageBase):
    run: Run
    job: JobSummary
    type: RunEventType
    time: str  # set from `now_str()` by the helpers in this module
    inputs: Optional[List[str]] = []
    outputs: Optional[List[str]] = []
71
+
72
+
73
@lru_cache
def get_dagster_graphql_client() -> DagsterGraphQLClient:
    """Return a (cached) Dagster GraphQL client for the endpoint named by the `DAGIT_HOST` env var.

    `DAGIT_HOST` must have the form `[scheme://]hostname:port`.

    Raises:
        ValueError: if `DAGIT_HOST` is unset or empty, lacks a `:port` part, or has a
            non-integer port.
    """
    dagit_host = os.getenv("DAGIT_HOST")
    if not dagit_host:
        raise ValueError("The DAGIT_HOST environment variable is not set.")

    # Drop an optional scheme prefix (e.g. "http://"), then split the hostname from the port.
    host_and_port = dagit_host.split("://", 1)[-1]
    hostname, separator, port_str = host_and_port.partition(":")
    if not separator:
        raise ValueError(
            f"DAGIT_HOST must include a port (e.g. 'http://host:3000'), got {dagit_host!r}."
        )

    port_number = int(port_str)
    return DagsterGraphQLClient(hostname=hostname, port_number=port_number)
78
+
79
+
80
def _add_run_requested_event(run_spec: RunUserSpec, mdb: MongoDatabase, user: User):
    """Insert a REQUESTED run event for `run_spec` into `mdb.run_events` and return the new run id.

    The job is looked up in `mdb.workflows` by `run_spec.job_id`; the lookup result is passed
    through `raise404_if_none`. The event's `producer` is the requesting user's username.
    """
    # XXX what we consider a "job" here, is currently a "workflow" elsewhere...
    workflow_doc = raise404_if_none(mdb.workflows.find_one({"id": run_spec.job_id}))
    run_id = generate_one_id(mdb, "runs")

    requested_run = Run(
        id=run_id, facets={"nmdcRuntime_runConfig": run_spec.run_config}
    )
    job_summary = merge(
        pick(["id", "description"], workflow_doc),
        {"producer": PRODUCER_URL, "schemaURL": SCHEMA_URL},
    )
    event = RunEvent(
        producer=user.username,
        schemaURL=SCHEMA_URL,
        run=requested_run,
        job=job_summary,
        type=RunEventType.REQUESTED,
        time=now_str(),
        inputs=run_spec.inputs,
    )
    mdb.run_events.insert_one(event.model_dump())
    return run_id
98
+
99
+
100
def _add_run_started_event(run_id: str, mdb: MongoDatabase):
    """Insert a STARTED event for `run_id` into `mdb.run_events` and return `run_id`.

    The run and job are copied from the run's REQUESTED event (the first one when sorted by
    descending `time`); the lookup result is passed through `raise404_if_none`.
    """
    requested_doc = raise404_if_none(
        mdb.run_events.find_one(
            {"run.id": run_id, "type": "REQUESTED"}, sort=[("time", -1)]
        )
    )
    requested: RunEvent = RunEvent(**requested_doc)

    started_event = RunEvent(
        producer=PRODUCER_URL,
        schemaURL=SCHEMA_URL,
        run=requested.run,
        job=requested.job,
        type=RunEventType.STARTED,
        time=now_str(),
    )
    mdb.run_events.insert_one(started_event.model_dump())
    return run_id
119
+
120
+
121
def _add_run_fail_event(run_id: str, mdb: MongoDatabase):
    """Insert a FAIL event for `run_id` into `mdb.run_events` and return `run_id`.

    The run and job are copied from the run's REQUESTED event (the first one when sorted by
    descending `time`); the lookup result is passed through `raise404_if_none`.
    """
    requested_doc = raise404_if_none(
        mdb.run_events.find_one(
            {"run.id": run_id, "type": "REQUESTED"}, sort=[("time", -1)]
        )
    )
    requested: RunEvent = RunEvent(**requested_doc)

    fail_event = RunEvent(
        producer=PRODUCER_URL,
        schemaURL=SCHEMA_URL,
        run=requested.run,
        job=requested.job,
        type=RunEventType.FAIL,
        time=now_str(),
    )
    mdb.run_events.insert_one(fail_event.model_dump())
    return run_id
140
+
141
+
142
def _add_run_complete_event(run_id: str, mdb: MongoDatabase, outputs: List[str]):
    """Insert a COMPLETE event carrying `outputs` for `run_id` and return `run_id`.

    The run and job are copied from the run's STARTED event (the first one when sorted by
    descending `time`); the lookup result is passed through `raise404_if_none`.
    """
    started_doc = raise404_if_none(
        mdb.run_events.find_one(
            {"run.id": run_id, "type": "STARTED"}, sort=[("time", -1)]
        )
    )
    started: RunEvent = RunEvent(**started_doc)

    complete_event = RunEvent(
        producer=PRODUCER_URL,
        schemaURL=SCHEMA_URL,
        run=started.run,
        job=started.job,
        type=RunEventType.COMPLETE,
        time=now_str(),
        outputs=outputs,
    )
    mdb.run_events.insert_one(complete_event.model_dump())
    return run_id
@@ -0,0 +1,87 @@
1
+ from typing import List, Optional
2
+
3
+ import pymongo.database
4
+ from fastapi import Depends
5
+ from jose import JWTError, jwt
6
+ from pydantic import BaseModel
7
+
8
+ from nmdc_runtime.api.core.auth import (
9
+ verify_password,
10
+ TokenData,
11
+ optional_oauth2_scheme,
12
+ )
13
+ from nmdc_runtime.api.db.mongo import get_mongo_db
14
+ from nmdc_runtime.api.models.user import (
15
+ oauth2_scheme,
16
+ credentials_exception,
17
+ SECRET_KEY,
18
+ ALGORITHM,
19
+ )
20
+
21
+
22
# A site, identified by `id`, with the ids of its capabilities (empty by default).
class Site(BaseModel):
    id: str
    capability_ids: List[str] = []
25
+
26
+
27
# A site client as stored in the database: its `id` and the hash of its secret.
class SiteClientInDB(BaseModel):
    id: str
    hashed_secret: str  # compared against a presented secret via `verify_password`
30
+
31
+
32
# A site as stored in the database: a `Site` plus its list of clients (empty by default).
class SiteInDB(Site):
    clients: List[SiteClientInDB] = []
34
+
35
+
36
def get_site(mdb, client_id: str) -> Optional[SiteInDB]:
    r"""
    Looks up the site having a client whose id is `client_id`.

    Returns the matching site as a `SiteInDB`, or `None` when no site matches.
    """
    site_doc = mdb.sites.find_one({"clients.id": client_id})
    if site_doc is None:
        return None
    return SiteInDB(**site_doc)
44
+
45
+
46
def authenticate_site_client(mdb, client_id: str, client_secret: str):
    """Return the site owning `client_id` if `client_secret` verifies against it, else `False`.

    `False` is returned both when no site has such a client and when the secret does not
    verify against the client's stored hash.
    """
    site = get_site(mdb, client_id)
    if not site:
        return False

    # Take the stored hash of the first client on the site whose id matches.
    matching_hashes = (
        client.hashed_secret for client in site.clients if client.id == client_id
    )
    hashed_secret = next(matching_hashes)

    return site if verify_password(client_secret, hashed_secret) else False
56
+
57
+
58
async def get_current_client_site(
    token: str = Depends(oauth2_scheme),
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    """Return the site owning the client identified by `token`.

    Raises `credentials_exception` when the token is listed in `invalidated_tokens`, cannot be
    decoded, has no subject, has a subject not starting with "client:", or names a client
    for which no site is found.
    """
    token_is_invalidated = mdb.invalidated_tokens.find_one({"_id": token})
    if token_is_invalidated:
        raise credentials_exception

    client_prefix = "client:"
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        subject: str = payload.get("sub")
        if subject is None or not subject.startswith(client_prefix):
            raise credentials_exception
        # The subject starts with the prefix, so the client id is whatever follows it.
        client_id = subject[len(client_prefix) :]
        token_data = TokenData(subject=client_id)
    except JWTError:
        raise credentials_exception

    site = get_site(mdb, client_id=token_data.subject)
    if site is None:
        raise credentials_exception
    return site
79
+
80
+
81
async def maybe_get_current_client_site(
    token: str = Depends(optional_oauth2_scheme),
    mdb: pymongo.database.Database = Depends(get_mongo_db),
):
    """Like `get_current_client_site`, except that a missing token yields `None`."""
    return None if token is None else await get_current_client_site(token, mdb)
@@ -0,0 +1,13 @@
1
+ import datetime
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
# Base model for a trigger: pairs an object type id with a workflow id.
class TriggerBase(BaseModel):
    object_type_id: str
    workflow_id: str
9
+
10
+
11
# A trigger with its own `id` and creation timestamp, in addition to the `TriggerBase` fields.
class Trigger(TriggerBase):
    id: str
    created_at: datetime.datetime