nmdc-runtime 2.6.0__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +212 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +777 -0
- nmdc_runtime/api/core/util.py +114 -0
- nmdc_runtime/api/db/mongo.py +436 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +206 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +817 -0
- nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +495 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +57 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +207 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/wfe_file_stages.py +122 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
- nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +56 -1
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +89 -0
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +29 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +29 -0
- nmdc_runtime/site/export/ncbi_xml.py +731 -40
- nmdc_runtime/site/export/ncbi_xml_utils.py +142 -26
- nmdc_runtime/site/graphs.py +80 -29
- nmdc_runtime/site/ops.py +522 -183
- nmdc_runtime/site/repair/database_updater.py +210 -1
- nmdc_runtime/site/repository.py +108 -117
- nmdc_runtime/site/resources.py +72 -36
- nmdc_runtime/site/translation/gold_translator.py +22 -21
- nmdc_runtime/site/translation/neon_benthic_translator.py +1 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +5 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +1 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +216 -69
- nmdc_runtime/site/translation/translator.py +64 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +16 -12
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +175 -348
- nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
- nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.6.0.dist-info/METADATA +0 -199
- nmdc_runtime-2.6.0.dist-info/RECORD +0 -83
- nmdc_runtime-2.6.0.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.12.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Optional, Dict, Any, List
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from nmdc_runtime.api.models.operation import Metadata as OperationMetadata
|
|
7
|
+
from nmdc_runtime.api.models.workflow import Workflow
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Fields shared by the job models in this module; `Job` and `JobIn` both derive from it.
class JobBase(BaseModel):
    # The workflow associated with the job (required).
    workflow: Workflow
    # Optional name; `description` and `examples` are metadata handed to pydantic's `Field`.
    name: Optional[str] = Field(
        None, description="Name of the job", examples=["Some job"]
    )
    # Optional free-text description of the job.
    description: Optional[str] = Field(
        None, description="Description of the job", examples=["Some description"]
    )
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# A single claim on a job; `Job.claims` and `JobIn.claims` hold lists of these.
class JobClaim(BaseModel):
    op_id: str  # presumably the id of the operation created for the claim — confirm
    site_id: str  # presumably the id of the site that made the claim — confirm
    done: Optional[bool] = None  # tri-state: None when not set
    cancelled: Optional[bool] = None  # tri-state: None when not set
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# A job with an identifier: the `JobBase` fields plus id, creation time, config and claims.
class Job(JobBase):
    id: str
    created_at: Optional[datetime.datetime] = None
    # Required free-form mapping; `JobIn` describes the same field as the
    # configuration of the associated workflow.
    config: Dict[str, Any]
    # Defaults to an empty list.
    # NOTE(review): `JobIn.claims` spells the same default as `default_factory=list`;
    # the two could be made consistent.
    claims: List[JobClaim] = []
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class JobIn(JobBase):
    """Payload of an HTTP request to create a `Job`."""

    # Consider forbidding extra fields (once the workflow automation developers have
    # updated the client code accordingly).
    # See: https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.extra
    ##model_config = ConfigDict(extra="forbid")

    # Required: `...` as the first `Field` argument means "no default".
    config: Dict[str, Any] = Field(
        ..., description="Configuration of the associated workflow", examples=[{}]
    )
    # Optional: `default_factory=list` produces a new empty list when omitted.
    claims: List[JobClaim] = Field(
        default_factory=list, description="Claims of the job", examples=[[]]
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Pairs an identifier with the `Job` it refers to; both fields are required.
class JobExecution(BaseModel):
    id: str
    job: Job
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Operation metadata (extends `Metadata` from `nmdc_runtime.api.models.operation`,
# imported here as `OperationMetadata`) that additionally carries a job and a site id.
class JobOperationMetadata(OperationMetadata):
    job: Job
    site_id: str
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from nmdc_runtime.api.models.query import (
|
|
2
|
+
DeleteCommand,
|
|
3
|
+
DeleteSpecs,
|
|
4
|
+
UpdateCommand,
|
|
5
|
+
UpdateSpecs,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def derive_delete_specs(delete_command: DeleteCommand) -> DeleteSpecs:
    r"""
    Builds one delete specification per statement of the given `DeleteCommand`.

    Each specification is a dictionary holding the statement's query under
    `"filter"` and its limit under `"limit"`; nothing else from the statement
    (e.g. its `hint`) is carried over. The order of the statements is kept.

    Note: This algorithm was copied from the `_run_mdb_cmd`
          function in `nmdc_runtime/api/endpoints/queries.py`.

    To run doctests: $ python -m doctest nmdc_runtime/api/models/lib/helpers.py

    >>> delete_command = DeleteCommand(**{
    ...     "delete": "collection_name",
    ...     "deletes": [
    ...         {
    ...             "q": {"color": "blue"},
    ...             "limit": 0,
    ...             "hint": {"potato": 1}
    ...         },
    ...         {
    ...             "q": {"color": "green"},
    ...             "limit": 1,
    ...         }
    ...     ],
    ... })
    >>> delete_specs = derive_delete_specs(delete_command)
    >>> delete_specs[0]
    {'filter': {'color': 'blue'}, 'limit': 0}
    >>> delete_specs[1]
    {'filter': {'color': 'green'}, 'limit': 1}
    """

    specs = []
    for statement in delete_command.deletes:
        spec = {"filter": statement.q, "limit": statement.limit}
        specs.append(spec)
    return specs
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def derive_update_specs(update_command: UpdateCommand) -> UpdateSpecs:
    r"""
    Builds one update specification per statement of the given `UpdateCommand`.

    Each specification is a dictionary holding the statement's query under
    `"filter"` and, under `"limit"`, `0` when the statement's `multi` flag is
    truthy and `1` otherwise. The order of the statements is kept.

    Note: This algorithm was copied from the `_run_mdb_cmd`
          function in `nmdc_runtime/api/endpoints/queries.py`.

    >>> update_command = UpdateCommand(**{
    ...     "update": "collection_name",
    ...     "updates": [
    ...         {
    ...             "q": {"color": "blue"},
    ...             "u": {"$set": {"color": "red"}},
    ...             "upsert": False,
    ...             "multi": True,
    ...             "hint": {"potato": 1}
    ...         },
    ...         {
    ...             "q": {"color": "green"},
    ...             "u": {"$set": {"color": "yellow"}},
    ...         }
    ...     ],
    ... })
    >>> update_specs = derive_update_specs(update_command)
    >>> update_specs[0]
    {'filter': {'color': 'blue'}, 'limit': 0}
    >>> update_specs[1]
    {'filter': {'color': 'green'}, 'limit': 1}
    """

    specs = []
    for statement in update_command.updates:
        if statement.multi:
            limit = 0
        else:
            limit = 1
        specs.append({"filter": statement.q, "limit": limit})
    return specs
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import List, Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field, create_model
|
|
6
|
+
from refscan.lib.helpers import get_collection_names_from_schema
|
|
7
|
+
|
|
8
|
+
from nmdc_runtime.util import nmdc_schema_view
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Closed set of file-type labels, used as the type of `DataObject.data_object_type`.
# Mixing in `str` makes every member an instance of `str` that compares equal to
# its human-readable label.
class FileTypeEnum(str, Enum):
    ft_icr_ms_analysis_results = "FT ICR-MS Analysis Results"
    gc_ms_metabolomics_results = "GC-MS Metabolomics Results"
    metaproteomics_workflow_statistics = "Metaproteomics Workflow Statistics"
    protein_report = "Protein Report"
    peptide_report = "Peptide Report"
    unfiltered_metaproteomics_results = "Unfiltered Metaproteomics Results"
    read_count_and_rpkm = "Read Count and RPKM"
    qc_non_rrna_r2 = "QC non-rRNA R2"
    qc_non_rrna_r1 = "QC non-rRNA R1"
    metagenome_bins = "Metagenome Bins"
    checkm_statistics = "CheckM Statistics"
    gottcha2_krona_plot = "GOTTCHA2 Krona Plot"
    kraken2_krona_plot = "Kraken2 Krona Plot"
    centrifuge_krona_plot = "Centrifuge Krona Plot"
    kraken2_classification_report = "Kraken2 Classification Report"
    kraken2_taxonomic_classification = "Kraken2 Taxonomic Classification"
    centrifuge_classification_report = "Centrifuge Classification Report"
    centrifuge_taxonomic_classification = "Centrifuge Taxonomic Classification"
    structural_annotation_gff = "Structural Annotation GFF"
    functional_annotation_gff = "Functional Annotation GFF"
    annotation_amino_acid_fasta = "Annotation Amino Acid FASTA"
    annotation_enzyme_commission = "Annotation Enzyme Commission"
    annotation_kegg_orthology = "Annotation KEGG Orthology"
    assembly_coverage_bam = "Assembly Coverage BAM"
    assembly_agp = "Assembly AGP"
    assembly_scaffolds = "Assembly Scaffolds"
    assembly_contigs = "Assembly Contigs"
    assembly_coverage_stats = "Assembly Coverage Stats"
    filtered_sequencing_reads = "Filtered Sequencing Reads"
    qc_statistics = "QC Statistics"
    tigrfam_annotation_gff = "TIGRFam Annotation GFF"
    # Parenthesized only to keep the long labels within the line length.
    clusters_of_orthologous_groups__cog__annotation_gff = (
        "Clusters of Orthologous Groups (COG) Annotation GFF"
    )
    cath_funfams__functional_families__annotation_gff = (
        "CATH FunFams (Functional Families) Annotation GFF"
    )
    superfam_annotation_gff = "SUPERFam Annotation GFF"
    smart_annotation_gff = "SMART Annotation GFF"
    pfam_annotation_gff = "Pfam Annotation GFF"
    direct_infusion_ft_icr_ms_raw_data = "Direct Infusion FT ICR-MS Raw Data"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Model of a data object. Its constructor signature is what
# `create_list_request_model_for(DataObject)` inspects to generate the
# `DataObjectListRequest` filter fields.
#
# NOTE(review): every field except `type` is annotated with a non-optional type
# (e.g. `str`) yet defaults to `None`. The annotations are read as-is by
# `create_list_request_model_for`, so switching them to `Optional[...]` should be
# checked against that function before being attempted.
class DataObject(BaseModel):
    id: str = Field(None)
    name: str = Field(None, description="A human readable label for an entity")
    description: str = Field(
        None, description="a human-readable description of a thing"
    )
    alternative_identifiers: List[str] = Field(
        None, description="A list of alternative identifiers for the entity."
    )
    compression_type: str = Field(
        None, description="If provided, specifies the compression type"
    )
    data_object_type: FileTypeEnum = Field(None)
    file_size_bytes: int = Field(None, description="Size of the file in bytes")
    md5_checksum: str = Field(None, description="MD5 checksum of file (pre-compressed)")
    # The only field with a non-None default.
    type: str = Field(
        "nmdc:DataObject",
        description="An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.",
    )
    url: str = Field(None)
    was_generated_by: str = Field(None)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Operators offered for each group of field types. Keys are tuples of Python
# types; values are operator names written with a leading `$`.
#
# NOTE: "$neq" is kept as-is because it determines the generated
# `<field>_neq` request field names. MongoDB spells that operator "$ne", so
# `list_request_filter_to_mongo_filter` translates it (see
# `_list_request_op_aliases`).
list_request_ops_per_field_type = {
    (object,): ["$eq", "$neq", "$in", "$nin"],
    (str,): ["$regex"],
    (int, float): ["$gt", "$gte", "$lt", "$lte"],
}

# Operators whose argument is a list of values rather than a single value.
list_request_ops_with_many_args = {"$in", "$nin"}

# Request-level operator names that differ from the MongoDB operator they denote.
_list_request_op_aliases = {"$neq": "$ne"}


def create_list_request_model_for(cls):
    """Create a pydantic model describing the list-request filters for `cls`.

    Every parameter of `inspect.signature(cls)` yields one model field per
    applicable operator, named `<parameter>_<operator without "$">` (for
    example `name_eq`, `name_regex`, `file_size_bytes_gte`). For a generic
    annotation such as `List[str]`, the first type argument is used as the
    field type. The generated field's default is the parameter's default, or
    `None` when the parameter has no default or defaults to `[]`.

    The model also gets `max_page_size` (int, default 20) and `page_token`
    (str, default None), and is named `<cls.__name__>ListRequest`.
    """
    field_filters = []
    sig = inspect.signature(cls)
    for p in sig.parameters.values():
        field_name, field_type = p.name, p.annotation
        if hasattr(field_type, "__origin__"):  # a GenericAlias object, e.g. List[str].
            field_type = field_type.__args__[0]
        field_default = (
            None if p.default in (inspect.Parameter.empty, []) else p.default
        )
        field_ops = []
        for types_ok, type_ops in list_request_ops_per_field_type.items():
            # The `isinstance` test is true for every type under the `(object,)`
            # key, so the generic operators are always included.
            if field_type in types_ok or isinstance(field_type, types_ok):
                field_ops.extend(type_ops)
        for op in field_ops:
            field_filters.append(
                {
                    "name": field_name,
                    "arg_type": field_type,
                    "default": field_default,
                    "op": op,
                }
            )
    create_model_kwargs = {"max_page_size": (int, 20), "page_token": (str, None)}
    for ff in field_filters:
        # Drop the leading "$" of the operator to form the field name suffix.
        model_field_name = f'{ff["name"]}_{ff["op"][1:]}'
        model_field_type = ff["arg_type"]
        create_model_kwargs[model_field_name] = (model_field_type, ff["default"])
    return create_model(f"{cls.__name__}ListRequest", **create_model_kwargs)


def _is_unset_filter_value(value):
    """Return True when `value` means "this filter was not supplied".

    `None`, empty strings and empty containers count as unset. Numbers
    (including `0` and `False`) never do, because they are legitimate
    filter arguments.
    """
    if isinstance(value, (int, float)):
        return False
    return not value


def list_request_filter_to_mongo_filter(req: dict):
    """Translate list-request parameters into a MongoDB filter document.

    Args:
        req: Mapping of `<field>_<op>` keys (as generated by
            `create_list_request_model_for`) to their arguments. The key is
            split at its last underscore, so field names may themselves
            contain underscores. Callers are expected to pass only filter
            keys (i.e. not `max_page_size` or `page_token`).

    Returns:
        A dictionary of the form `{field: {"$op": argument, ...}, ...}`.
        Unset arguments are skipped. Arguments of `$in`/`$nin` become lists:
        strings are split on commas, other iterables are copied, and scalars
        are wrapped in a one-element list. The request-level operator `neq`
        is emitted as MongoDB's `$ne`.

    Raises:
        ValueError: If a key with a set value contains no underscore.
    """
    filter_ = {}
    for key, value in req.items():
        if _is_unset_filter_value(value):
            continue
        field, op_name = key.rsplit("_", maxsplit=1)
        op = f"${op_name}"
        wants_list = op in list_request_ops_with_many_args
        # MongoDB has no "$neq" operator; emit its real name instead.
        op = _list_request_op_aliases.get(op, op)
        conditions = filter_.setdefault(field, {})
        if op in conditions:
            # The first value seen for a given field/operator pair wins.
            continue
        if not wants_list:
            conditions[op] = value
        elif isinstance(value, str):
            conditions[op] = value.split(",")
        elif isinstance(value, (list, tuple, set, frozenset)):
            conditions[op] = list(value)
        else:
            # E.g. an int-typed field: a single scalar was supplied.
            conditions[op] = [value]
    return filter_
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# List-request model generated from the constructor signature of `DataObject`.
DataObjectListRequest = create_list_request_model_for(DataObject)

# A document represented as a plain dictionary with string keys.
SimplifiedDocument = Dict[str, Any]

# Evaluated once, when this module is imported.
schema_view = nmdc_schema_view()
# Model named "NMDCDatabase" with one field per collection name returned by
# `get_collection_names_from_schema(schema_view)`, each annotated
# `Optional[list[SimplifiedDocument]]`.
# NOTE(review): the fields are passed as bare annotations without a default;
# how `create_model` treats that form depends on the pydantic version — confirm.
SimplifiedNMDCDatabase = create_model(
    "NMDCDatabase",
    **{
        coll_name: Optional[list[SimplifiedDocument]]
        for coll_name in get_collection_names_from_schema(schema_view)
    },
)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import hashlib
|
|
3
|
+
import http
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Optional, List, Dict
|
|
6
|
+
|
|
7
|
+
from pydantic import (
|
|
8
|
+
field_validator,
|
|
9
|
+
model_validator,
|
|
10
|
+
Field,
|
|
11
|
+
StringConstraints,
|
|
12
|
+
BaseModel,
|
|
13
|
+
AnyUrl,
|
|
14
|
+
HttpUrl,
|
|
15
|
+
field_serializer,
|
|
16
|
+
)
|
|
17
|
+
from typing_extensions import Annotated
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Closed set of access method types; `AccessMethod.type` defaults to `https`.
# Mixing in `str` makes each member compare equal to its plain-string value.
class AccessMethodType(str, Enum):
    s3 = "s3"
    gs = "gs"
    ftp = "ftp"
    gsiftp = "gsiftp"
    globus = "globus"
    htsget = "htsget"
    https = "https"
    file = "file"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# A URL together with optional string-to-string headers.
class AccessURL(BaseModel):
    headers: Optional[Dict[str, str]] = None
    url: AnyUrl

    @field_serializer("url")
    def serialize_url(self, url: AnyUrl, _info):
        # Emit the URL as a plain string when the model is serialized.
        return str(url)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# One way of accessing an object: an `access_id`, an `access_url`, or both.
class AccessMethod(BaseModel):
    # When given, must be a non-empty string.
    access_id: Optional[Annotated[str, StringConstraints(min_length=1)]] = None
    access_url: Optional[AccessURL] = None
    region: Optional[str] = None
    type: AccessMethodType = AccessMethodType.https

    @model_validator(mode="before")
    @classmethod
    def at_least_one_of_access_id_and_url(cls, values):
        """Reject input that provides neither `access_id` nor `access_url`.

        This runs before field validation, so `values` is the raw input and
        is not guaranteed to be a dictionary. Non-dictionary input is passed
        through untouched so that pydantic's own validation reports it,
        instead of this method failing with an `AttributeError`.

        Raises:
            ValueError: If both `access_id` and `access_url` are missing or None.
        """
        if isinstance(values, dict):
            access_id, access_url = values.get("access_id"), values.get("access_url")
            if access_id is None and access_url is None:
                raise ValueError(
                    "At least one of access_url and access_id must be provided."
                )
        return values
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# A string naming a checksum algorithm. The pattern is an alternation of the
# names in `hashlib.algorithms_guaranteed`, sorted so the generated pattern is
# stable (sets have no fixed iteration order).
# NOTE(review): the pattern has no `^`/`$` anchors; if pydantic applies patterns
# with search semantics, any string merely containing an algorithm name would
# pass — confirm whether anchoring is wanted.
ChecksumType = Annotated[
    str,
    StringConstraints(
        pattern=rf"(?P<checksumtype>({'|'.join(sorted(hashlib.algorithms_guaranteed))}))"
    ),
]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# A checksum value together with the name of the algorithm that produced it.
class Checksum(BaseModel):
    checksum: Annotated[str, StringConstraints(min_length=1)]  # non-empty string
    type: ChecksumType
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Non-empty string of ASCII letters, digits, ".", "_", "~" and "-".
DrsId = Annotated[str, StringConstraints(pattern=r"^[A-Za-z0-9._~\-]+$")]
# Same character set as `DrsId` except that "~" is not allowed.
PortableFilename = Annotated[str, StringConstraints(pattern=r"^[A-Za-z0-9._\-]+$")]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# An entry in an object's contents. Entries may nest via `contents`; an entry
# without nested contents must carry an `id`.
class ContentsObject(BaseModel):
    # Self-referential field; resolved by the `model_rebuild()` call that
    # follows this class in the module.
    contents: Optional[List["ContentsObject"]] = None
    drs_uri: Optional[List[AnyUrl]] = None
    id: Optional[DrsId] = None
    name: PortableFilename

    @model_validator(mode="before")
    @classmethod
    def no_contents_means_single_blob(cls, values):
        """Require an `id` whenever `contents` is absent.

        This runs before field validation, so `values` is the raw input and
        is not guaranteed to be a dictionary. Non-dictionary input is passed
        through untouched so that pydantic's own validation reports it,
        instead of this method failing with an `AttributeError`.

        Raises:
            ValueError: If both `contents` and `id` are missing or None.
        """
        if isinstance(values, dict):
            contents, id_ = values.get("contents"), values.get("id")
            if contents is None and id_ is None:
                raise ValueError("no contents means no further nesting, so id required")
        return values

    @field_serializer("drs_uri")
    def serialize_url(self, drs_uri: Optional[List[AnyUrl]], _info):
        # Emit URLs as plain strings; None and empty lists are returned as-is.
        if drs_uri is not None and len(drs_uri) > 0:
            return [str(u) for u in drs_uri]
        return drs_uri
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# Note: Between Pydantic v1 and v2, the `update_forward_refs` method was renamed to `model_rebuild`.
#       Reference: https://docs.pydantic.dev/2.11/migration/#changes-to-pydanticbasemodel
# Needed because `ContentsObject.contents` refers to `"ContentsObject"` by name.
ContentsObject.model_rebuild()

# "<word characters>/<word characters, '-', '+' or '.'>", e.g. the shape of "text/plain".
Mimetype = Annotated[str, StringConstraints(pattern=r"^\w+/[-+.\w]+$")]
# Non-negative integer; `strict=True` disables coercion from other types.
SizeInBytes = Annotated[int, Field(strict=True, ge=0)]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# An error report: a required HTTP status code and an optional message.
class Error(BaseModel):
    msg: Optional[str] = None
    status_code: http.HTTPStatus
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Optional descriptive fields shared by the DRS object models in this module
# (`DrsObjectIn` and `DrsObjectOutBase` derive from it).
class DrsObjectBase(BaseModel):
    aliases: Optional[List[str]] = None
    description: Optional[str] = None
    mime_type: Optional[Mimetype] = None
    name: Optional[PortableFilename] = None
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# DRS object fields without `id`/`self_uri` (those are added by `DrsObject`).
# An object must provide `contents`, `access_methods`, or both.
class DrsObjectIn(DrsObjectBase):
    access_methods: Optional[List[AccessMethod]] = None
    checksums: List[Checksum]  # required and must be non-empty (see validator)
    contents: Optional[List[ContentsObject]] = None
    created_time: datetime.datetime
    size: SizeInBytes
    updated_time: Optional[datetime.datetime] = None
    version: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def no_contents_means_single_blob(cls, values):
        """Require `access_methods` whenever `contents` is absent.

        This runs before field validation, so `values` is the raw input and
        is not guaranteed to be a dictionary. Non-dictionary input is passed
        through untouched so that pydantic's own validation reports it,
        instead of this method failing with an `AttributeError`.

        Raises:
            ValueError: If both `contents` and `access_methods` are missing or None.
        """
        if isinstance(values, dict):
            contents, access_methods = values.get("contents"), values.get("access_methods")
            if contents is None and access_methods is None:
                raise ValueError(
                    "no contents means single blob, which requires access_methods"
                )
        return values

    @field_validator("checksums")
    @classmethod
    def at_least_one_checksum(cls, v):
        """Reject an empty `checksums` list.

        Raises:
            ValueError: If `v` contains no checksum.
        """
        if not v:
            raise ValueError("At least one checksum required")
        return v
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# A `DrsObjectIn` that additionally has an identifier and a self-referencing URI.
class DrsObject(DrsObjectIn):
    id: DrsId
    self_uri: AnyUrl

    @field_serializer("self_uri")
    def serialize_url(self, self_uri: AnyUrl, _info):
        # Emit the URI as a plain string when the model is serialized.
        return str(self_uri)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Strictly positive integer; `strict=True` disables coercion from other types.
Seconds = Annotated[int, Field(strict=True, gt=0)]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# An HTTP(S) URL plus the number of seconds it is valid for (default 300).
class ObjectPresignedUrl(BaseModel):
    url: HttpUrl
    expires_in: Seconds = 300

    @field_serializer("url")
    def serialize_url(self, url: HttpUrl, _info):
        # Emit the URL as a plain string when the model is serialized.
        return str(url)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Fields common to the two output shapes of a DRS object
# (`DrsObjectBlobOut` and `DrsObjectBundleOut`).
class DrsObjectOutBase(DrsObjectBase):
    checksums: List[Checksum]
    created_time: datetime.datetime
    id: DrsId
    self_uri: AnyUrl
    size: SizeInBytes
    updated_time: Optional[datetime.datetime] = None
    version: Optional[str] = None

    @field_serializer("self_uri")
    def serialize_url(self, self_uri: AnyUrl, _info):
        # Emit the URI as a plain string when the model is serialized.
        return str(self_uri)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Output shape in which `access_methods` is required and there is no `contents` field.
class DrsObjectBlobOut(DrsObjectOutBase):
    access_methods: List[AccessMethod]
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# Output shape in which `contents` is required and `access_methods` is optional.
class DrsObjectBundleOut(DrsObjectOutBase):
    access_methods: Optional[List[AccessMethod]] = None
    contents: List[ContentsObject]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Optional, List
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from nmdc_runtime.api.models.object import DrsObject
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Optional descriptive fields of an object type; extended by `ObjectType`.
class ObjectTypeBase(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# An object type with a required identifier and creation time.
class ObjectType(ObjectTypeBase):
    id: str
    created_at: datetime.datetime
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# A `DrsObject` that may additionally carry a list of type strings.
class DrsObjectWithTypes(DrsObject):
    types: Optional[List[str]] = None  # presumably `ObjectType` ids — confirm
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Generic, TypeVar, Optional, List, Any, Union
|
|
3
|
+
|
|
4
|
+
from pydantic import StringConstraints, BaseModel, HttpUrl, field_serializer
|
|
5
|
+
|
|
6
|
+
from nmdc_runtime.api.models.util import ResultT
|
|
7
|
+
from typing_extensions import Annotated
|
|
8
|
+
|
|
9
|
+
# Type variable for the metadata payload of the generic operation models below.
MetadataT = TypeVar("MetadataT")


# Non-empty string of ASCII letters, digits, "_" and ".".
PythonImportPath = Annotated[str, StringConstraints(pattern=r"^[A-Za-z0-9_.]+$")]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Error outcome of an operation; one of the two possible types of `Operation.result`.
class OperationError(BaseModel):
    code: str
    message: str
    details: Any = None  # optional, unconstrained extra information
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# An operation, generic over the type of its result and of its metadata.
class Operation(BaseModel, Generic[ResultT, MetadataT]):
    id: str
    done: bool = False
    expire_time: datetime.datetime
    # Either a result of type `ResultT` or an `OperationError`; None when unset.
    result: Optional[Union[ResultT, OperationError]] = None
    metadata: Optional[MetadataT] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Request body carrying the updatable fields of an `Operation`
# (`done`, `result` and `metadata`).
class UpdateOperationRequest(BaseModel, Generic[ResultT, MetadataT]):
    done: bool = False
    result: Optional[Union[ResultT, OperationError]] = None
    # NOTE(review): defaults to an empty dict here, whereas `Operation.metadata`
    # defaults to None — confirm the difference is intentional.
    metadata: Optional[MetadataT] = {}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# A list of operations plus an optional token for the next page.
class ListOperationsResponse(BaseModel, Generic[ResultT, MetadataT]):
    resources: List[Operation[ResultT, MetadataT]]
    next_page_token: Optional[str] = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Base class for operation results.
class Result(BaseModel):
    # Optional dotted path; presumably the import path of the concrete result
    # model — confirm.
    model: Optional[PythonImportPath] = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# A `Result` that adds no fields of its own.
class EmptyResult(Result):
    pass
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Base class for operation metadata; extended by `PausedOrNot` and `ObjectPutMetadata`.
class Metadata(BaseModel):
    # XXX alternative: set model field using __class__ on __init__()?
    model: Optional[PythonImportPath] = None
    cancelled: Optional[bool] = None  # tri-state: None when not set
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Metadata that additionally records a required `paused` flag.
class PausedOrNot(Metadata):
    paused: bool
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Metadata carrying an object id, a site id, an HTTP(S) URL and an expiry in seconds.
class ObjectPutMetadata(Metadata):
    object_id: str
    site_id: str
    url: HttpUrl
    expires_in_seconds: int

    @field_serializer("url")
    def serialize_url(self, url: HttpUrl, _info):
        # Emit the URL as a plain string when the model is serialized.
        return str(url)
|