nmdc-runtime 2.8.0__py3-none-any.whl → 2.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/api/__init__.py +0 -0
- nmdc_runtime/api/analytics.py +70 -0
- nmdc_runtime/api/boot/__init__.py +0 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/__init__.py +0 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +170 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/__init__.py +0 -0
- nmdc_runtime/api/db/mongo.py +447 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/__init__.py +0 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +794 -0
- nmdc_runtime/api/endpoints/ids.py +192 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +105 -0
- nmdc_runtime/api/endpoints/queries.py +679 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +229 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +774 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/main.py +401 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/__init__.py +0 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/__init__.py +0 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/minter.py +0 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +253 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +242 -0
- nmdc_runtime/config.py +55 -4
- nmdc_runtime/core/db/Database.py +1 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -9
- nmdc_runtime/lib/extract_nmdc_data.py +0 -8
- nmdc_runtime/lib/nmdc_dataframes.py +3 -7
- nmdc_runtime/lib/nmdc_etl_class.py +1 -7
- nmdc_runtime/minter/adapters/repository.py +1 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +35 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/export/ncbi_xml.py +1 -2
- nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
- nmdc_runtime/site/graphs.py +33 -28
- nmdc_runtime/site/ops.py +97 -237
- nmdc_runtime/site/repair/database_updater.py +8 -0
- nmdc_runtime/site/repository.py +7 -117
- nmdc_runtime/site/resources.py +4 -4
- nmdc_runtime/site/translation/gold_translator.py +22 -21
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +64 -54
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/util.py +9 -321
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
- nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.8.0.dist-info/RECORD +0 -84
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
+
import re
|
|
2
3
|
from typing import Optional
|
|
3
4
|
|
|
5
|
+
from base32_lib import base32
|
|
4
6
|
from pydantic import BaseModel, PositiveInt
|
|
5
7
|
|
|
6
|
-
from nmdc_runtime.minter.config import schema_classes
|
|
8
|
+
from nmdc_runtime.minter.config import schema_classes, typecodes
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class Entity(BaseModel):
|
|
@@ -71,3 +73,35 @@ class Identifier(Entity):
|
|
|
71
73
|
class Typecode(Entity):
|
|
72
74
|
schema_class: str
|
|
73
75
|
name: str
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
id_prefix_pattern = rf"(?P<prefix>nmdc)"
|
|
79
|
+
id_typecode_pattern = rf"(?P<typecode>[a-z]{{1,6}})"
|
|
80
|
+
id_shoulder_pattern = rf"(?P<shoulder>[0-9][a-z]{{0,6}}[0-9])"
|
|
81
|
+
id_blade_pattern = rf"(?P<blade>[A-Za-z0-9]+)"
|
|
82
|
+
id_version_pattern = rf"(?P<version>(\.[A-Za-z0-9]+)*)"
|
|
83
|
+
id_locus_pattern = rf"(?P<locus>_[A-Za-z0-9_\.-]+)?"
|
|
84
|
+
id_pattern = (
|
|
85
|
+
rf"^{id_prefix_pattern}:{id_typecode_pattern}-{id_shoulder_pattern}-"
|
|
86
|
+
rf"{id_blade_pattern}{id_version_pattern}{id_locus_pattern}$"
|
|
87
|
+
)
|
|
88
|
+
ID_TYPECODE_VALUES = [t["name"] for t in typecodes()]
|
|
89
|
+
id_typecode_pattern_strict = rf"(?P<typecode_strict>({'|'.join(ID_TYPECODE_VALUES)}))"
|
|
90
|
+
id_blade_pattern_strict = rf"(?P<blade_strict>[{base32.ENCODING_CHARS}]+)"
|
|
91
|
+
id_pattern_strict = (
|
|
92
|
+
rf"^{id_prefix_pattern}:{id_typecode_pattern_strict}-{id_shoulder_pattern}-"
|
|
93
|
+
rf"{id_blade_pattern_strict}{id_version_pattern}{id_locus_pattern}$"
|
|
94
|
+
)
|
|
95
|
+
id_pattern_strict_compiled = re.compile(id_pattern_strict)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def check_valid_ids(ids: list[str]):
|
|
99
|
+
for id_ in ids:
|
|
100
|
+
if not re.match(id_pattern, id_):
|
|
101
|
+
raise ValueError(
|
|
102
|
+
(
|
|
103
|
+
f"Invalid ID format for given ID: '{id_}'.\n\nAn ID must match the pattern: '{id_pattern}'.\n\n"
|
|
104
|
+
"See: <https://microbiomedata.github.io/nmdc-schema/identifiers/#ids-minted-for-use-within-nmdc>"
|
|
105
|
+
)
|
|
106
|
+
)
|
|
107
|
+
return ids
|
|
@@ -8,7 +8,7 @@ from nmdc_runtime.api.core.util import raise404_if_none
|
|
|
8
8
|
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
9
9
|
from nmdc_runtime.api.models.site import get_current_client_site, Site
|
|
10
10
|
from nmdc_runtime.minter.adapters.repository import MongoIDStore, MinterError
|
|
11
|
-
from nmdc_runtime.minter.config import minting_service_id
|
|
11
|
+
from nmdc_runtime.minter.config import minting_service_id
|
|
12
12
|
from nmdc_runtime.minter.domain.model import (
|
|
13
13
|
Identifier,
|
|
14
14
|
AuthenticatedMintingRequest,
|
nmdc_runtime/mongo_util.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from pymongo import MongoClient
|
|
2
1
|
from pymongo.database import Database
|
|
3
2
|
from pymongo.collection import Collection
|
|
4
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Optional
|
|
5
4
|
from pymongo.client_session import ClientSession
|
|
6
5
|
import inspect
|
|
7
6
|
|
|
@@ -16,9 +16,7 @@ from toolz import assoc
|
|
|
16
16
|
|
|
17
17
|
from nmdc_runtime.api.core.util import pick
|
|
18
18
|
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
19
|
-
from nmdc_runtime.
|
|
20
|
-
from nmdc_runtime.site.resources import get_mongo
|
|
21
|
-
from nmdc_runtime.util import nmdc_jsonschema, schema_collection_names_with_id_field
|
|
19
|
+
from nmdc_runtime.util import schema_collection_names_with_id_field
|
|
22
20
|
|
|
23
21
|
|
|
24
22
|
def collection_stats(mdb: MongoDatabase):
|
|
@@ -4,7 +4,7 @@ import datetime
|
|
|
4
4
|
import xml.etree.ElementTree as ET
|
|
5
5
|
import xml.dom.minidom
|
|
6
6
|
|
|
7
|
-
from typing import Any, List
|
|
7
|
+
from typing import Any, List
|
|
8
8
|
from urllib.parse import urlparse
|
|
9
9
|
from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
10
10
|
handle_controlled_identified_term_value,
|
|
@@ -16,7 +16,6 @@ from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
|
16
16
|
handle_float_value,
|
|
17
17
|
handle_string_value,
|
|
18
18
|
load_mappings,
|
|
19
|
-
validate_xml,
|
|
20
19
|
)
|
|
21
20
|
|
|
22
21
|
|
nmdc_runtime/site/graphs.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from dagster import graph
|
|
1
|
+
from dagster import graph
|
|
2
2
|
|
|
3
3
|
from nmdc_runtime.site.ops import (
|
|
4
|
-
build_merged_db,
|
|
5
4
|
generate_biosample_set_for_nmdc_study_from_gold,
|
|
6
5
|
nmdc_schema_database_export_filename,
|
|
7
6
|
nmdc_schema_database_from_gold_study,
|
|
@@ -12,8 +11,6 @@ from nmdc_runtime.site.ops import (
|
|
|
12
11
|
gold_projects_by_study,
|
|
13
12
|
gold_study,
|
|
14
13
|
poll_for_run_completion,
|
|
15
|
-
run_etl,
|
|
16
|
-
local_file_to_api_object,
|
|
17
14
|
get_operation,
|
|
18
15
|
produce_curated_db,
|
|
19
16
|
delete_operations,
|
|
@@ -70,24 +67,6 @@ from nmdc_runtime.site.ops import (
|
|
|
70
67
|
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
|
|
71
68
|
|
|
72
69
|
|
|
73
|
-
@graph
|
|
74
|
-
def gold_translation():
|
|
75
|
-
"""
|
|
76
|
-
Translating an export of the JGI GOLD [1] SQL database to the NMDC database JSON schema.
|
|
77
|
-
|
|
78
|
-
[1] Genomes OnLine Database (GOLD) <https://gold.jgi.doe.gov/>.
|
|
79
|
-
"""
|
|
80
|
-
local_file_to_api_object(run_etl(build_merged_db()))
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
@graph()
|
|
84
|
-
def gold_translation_curation():
|
|
85
|
-
# TODO
|
|
86
|
-
# - have produce_curated_db do actual curation (see notebook), persisting to db.
|
|
87
|
-
# - more steps in pipeline? Or handoff via run_status_sensor on DagsterRunStatus.SUCCESS.
|
|
88
|
-
produce_curated_db(get_operation())
|
|
89
|
-
|
|
90
|
-
|
|
91
70
|
@graph()
|
|
92
71
|
def create_objects_from_site_object_puts():
|
|
93
72
|
delete_operations(
|
|
@@ -160,6 +139,7 @@ def gold_study_to_database():
|
|
|
160
139
|
study_type,
|
|
161
140
|
gold_nmdc_instrument_mapping_file_url,
|
|
162
141
|
include_field_site_info,
|
|
142
|
+
enable_biosample_filtering,
|
|
163
143
|
) = get_gold_study_pipeline_inputs()
|
|
164
144
|
|
|
165
145
|
projects = gold_projects_by_study(study_id)
|
|
@@ -176,6 +156,7 @@ def gold_study_to_database():
|
|
|
176
156
|
analysis_projects,
|
|
177
157
|
gold_nmdc_instrument_map_df,
|
|
178
158
|
include_field_site_info,
|
|
159
|
+
enable_biosample_filtering,
|
|
179
160
|
)
|
|
180
161
|
database_dict = nmdc_schema_object_to_dict(database)
|
|
181
162
|
filename = nmdc_schema_database_export_filename(study)
|
|
@@ -506,11 +487,19 @@ def nmdc_study_to_ncbi_submission_export():
|
|
|
506
487
|
|
|
507
488
|
@graph
|
|
508
489
|
def generate_data_generation_set_for_biosamples_in_nmdc_study():
|
|
509
|
-
(
|
|
490
|
+
(
|
|
491
|
+
study_id,
|
|
492
|
+
gold_nmdc_instrument_mapping_file_url,
|
|
493
|
+
include_field_site_info,
|
|
494
|
+
enable_biosample_filtering,
|
|
495
|
+
) = get_database_updater_inputs()
|
|
510
496
|
gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
|
|
511
497
|
|
|
512
498
|
database = generate_data_generation_set_post_biosample_ingest(
|
|
513
|
-
study_id,
|
|
499
|
+
study_id,
|
|
500
|
+
gold_nmdc_instrument_map_df,
|
|
501
|
+
include_field_site_info,
|
|
502
|
+
enable_biosample_filtering,
|
|
514
503
|
)
|
|
515
504
|
|
|
516
505
|
database_dict = nmdc_schema_object_to_dict(database)
|
|
@@ -523,11 +512,19 @@ def generate_data_generation_set_for_biosamples_in_nmdc_study():
|
|
|
523
512
|
|
|
524
513
|
@graph
|
|
525
514
|
def generate_biosample_set_from_samples_in_gold():
|
|
526
|
-
(
|
|
515
|
+
(
|
|
516
|
+
study_id,
|
|
517
|
+
gold_nmdc_instrument_mapping_file_url,
|
|
518
|
+
include_field_site_info,
|
|
519
|
+
enable_biosample_filtering,
|
|
520
|
+
) = get_database_updater_inputs()
|
|
527
521
|
gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
|
|
528
522
|
|
|
529
523
|
database = generate_biosample_set_for_nmdc_study_from_gold(
|
|
530
|
-
study_id,
|
|
524
|
+
study_id,
|
|
525
|
+
gold_nmdc_instrument_map_df,
|
|
526
|
+
include_field_site_info,
|
|
527
|
+
enable_biosample_filtering,
|
|
531
528
|
)
|
|
532
529
|
database_dict = nmdc_schema_object_to_dict(database)
|
|
533
530
|
filename = post_submission_portal_biosample_ingest_record_stitching_filename(
|
|
@@ -545,10 +542,18 @@ def generate_update_script_for_insdc_biosample_identifiers():
|
|
|
545
542
|
to generate a script for updating biosample records with INSDC identifiers obtained from GOLD.
|
|
546
543
|
The script is returned as a dictionary that can be executed against MongoDB.
|
|
547
544
|
"""
|
|
548
|
-
(
|
|
545
|
+
(
|
|
546
|
+
study_id,
|
|
547
|
+
gold_nmdc_instrument_mapping_file_url,
|
|
548
|
+
include_field_site_info,
|
|
549
|
+
enable_biosample_filtering,
|
|
550
|
+
) = get_database_updater_inputs()
|
|
549
551
|
gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
|
|
550
552
|
|
|
551
553
|
update_script = run_script_to_update_insdc_biosample_identifiers(
|
|
552
|
-
study_id,
|
|
554
|
+
study_id,
|
|
555
|
+
gold_nmdc_instrument_map_df,
|
|
556
|
+
include_field_site_info,
|
|
557
|
+
enable_biosample_filtering,
|
|
553
558
|
)
|
|
554
559
|
render_text(update_script)
|