nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +22 -2
- nmdc_runtime/api/core/idgen.py +36 -6
- nmdc_runtime/api/db/mongo.py +0 -12
- nmdc_runtime/api/endpoints/find.py +65 -225
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
- nmdc_runtime/api/endpoints/objects.py +4 -11
- nmdc_runtime/api/endpoints/operations.py +0 -27
- nmdc_runtime/api/endpoints/queries.py +22 -0
- nmdc_runtime/api/endpoints/sites.py +0 -24
- nmdc_runtime/api/endpoints/util.py +57 -35
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +84 -60
- nmdc_runtime/api/models/util.py +12 -5
- nmdc_runtime/api/openapi.py +116 -180
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/minter/adapters/repository.py +21 -0
- nmdc_runtime/minter/domain/model.py +20 -0
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +632 -11
- nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
- nmdc_runtime/site/graphs.py +7 -0
- nmdc_runtime/site/ops.py +92 -34
- nmdc_runtime/site/repository.py +2 -0
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +87 -1
- nmdc_runtime-2.11.1.dist-info/METADATA +46 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/RECORD +47 -57
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/WHEEL +1 -2
- nmdc_runtime/api/endpoints/ids.py +0 -192
- nmdc_runtime/client/__init__.py +0 -0
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/__init__.py +0 -0
- nmdc_runtime/core/db/Database.py +0 -13
- nmdc_runtime/core/db/__init__.py +0 -0
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/__init__.py +0 -0
- nmdc_runtime/domain/users/__init__.py +0 -0
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
- nmdc_runtime/infrastructure/database/models/user.py +0 -1
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -33
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -825
- nmdc_runtime/lib/nmdc_etl_class.py +0 -396
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/__init__.py +0 -0
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
- nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -145,6 +145,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
145
145
|
# See: https://github.com/microbiomedata/submission-schema/issues/162
|
|
146
146
|
study_category: Optional[str] = None,
|
|
147
147
|
study_pi_image_url: Optional[str] = None,
|
|
148
|
+
study_id: Optional[str] = None,
|
|
148
149
|
# Additional biosample-level metadata with optional column mapping information not captured
|
|
149
150
|
# by the submission portal currently.
|
|
150
151
|
# See: https://github.com/microbiomedata/submission-schema/issues/162
|
|
@@ -165,6 +166,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
165
166
|
nmdc.StudyCategoryEnum(study_category) if study_category else None
|
|
166
167
|
)
|
|
167
168
|
self.study_pi_image_url = study_pi_image_url
|
|
169
|
+
self.study_id = study_id
|
|
168
170
|
|
|
169
171
|
self.biosample_extras = group_dicts_by_key(
|
|
170
172
|
BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
|
|
@@ -174,6 +176,13 @@ class SubmissionPortalTranslator(Translator):
|
|
|
174
176
|
)
|
|
175
177
|
|
|
176
178
|
self.schema_view: SchemaView = _get_schema_view()
|
|
179
|
+
self._material_processing_subclass_names = []
|
|
180
|
+
for class_name in self.schema_view.class_descendants(
|
|
181
|
+
"MaterialProcessing", reflexive=False
|
|
182
|
+
):
|
|
183
|
+
class_def = self.schema_view.get_class(class_name)
|
|
184
|
+
if not class_def.abstract:
|
|
185
|
+
self._material_processing_subclass_names.append(class_name)
|
|
177
186
|
|
|
178
187
|
def _get_pi(
|
|
179
188
|
self, metadata_submission: JSON_OBJECT
|
|
@@ -542,6 +551,14 @@ class SubmissionPortalTranslator(Translator):
|
|
|
542
551
|
|
|
543
552
|
return data_objects, manifest
|
|
544
553
|
|
|
554
|
+
def _parse_sample_link(self, sample_link: str) -> tuple[str, list[str]] | None:
|
|
555
|
+
"""Parse a sample link in the form of `ProcessingName:SampleName,...`."""
|
|
556
|
+
pattern = r"(" + "|".join(self._material_processing_subclass_names) + r"):(.+)"
|
|
557
|
+
match = re.match(pattern, sample_link)
|
|
558
|
+
if not match:
|
|
559
|
+
return None
|
|
560
|
+
return match.group(1), split_strip(match.group(2), ",")
|
|
561
|
+
|
|
545
562
|
def _translate_study(
|
|
546
563
|
self, metadata_submission: JSON_OBJECT, nmdc_study_id: str
|
|
547
564
|
) -> nmdc.Study:
|
|
@@ -752,11 +769,14 @@ class SubmissionPortalTranslator(Translator):
|
|
|
752
769
|
"metadata_submission", {}
|
|
753
770
|
)
|
|
754
771
|
|
|
755
|
-
# Generate one Study instance based on the metadata submission
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
772
|
+
# Generate one Study instance based on the metadata submission, if a study_id wasn't provided
|
|
773
|
+
if self.study_id:
|
|
774
|
+
nmdc_study_id = self.study_id
|
|
775
|
+
else:
|
|
776
|
+
nmdc_study_id = self._id_minter("nmdc:Study")[0]
|
|
777
|
+
database.study_set = [
|
|
778
|
+
self._translate_study(metadata_submission_data, nmdc_study_id)
|
|
779
|
+
]
|
|
760
780
|
|
|
761
781
|
# Automatically populate the `env_package` field in the sample data based on which
|
|
762
782
|
# environmental data tab the sample data came from.
|
|
@@ -788,15 +808,63 @@ class SubmissionPortalTranslator(Translator):
|
|
|
788
808
|
)
|
|
789
809
|
|
|
790
810
|
# Translate the sample data into nmdc:Biosample objects
|
|
791
|
-
database.biosample_set = [
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
811
|
+
database.biosample_set = []
|
|
812
|
+
for sample_data_id, sample_data in sample_data_by_id.items():
|
|
813
|
+
# This shouldn't happen, but just in case, skip empty sample data.
|
|
814
|
+
if not sample_data:
|
|
815
|
+
continue
|
|
816
|
+
|
|
817
|
+
# Find the first tab that has a sample_link value and attempt to parse it
|
|
818
|
+
sample_link = ""
|
|
819
|
+
for tab in sample_data:
|
|
820
|
+
if tab.get("sample_link"):
|
|
821
|
+
sample_link = tab.get("sample_link")
|
|
822
|
+
break
|
|
823
|
+
parsed_sample_link = self._parse_sample_link(sample_link)
|
|
824
|
+
|
|
825
|
+
# If the sample_link could be parsed according to the [ProcessName]:[InputSample,...]
|
|
826
|
+
# format, then create a ProcessedSample and MaterialProcessing instance instead of a
|
|
827
|
+
# Biosample instance. The input samples must be present in the submission for this to
|
|
828
|
+
# work. An exception is raised if any of the referenced input samples are missing.
|
|
829
|
+
if parsed_sample_link is not None:
|
|
830
|
+
processing_type, processing_inputs = parsed_sample_link
|
|
831
|
+
if not all(
|
|
832
|
+
input_id in sample_data_to_nmdc_biosample_ids
|
|
833
|
+
for input_id in processing_inputs
|
|
834
|
+
):
|
|
835
|
+
raise ValueError(
|
|
836
|
+
f"Could not find all input samples in sample_link '{sample_link}'"
|
|
837
|
+
)
|
|
838
|
+
processed_sample_id = self._id_minter("nmdc:ProcessedSample")[0]
|
|
839
|
+
database.processed_sample_set.append(
|
|
840
|
+
nmdc.ProcessedSample(
|
|
841
|
+
id=processed_sample_id,
|
|
842
|
+
type="nmdc:ProcessedSample",
|
|
843
|
+
name=sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip(),
|
|
844
|
+
)
|
|
845
|
+
)
|
|
846
|
+
|
|
847
|
+
processing_class = getattr(nmdc, processing_type)
|
|
848
|
+
material_processing = processing_class(
|
|
849
|
+
id=self._id_minter(f"nmdc:{processing_type}")[0],
|
|
850
|
+
type=f"nmdc:{processing_type}",
|
|
851
|
+
has_input=[
|
|
852
|
+
sample_data_to_nmdc_biosample_ids[input_id]
|
|
853
|
+
for input_id in processing_inputs
|
|
854
|
+
],
|
|
855
|
+
has_output=[processed_sample_id],
|
|
856
|
+
)
|
|
857
|
+
database.material_processing_set.append(material_processing)
|
|
858
|
+
|
|
859
|
+
# If there was no sample_link or it doesn't follow the expected format, create a
|
|
860
|
+
# Biosample instance as normal.
|
|
861
|
+
else:
|
|
862
|
+
biosample = self._translate_biosample(
|
|
863
|
+
sample_data,
|
|
864
|
+
nmdc_biosample_id=sample_data_to_nmdc_biosample_ids[sample_data_id],
|
|
865
|
+
nmdc_study_id=nmdc_study_id,
|
|
866
|
+
)
|
|
867
|
+
database.biosample_set.append(biosample)
|
|
800
868
|
|
|
801
869
|
# This section handles the translation of information in the external sequencing tabs into
|
|
802
870
|
# various NMDC objects.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
load_from:
|
|
2
|
+
- python_package:
|
|
3
|
+
package_name: nmdc_runtime.site.repository
|
|
4
|
+
attribute: repo
|
|
5
|
+
- python_package:
|
|
6
|
+
package_name: nmdc_runtime.site.repository
|
|
7
|
+
attribute: biosample_submission_ingest
|
|
8
|
+
- python_package:
|
|
9
|
+
package_name: nmdc_runtime.site.repository
|
|
10
|
+
attribute: biosample_export
|
|
11
|
+
- python_package:
|
|
12
|
+
package_name: nmdc_runtime.site.repository
|
|
13
|
+
attribute: database_records_stitching
|