nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (77)
  1. nmdc_runtime/Dockerfile +167 -0
  2. nmdc_runtime/api/analytics.py +22 -2
  3. nmdc_runtime/api/core/idgen.py +36 -6
  4. nmdc_runtime/api/db/mongo.py +0 -12
  5. nmdc_runtime/api/endpoints/find.py +65 -225
  6. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  7. nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
  8. nmdc_runtime/api/endpoints/objects.py +4 -11
  9. nmdc_runtime/api/endpoints/operations.py +0 -27
  10. nmdc_runtime/api/endpoints/queries.py +22 -0
  11. nmdc_runtime/api/endpoints/sites.py +0 -24
  12. nmdc_runtime/api/endpoints/util.py +57 -35
  13. nmdc_runtime/api/entrypoint.sh +7 -0
  14. nmdc_runtime/api/main.py +84 -60
  15. nmdc_runtime/api/models/util.py +12 -5
  16. nmdc_runtime/api/openapi.py +116 -180
  17. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  18. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  19. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  20. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  21. nmdc_runtime/minter/adapters/repository.py +21 -0
  22. nmdc_runtime/minter/domain/model.py +20 -0
  23. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  24. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  25. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  26. nmdc_runtime/site/dagster.yaml +53 -0
  27. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  28. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  29. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  30. nmdc_runtime/site/export/ncbi_xml.py +632 -11
  31. nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
  32. nmdc_runtime/site/graphs.py +7 -0
  33. nmdc_runtime/site/ops.py +92 -34
  34. nmdc_runtime/site/repository.py +2 -0
  35. nmdc_runtime/site/resources.py +16 -3
  36. nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
  37. nmdc_runtime/site/workspace.yaml +13 -0
  38. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  39. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  40. nmdc_runtime/static/README.md +5 -0
  41. nmdc_runtime/static/favicon.ico +0 -0
  42. nmdc_runtime/util.py +87 -1
  43. nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
  44. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/RECORD +47 -57
  45. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
  46. nmdc_runtime/api/endpoints/ids.py +0 -192
  47. nmdc_runtime/client/__init__.py +0 -0
  48. nmdc_runtime/containers.py +0 -14
  49. nmdc_runtime/core/__init__.py +0 -0
  50. nmdc_runtime/core/db/Database.py +0 -13
  51. nmdc_runtime/core/db/__init__.py +0 -0
  52. nmdc_runtime/core/exceptions/__init__.py +0 -23
  53. nmdc_runtime/core/exceptions/base.py +0 -47
  54. nmdc_runtime/core/exceptions/token.py +0 -13
  55. nmdc_runtime/domain/__init__.py +0 -0
  56. nmdc_runtime/domain/users/__init__.py +0 -0
  57. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  58. nmdc_runtime/domain/users/userSchema.py +0 -37
  59. nmdc_runtime/domain/users/userService.py +0 -14
  60. nmdc_runtime/infrastructure/__init__.py +0 -0
  61. nmdc_runtime/infrastructure/database/__init__.py +0 -0
  62. nmdc_runtime/infrastructure/database/db.py +0 -3
  63. nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
  64. nmdc_runtime/infrastructure/database/models/user.py +0 -1
  65. nmdc_runtime/lib/__init__.py +0 -1
  66. nmdc_runtime/lib/extract_nmdc_data.py +0 -33
  67. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  68. nmdc_runtime/lib/nmdc_dataframes.py +0 -825
  69. nmdc_runtime/lib/nmdc_etl_class.py +0 -396
  70. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  71. nmdc_runtime/site/drsobjects/__init__.py +0 -0
  72. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  73. nmdc_runtime/site/drsobjects/registration.py +0 -131
  74. nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
  75. nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
  76. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
  77. {nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -145,6 +145,7 @@ class SubmissionPortalTranslator(Translator):
145
145
  # See: https://github.com/microbiomedata/submission-schema/issues/162
146
146
  study_category: Optional[str] = None,
147
147
  study_pi_image_url: Optional[str] = None,
148
+ study_id: Optional[str] = None,
148
149
  # Additional biosample-level metadata with optional column mapping information not captured
149
150
  # by the submission portal currently.
150
151
  # See: https://github.com/microbiomedata/submission-schema/issues/162
@@ -165,6 +166,7 @@ class SubmissionPortalTranslator(Translator):
165
166
  nmdc.StudyCategoryEnum(study_category) if study_category else None
166
167
  )
167
168
  self.study_pi_image_url = study_pi_image_url
169
+ self.study_id = study_id
168
170
 
169
171
  self.biosample_extras = group_dicts_by_key(
170
172
  BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
@@ -174,6 +176,13 @@ class SubmissionPortalTranslator(Translator):
174
176
  )
175
177
 
176
178
  self.schema_view: SchemaView = _get_schema_view()
179
+ self._material_processing_subclass_names = []
180
+ for class_name in self.schema_view.class_descendants(
181
+ "MaterialProcessing", reflexive=False
182
+ ):
183
+ class_def = self.schema_view.get_class(class_name)
184
+ if not class_def.abstract:
185
+ self._material_processing_subclass_names.append(class_name)
177
186
 
178
187
  def _get_pi(
179
188
  self, metadata_submission: JSON_OBJECT
@@ -542,6 +551,14 @@ class SubmissionPortalTranslator(Translator):
542
551
 
543
552
  return data_objects, manifest
544
553
 
554
+ def _parse_sample_link(self, sample_link: str) -> tuple[str, list[str]] | None:
555
+ """Parse a sample link in the form of `ProcessingName:SampleName,..."""
556
+ pattern = r"(" + "|".join(self._material_processing_subclass_names) + r"):(.+)"
557
+ match = re.match(pattern, sample_link)
558
+ if not match:
559
+ return None
560
+ return match.group(1), split_strip(match.group(2), ",")
561
+
545
562
  def _translate_study(
546
563
  self, metadata_submission: JSON_OBJECT, nmdc_study_id: str
547
564
  ) -> nmdc.Study:
@@ -752,11 +769,14 @@ class SubmissionPortalTranslator(Translator):
752
769
  "metadata_submission", {}
753
770
  )
754
771
 
755
- # Generate one Study instance based on the metadata submission
756
- nmdc_study_id = self._id_minter("nmdc:Study")[0]
757
- database.study_set = [
758
- self._translate_study(metadata_submission_data, nmdc_study_id)
759
- ]
772
+ # Generate one Study instance based on the metadata submission, if a study_id wasn't provided
773
+ if self.study_id:
774
+ nmdc_study_id = self.study_id
775
+ else:
776
+ nmdc_study_id = self._id_minter("nmdc:Study")[0]
777
+ database.study_set = [
778
+ self._translate_study(metadata_submission_data, nmdc_study_id)
779
+ ]
760
780
 
761
781
  # Automatically populate the `env_package` field in the sample data based on which
762
782
  # environmental data tab the sample data came from.
@@ -788,15 +808,63 @@ class SubmissionPortalTranslator(Translator):
788
808
  )
789
809
 
790
810
  # Translate the sample data into nmdc:Biosample objects
791
- database.biosample_set = [
792
- self._translate_biosample(
793
- sample_data,
794
- nmdc_biosample_id=sample_data_to_nmdc_biosample_ids[sample_data_id],
795
- nmdc_study_id=nmdc_study_id,
796
- )
797
- for sample_data_id, sample_data in sample_data_by_id.items()
798
- if sample_data
799
- ]
811
+ database.biosample_set = []
812
+ for sample_data_id, sample_data in sample_data_by_id.items():
813
+ # This shouldn't happen, but just in case skip empty sample data
814
+ if not sample_data:
815
+ continue
816
+
817
+ # Find the first tab that has a sample_link value and attempt to parse it
818
+ sample_link = ""
819
+ for tab in sample_data:
820
+ if tab.get("sample_link"):
821
+ sample_link = tab.get("sample_link")
822
+ break
823
+ parsed_sample_link = self._parse_sample_link(sample_link)
824
+
825
+ # If the sample_link could be parsed according to the [ProcessName]:[InputSample,...]
826
+ # format, then create a ProcessedSample and MaterialProcessing instance instead of a
827
+ # Biosample instance. The input samples must be present in the submission for this to
828
+ # work. An exception is raised if any of the referenced input samples are missing.
829
+ if parsed_sample_link is not None:
830
+ processing_type, processing_inputs = parsed_sample_link
831
+ if not all(
832
+ input_id in sample_data_to_nmdc_biosample_ids
833
+ for input_id in processing_inputs
834
+ ):
835
+ raise ValueError(
836
+ f"Could not find all input samples in sample_link '{sample_link}'"
837
+ )
838
+ processed_sample_id = self._id_minter("nmdc:ProcessedSample")[0]
839
+ database.processed_sample_set.append(
840
+ nmdc.ProcessedSample(
841
+ id=processed_sample_id,
842
+ type="nmdc:ProcessedSample",
843
+ name=sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip(),
844
+ )
845
+ )
846
+
847
+ processing_class = getattr(nmdc, processing_type)
848
+ material_processing = processing_class(
849
+ id=self._id_minter(f"nmdc:{processing_type}")[0],
850
+ type=f"nmdc:{processing_type}",
851
+ has_input=[
852
+ sample_data_to_nmdc_biosample_ids[input_id]
853
+ for input_id in processing_inputs
854
+ ],
855
+ has_output=[processed_sample_id],
856
+ )
857
+ database.material_processing_set.append(material_processing)
858
+
859
+ # If there was no sample_link or it doesn't follow the expected format, create a
860
+ # Biosample instance as normal.
861
+ else:
862
+ biosample = self._translate_biosample(
863
+ sample_data,
864
+ nmdc_biosample_id=sample_data_to_nmdc_biosample_ids[sample_data_id],
865
+ nmdc_study_id=nmdc_study_id,
866
+ )
867
+ database.biosample_set.append(biosample)
800
868
 
801
869
  # This section handles the translation of information in the external sequencing tabs into
802
870
  # various NMDC objects.
@@ -0,0 +1,13 @@
1
+ load_from:
2
+ - python_package:
3
+ package_name: nmdc_runtime.site.repository
4
+ attribute: repo
5
+ - python_package:
6
+ package_name: nmdc_runtime.site.repository
7
+ attribute: biosample_submission_ingest
8
+ - python_package:
9
+ package_name: nmdc_runtime.site.repository
10
+ attribute: biosample_export
11
+ - python_package:
12
+ package_name: nmdc_runtime.site.repository
13
+ attribute: database_records_stitching