nmdc-runtime: nmdc_runtime-2.8.0-py3-none-any.whl → nmdc_runtime-2.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (100)
  1. nmdc_runtime/api/__init__.py +0 -0
  2. nmdc_runtime/api/analytics.py +70 -0
  3. nmdc_runtime/api/boot/__init__.py +0 -0
  4. nmdc_runtime/api/boot/capabilities.py +9 -0
  5. nmdc_runtime/api/boot/object_types.py +126 -0
  6. nmdc_runtime/api/boot/triggers.py +84 -0
  7. nmdc_runtime/api/boot/workflows.py +116 -0
  8. nmdc_runtime/api/core/__init__.py +0 -0
  9. nmdc_runtime/api/core/auth.py +208 -0
  10. nmdc_runtime/api/core/idgen.py +170 -0
  11. nmdc_runtime/api/core/metadata.py +788 -0
  12. nmdc_runtime/api/core/util.py +109 -0
  13. nmdc_runtime/api/db/__init__.py +0 -0
  14. nmdc_runtime/api/db/mongo.py +447 -0
  15. nmdc_runtime/api/db/s3.py +37 -0
  16. nmdc_runtime/api/endpoints/__init__.py +0 -0
  17. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  18. nmdc_runtime/api/endpoints/find.py +794 -0
  19. nmdc_runtime/api/endpoints/ids.py +192 -0
  20. nmdc_runtime/api/endpoints/jobs.py +143 -0
  21. nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
  22. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  23. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  24. nmdc_runtime/api/endpoints/metadata.py +260 -0
  25. nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
  26. nmdc_runtime/api/endpoints/object_types.py +38 -0
  27. nmdc_runtime/api/endpoints/objects.py +277 -0
  28. nmdc_runtime/api/endpoints/operations.py +105 -0
  29. nmdc_runtime/api/endpoints/queries.py +679 -0
  30. nmdc_runtime/api/endpoints/runs.py +98 -0
  31. nmdc_runtime/api/endpoints/search.py +38 -0
  32. nmdc_runtime/api/endpoints/sites.py +229 -0
  33. nmdc_runtime/api/endpoints/triggers.py +25 -0
  34. nmdc_runtime/api/endpoints/users.py +214 -0
  35. nmdc_runtime/api/endpoints/util.py +774 -0
  36. nmdc_runtime/api/endpoints/workflows.py +353 -0
  37. nmdc_runtime/api/main.py +401 -0
  38. nmdc_runtime/api/middleware.py +43 -0
  39. nmdc_runtime/api/models/__init__.py +0 -0
  40. nmdc_runtime/api/models/capability.py +14 -0
  41. nmdc_runtime/api/models/id.py +92 -0
  42. nmdc_runtime/api/models/job.py +37 -0
  43. nmdc_runtime/api/models/lib/__init__.py +0 -0
  44. nmdc_runtime/api/models/lib/helpers.py +78 -0
  45. nmdc_runtime/api/models/metadata.py +11 -0
  46. nmdc_runtime/api/models/minter.py +0 -0
  47. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  48. nmdc_runtime/api/models/object.py +180 -0
  49. nmdc_runtime/api/models/object_type.py +20 -0
  50. nmdc_runtime/api/models/operation.py +66 -0
  51. nmdc_runtime/api/models/query.py +246 -0
  52. nmdc_runtime/api/models/query_continuation.py +111 -0
  53. nmdc_runtime/api/models/run.py +161 -0
  54. nmdc_runtime/api/models/site.py +87 -0
  55. nmdc_runtime/api/models/trigger.py +13 -0
  56. nmdc_runtime/api/models/user.py +140 -0
  57. nmdc_runtime/api/models/util.py +253 -0
  58. nmdc_runtime/api/models/workflow.py +15 -0
  59. nmdc_runtime/api/openapi.py +242 -0
  60. nmdc_runtime/config.py +55 -4
  61. nmdc_runtime/core/db/Database.py +1 -3
  62. nmdc_runtime/infrastructure/database/models/user.py +0 -9
  63. nmdc_runtime/lib/extract_nmdc_data.py +0 -8
  64. nmdc_runtime/lib/nmdc_dataframes.py +3 -7
  65. nmdc_runtime/lib/nmdc_etl_class.py +1 -7
  66. nmdc_runtime/minter/adapters/repository.py +1 -2
  67. nmdc_runtime/minter/config.py +2 -0
  68. nmdc_runtime/minter/domain/model.py +35 -1
  69. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  70. nmdc_runtime/mongo_util.py +1 -2
  71. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  72. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  73. nmdc_runtime/site/export/ncbi_xml.py +1 -2
  74. nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
  75. nmdc_runtime/site/graphs.py +33 -28
  76. nmdc_runtime/site/ops.py +97 -237
  77. nmdc_runtime/site/repair/database_updater.py +8 -0
  78. nmdc_runtime/site/repository.py +7 -117
  79. nmdc_runtime/site/resources.py +4 -4
  80. nmdc_runtime/site/translation/gold_translator.py +22 -21
  81. nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
  82. nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
  83. nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
  84. nmdc_runtime/site/translation/submission_portal_translator.py +64 -54
  85. nmdc_runtime/site/translation/translator.py +63 -1
  86. nmdc_runtime/site/util.py +8 -3
  87. nmdc_runtime/site/validation/util.py +10 -5
  88. nmdc_runtime/util.py +9 -321
  89. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
  90. nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
  91. nmdc_runtime/site/translation/emsl.py +0 -43
  92. nmdc_runtime/site/translation/gold.py +0 -53
  93. nmdc_runtime/site/translation/jgi.py +0 -32
  94. nmdc_runtime/site/translation/util.py +0 -132
  95. nmdc_runtime/site/validation/jgi.py +0 -43
  96. nmdc_runtime-2.8.0.dist-info/RECORD +0 -84
  97. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
  98. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
  99. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
  100. {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,11 @@
1
1
  from enum import Enum
2
+ import re
2
3
  from typing import Optional
3
4
 
5
+ from base32_lib import base32
4
6
  from pydantic import BaseModel, PositiveInt
5
7
 
6
- from nmdc_runtime.minter.config import schema_classes
8
+ from nmdc_runtime.minter.config import schema_classes, typecodes
7
9
 
8
10
 
9
11
  class Entity(BaseModel):
@@ -71,3 +73,35 @@ class Identifier(Entity):
71
73
  class Typecode(Entity):
72
74
  schema_class: str
73
75
  name: str
76
+
77
+
78
+ id_prefix_pattern = rf"(?P<prefix>nmdc)"
79
+ id_typecode_pattern = rf"(?P<typecode>[a-z]{{1,6}})"
80
+ id_shoulder_pattern = rf"(?P<shoulder>[0-9][a-z]{{0,6}}[0-9])"
81
+ id_blade_pattern = rf"(?P<blade>[A-Za-z0-9]+)"
82
+ id_version_pattern = rf"(?P<version>(\.[A-Za-z0-9]+)*)"
83
+ id_locus_pattern = rf"(?P<locus>_[A-Za-z0-9_\.-]+)?"
84
+ id_pattern = (
85
+ rf"^{id_prefix_pattern}:{id_typecode_pattern}-{id_shoulder_pattern}-"
86
+ rf"{id_blade_pattern}{id_version_pattern}{id_locus_pattern}$"
87
+ )
88
+ ID_TYPECODE_VALUES = [t["name"] for t in typecodes()]
89
+ id_typecode_pattern_strict = rf"(?P<typecode_strict>({'|'.join(ID_TYPECODE_VALUES)}))"
90
+ id_blade_pattern_strict = rf"(?P<blade_strict>[{base32.ENCODING_CHARS}]+)"
91
+ id_pattern_strict = (
92
+ rf"^{id_prefix_pattern}:{id_typecode_pattern_strict}-{id_shoulder_pattern}-"
93
+ rf"{id_blade_pattern_strict}{id_version_pattern}{id_locus_pattern}$"
94
+ )
95
+ id_pattern_strict_compiled = re.compile(id_pattern_strict)
96
+
97
+
98
+ def check_valid_ids(ids: list[str]):
99
+ for id_ in ids:
100
+ if not re.match(id_pattern, id_):
101
+ raise ValueError(
102
+ (
103
+ f"Invalid ID format for given ID: '{id_}'.\n\nAn ID must match the pattern: '{id_pattern}'.\n\n"
104
+ "See: <https://microbiomedata.github.io/nmdc-schema/identifiers/#ids-minted-for-use-within-nmdc>"
105
+ )
106
+ )
107
+ return ids
@@ -8,7 +8,7 @@ from nmdc_runtime.api.core.util import raise404_if_none
8
8
  from nmdc_runtime.api.db.mongo import get_mongo_db
9
9
  from nmdc_runtime.api.models.site import get_current_client_site, Site
10
10
  from nmdc_runtime.minter.adapters.repository import MongoIDStore, MinterError
11
- from nmdc_runtime.minter.config import minting_service_id, schema_classes
11
+ from nmdc_runtime.minter.config import minting_service_id
12
12
  from nmdc_runtime.minter.domain.model import (
13
13
  Identifier,
14
14
  AuthenticatedMintingRequest,
@@ -1,7 +1,6 @@
1
- from pymongo import MongoClient
2
1
  from pymongo.database import Database
3
2
  from pymongo.collection import Collection
4
- from typing import Any, Mapping, Optional, Type, Callable
3
+ from typing import Any, Optional
5
4
  from pymongo.client_session import ClientSession
6
5
  import inspect
7
6
 
@@ -6,7 +6,7 @@ $ nmdcdb-mongodump
6
6
 
7
7
  import os
8
8
  import subprocess
9
- from datetime import datetime, timezone
9
+ from datetime import datetime
10
10
  from pathlib import Path
11
11
  from zoneinfo import ZoneInfo
12
12
 
@@ -16,9 +16,7 @@ from toolz import assoc
16
16
 
17
17
  from nmdc_runtime.api.core.util import pick
18
18
  from nmdc_runtime.api.db.mongo import get_mongo_db
19
- from nmdc_runtime.site.repository import run_config_frozen__normal_env
20
- from nmdc_runtime.site.resources import get_mongo
21
- from nmdc_runtime.util import nmdc_jsonschema, schema_collection_names_with_id_field
19
+ from nmdc_runtime.util import schema_collection_names_with_id_field
22
20
 
23
21
 
24
22
  def collection_stats(mdb: MongoDatabase):
@@ -4,7 +4,7 @@ import datetime
4
4
  import xml.etree.ElementTree as ET
5
5
  import xml.dom.minidom
6
6
 
7
- from typing import Any, List, Union
7
+ from typing import Any, List
8
8
  from urllib.parse import urlparse
9
9
  from nmdc_runtime.site.export.ncbi_xml_utils import (
10
10
  handle_controlled_identified_term_value,
@@ -16,7 +16,6 @@ from nmdc_runtime.site.export.ncbi_xml_utils import (
16
16
  handle_float_value,
17
17
  handle_string_value,
18
18
  load_mappings,
19
- validate_xml,
20
19
  )
21
20
 
22
21
 
@@ -1,5 +1,5 @@
1
1
  from io import BytesIO, StringIO
2
- from typing import Any, Dict, List, Union
2
+ from typing import Any, Dict, List
3
3
 
4
4
  from nmdc_runtime.api.endpoints.util import strip_oid
5
5
  from nmdc_runtime.minter.config import typecodes
@@ -1,7 +1,6 @@
1
- from dagster import graph, GraphIn
1
+ from dagster import graph
2
2
 
3
3
  from nmdc_runtime.site.ops import (
4
- build_merged_db,
5
4
  generate_biosample_set_for_nmdc_study_from_gold,
6
5
  nmdc_schema_database_export_filename,
7
6
  nmdc_schema_database_from_gold_study,
@@ -12,8 +11,6 @@ from nmdc_runtime.site.ops import (
12
11
  gold_projects_by_study,
13
12
  gold_study,
14
13
  poll_for_run_completion,
15
- run_etl,
16
- local_file_to_api_object,
17
14
  get_operation,
18
15
  produce_curated_db,
19
16
  delete_operations,
@@ -70,24 +67,6 @@ from nmdc_runtime.site.ops import (
70
67
  from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
71
68
 
72
69
 
73
- @graph
74
- def gold_translation():
75
- """
76
- Translating an export of the JGI GOLD [1] SQL database to the NMDC database JSON schema.
77
-
78
- [1] Genomes OnLine Database (GOLD) <https://gold.jgi.doe.gov/>.
79
- """
80
- local_file_to_api_object(run_etl(build_merged_db()))
81
-
82
-
83
- @graph()
84
- def gold_translation_curation():
85
- # TODO
86
- # - have produce_curated_db do actual curation (see notebook), persisting to db.
87
- # - more steps in pipeline? Or handoff via run_status_sensor on DagsterRunStatus.SUCCESS.
88
- produce_curated_db(get_operation())
89
-
90
-
91
70
  @graph()
92
71
  def create_objects_from_site_object_puts():
93
72
  delete_operations(
@@ -160,6 +139,7 @@ def gold_study_to_database():
160
139
  study_type,
161
140
  gold_nmdc_instrument_mapping_file_url,
162
141
  include_field_site_info,
142
+ enable_biosample_filtering,
163
143
  ) = get_gold_study_pipeline_inputs()
164
144
 
165
145
  projects = gold_projects_by_study(study_id)
@@ -176,6 +156,7 @@ def gold_study_to_database():
176
156
  analysis_projects,
177
157
  gold_nmdc_instrument_map_df,
178
158
  include_field_site_info,
159
+ enable_biosample_filtering,
179
160
  )
180
161
  database_dict = nmdc_schema_object_to_dict(database)
181
162
  filename = nmdc_schema_database_export_filename(study)
@@ -506,11 +487,19 @@ def nmdc_study_to_ncbi_submission_export():
506
487
 
507
488
  @graph
508
489
  def generate_data_generation_set_for_biosamples_in_nmdc_study():
509
- (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
490
+ (
491
+ study_id,
492
+ gold_nmdc_instrument_mapping_file_url,
493
+ include_field_site_info,
494
+ enable_biosample_filtering,
495
+ ) = get_database_updater_inputs()
510
496
  gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
511
497
 
512
498
  database = generate_data_generation_set_post_biosample_ingest(
513
- study_id, gold_nmdc_instrument_map_df
499
+ study_id,
500
+ gold_nmdc_instrument_map_df,
501
+ include_field_site_info,
502
+ enable_biosample_filtering,
514
503
  )
515
504
 
516
505
  database_dict = nmdc_schema_object_to_dict(database)
@@ -523,11 +512,19 @@ def generate_data_generation_set_for_biosamples_in_nmdc_study():
523
512
 
524
513
  @graph
525
514
  def generate_biosample_set_from_samples_in_gold():
526
- (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
515
+ (
516
+ study_id,
517
+ gold_nmdc_instrument_mapping_file_url,
518
+ include_field_site_info,
519
+ enable_biosample_filtering,
520
+ ) = get_database_updater_inputs()
527
521
  gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
528
522
 
529
523
  database = generate_biosample_set_for_nmdc_study_from_gold(
530
- study_id, gold_nmdc_instrument_map_df
524
+ study_id,
525
+ gold_nmdc_instrument_map_df,
526
+ include_field_site_info,
527
+ enable_biosample_filtering,
531
528
  )
532
529
  database_dict = nmdc_schema_object_to_dict(database)
533
530
  filename = post_submission_portal_biosample_ingest_record_stitching_filename(
@@ -545,10 +542,18 @@ def generate_update_script_for_insdc_biosample_identifiers():
545
542
  to generate a script for updating biosample records with INSDC identifiers obtained from GOLD.
546
543
  The script is returned as a dictionary that can be executed against MongoDB.
547
544
  """
548
- (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
545
+ (
546
+ study_id,
547
+ gold_nmdc_instrument_mapping_file_url,
548
+ include_field_site_info,
549
+ enable_biosample_filtering,
550
+ ) = get_database_updater_inputs()
549
551
  gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
550
552
 
551
553
  update_script = run_script_to_update_insdc_biosample_identifiers(
552
- study_id, gold_nmdc_instrument_map_df
554
+ study_id,
555
+ gold_nmdc_instrument_map_df,
556
+ include_field_site_info,
557
+ enable_biosample_filtering,
553
558
  )
554
559
  render_text(update_script)