nmdc-runtime 2.8.0__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. See the advisory details for this release for more information.

nmdc_runtime/config.py CHANGED
@@ -1,5 +1,57 @@
1
- DATABASE_CLASS_NAME = "Database"
1
+ """
2
+ This module acts as a unified interface between the codebase and the environment.
3
+ We will eventually move all of the Runtime's environment variables reads into this
4
+ module, instead of leaving them sprinkled throughout the codebase.
5
+
6
+ TODO: Move all environment variable reads into this module and update references accordingly.
7
+ """
8
+
9
+ from typing import Set
10
+ import os
11
+
12
+
13
+ def is_env_var_true(name: str, default: str = "false") -> bool:
14
+ r"""
15
+ Checks whether the value of the specified environment variable
16
+ meets our criteria for true-ness.
17
+
18
+ Reference: https://docs.python.org/3/library/os.html#os.environ
19
+
20
+ Run doctests via: $ python -m doctest nmdc_runtime/config.py
21
+
22
+ >>> import os
23
+ >>> name = "EXAMPLE_ENV_VAR"
24
+ >>> os.unsetenv(name) # Undefined
25
+ >>> is_env_var_true(name)
26
+ False
27
+ >>> is_env_var_true(name, "true") # Undefined, overridden default
28
+ True
29
+ >>> os.environ[name] = "false" # Defined as false
30
+ >>> is_env_var_true(name)
31
+ False
32
+ >>> os.environ[name] = "true" # Defined as true
33
+ >>> is_env_var_true(name)
34
+ True
35
+ >>> os.environ[name] = "TRUE" # Case-insensitive
36
+ >>> is_env_var_true(name)
37
+ True
38
+ >>> os.environ[name] = "potato" # Non-boolean string
39
+ >>> is_env_var_true(name)
40
+ False
41
+ """
42
+ lowercase_true_strings: Set[str] = {"true"}
43
+ return os.environ.get(name, default).lower() in lowercase_true_strings
44
+
45
+
46
+ # The name of the schema class representing the database. We don't bother to
47
+ # make this customizable via the environment, as we expect it to never change.
48
+ DATABASE_CLASS_NAME: str = "Database"
2
49
 
3
50
  # Feature flag that can be used to enable/disable the `/nmdcschema/related_ids`
4
51
  # endpoint and the tests that target it.
5
- IS_RELATED_IDS_ENDPOINT_ENABLED = False
52
+ IS_RELATED_IDS_ENDPOINT_ENABLED: bool = is_env_var_true(
53
+ "IS_RELATED_IDS_ENDPOINT_ENABLED", default="true"
54
+ )
55
+
56
+ # Feature flag that can be used to enable/disable the `/scalar` endpoint.
57
+ IS_SCALAR_ENABLED: bool = is_env_var_true("IS_SCALAR_ENABLED", default="true")
@@ -160,6 +160,7 @@ def gold_study_to_database():
160
160
  study_type,
161
161
  gold_nmdc_instrument_mapping_file_url,
162
162
  include_field_site_info,
163
+ enable_biosample_filtering,
163
164
  ) = get_gold_study_pipeline_inputs()
164
165
 
165
166
  projects = gold_projects_by_study(study_id)
@@ -176,6 +177,7 @@ def gold_study_to_database():
176
177
  analysis_projects,
177
178
  gold_nmdc_instrument_map_df,
178
179
  include_field_site_info,
180
+ enable_biosample_filtering,
179
181
  )
180
182
  database_dict = nmdc_schema_object_to_dict(database)
181
183
  filename = nmdc_schema_database_export_filename(study)
@@ -506,11 +508,19 @@ def nmdc_study_to_ncbi_submission_export():
506
508
 
507
509
  @graph
508
510
  def generate_data_generation_set_for_biosamples_in_nmdc_study():
509
- (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
511
+ (
512
+ study_id,
513
+ gold_nmdc_instrument_mapping_file_url,
514
+ include_field_site_info,
515
+ enable_biosample_filtering,
516
+ ) = get_database_updater_inputs()
510
517
  gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
511
518
 
512
519
  database = generate_data_generation_set_post_biosample_ingest(
513
- study_id, gold_nmdc_instrument_map_df
520
+ study_id,
521
+ gold_nmdc_instrument_map_df,
522
+ include_field_site_info,
523
+ enable_biosample_filtering,
514
524
  )
515
525
 
516
526
  database_dict = nmdc_schema_object_to_dict(database)
@@ -523,11 +533,19 @@ def generate_data_generation_set_for_biosamples_in_nmdc_study():
523
533
 
524
534
  @graph
525
535
  def generate_biosample_set_from_samples_in_gold():
526
- (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
536
+ (
537
+ study_id,
538
+ gold_nmdc_instrument_mapping_file_url,
539
+ include_field_site_info,
540
+ enable_biosample_filtering,
541
+ ) = get_database_updater_inputs()
527
542
  gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
528
543
 
529
544
  database = generate_biosample_set_for_nmdc_study_from_gold(
530
- study_id, gold_nmdc_instrument_map_df
545
+ study_id,
546
+ gold_nmdc_instrument_map_df,
547
+ include_field_site_info,
548
+ enable_biosample_filtering,
531
549
  )
532
550
  database_dict = nmdc_schema_object_to_dict(database)
533
551
  filename = post_submission_portal_biosample_ingest_record_stitching_filename(
@@ -545,10 +563,18 @@ def generate_update_script_for_insdc_biosample_identifiers():
545
563
  to generate a script for updating biosample records with INSDC identifiers obtained from GOLD.
546
564
  The script is returned as a dictionary that can be executed against MongoDB.
547
565
  """
548
- (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
566
+ (
567
+ study_id,
568
+ gold_nmdc_instrument_mapping_file_url,
569
+ include_field_site_info,
570
+ enable_biosample_filtering,
571
+ ) = get_database_updater_inputs()
549
572
  gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
550
573
 
551
574
  update_script = run_script_to_update_insdc_biosample_identifiers(
552
- study_id, gold_nmdc_instrument_map_df
575
+ study_id,
576
+ gold_nmdc_instrument_map_df,
577
+ include_field_site_info,
578
+ enable_biosample_filtering,
553
579
  )
554
580
  render_text(update_script)
nmdc_runtime/site/ops.py CHANGED
@@ -10,7 +10,7 @@ from datetime import datetime, timezone
10
10
  from io import BytesIO, StringIO
11
11
  from pprint import pformat
12
12
  from toolz.dicttoolz import keyfilter
13
- from typing import Tuple, Set
13
+ from typing import Tuple, Set, Union
14
14
  from zipfile import ZipFile
15
15
  from itertools import chain
16
16
  from ontology_loader.ontology_load_controller import OntologyLoaderController
@@ -44,7 +44,7 @@ from dagster import (
44
44
  from gridfs import GridFS
45
45
  from linkml_runtime.utils.dictutils import as_simple_dict
46
46
  from linkml_runtime.utils.yamlutils import YAMLRoot
47
- from nmdc_runtime.api.db.mongo import get_mongo_db
47
+ from nmdc_runtime.api.db.mongo import get_mongo_db, validate_json
48
48
  from nmdc_runtime.api.core.idgen import generate_one_id
49
49
  from nmdc_runtime.api.core.metadata import (
50
50
  _validate_changesheet,
@@ -106,7 +106,6 @@ from nmdc_runtime.util import (
106
106
  get_names_of_classes_in_effective_range_of_slot,
107
107
  pluralize,
108
108
  put_object,
109
- validate_json,
110
109
  specialize_activity_set_docs,
111
110
  collection_name_to_class_names,
112
111
  class_hierarchy_as_list,
@@ -481,83 +480,6 @@ def get_json_in(context):
481
480
  return rv.json()
482
481
 
483
482
 
484
- def ensure_data_object_type(docs: Dict[str, list], mdb: MongoDatabase):
485
- """
486
- Does not ensure ordering of `docs`.
487
-
488
- TODO: Document this function. What _does_ it do (or what was it designed to do)?
489
- What, conceptually, did the author design it to receive (as `docs`); a dict
490
- having a `data_object_set` item whose value is a list of documents.
491
- What, conceptually, did the author design it to return?
492
- """
493
-
494
- if ("data_object_set" not in docs) or len(docs["data_object_set"]) == 0:
495
- return docs, 0
496
-
497
- do_docs = docs["data_object_set"]
498
-
499
- class FileTypeEnumBase(BaseModel):
500
- name: str
501
- description: str
502
- filter: str # JSON-encoded data_object_set mongo collection filter document
503
-
504
- class FileTypeEnum(FileTypeEnumBase):
505
- id: str
506
-
507
- # Make a temporary collection (which will be dropped below) and insert the
508
- # specified `data_object_set` documents into it.
509
- temp_collection_name = f"tmp.data_object_set.{ObjectId()}"
510
- temp_collection = mdb[temp_collection_name]
511
- temp_collection.insert_many(do_docs)
512
- temp_collection.create_index("id")
513
-
514
- def fte_matches(fte_filter: str) -> List[dict]:
515
- r"""
516
- Returns a list of documents—without their `_id` field—that match the specified filter,
517
- which is encoded as a JSON string.
518
- """
519
- return [
520
- dissoc(d, "_id") for d in mdb.temp_collection.find(json.loads(fte_filter))
521
- ]
522
-
523
- # Create a mapping from each document's `id` to the document, itself.
524
- do_docs_map = {d["id"]: d for d in do_docs}
525
-
526
- n_docs_with_types_added = 0
527
-
528
- # For each `file_type_enum` document in the database, find all the documents (among the
529
- # `data_object_set` documents provided by the caller) that match that `file_type_enum`
530
- # document's filter.
531
- #
532
- # If any of those documents lacks a `data_object_type` field, update the original
533
- # `data_object_set` document so that its `data_object_type` field is set to
534
- # the `file_type_enum` document's `id` (why not its `name`?).
535
- #
536
- # TODO: I don't know why this sets `data_object_type` to `file_type_enum.id`,
537
- # as opposed to `file_type_enum.name`.
538
- #
539
- for fte_doc in mdb.file_type_enum.find():
540
- fte = FileTypeEnum(**fte_doc)
541
- docs_matching = fte_matches(fte.filter)
542
- for doc in docs_matching:
543
- if "data_object_type" not in doc:
544
- do_docs_map[doc["id"]] = assoc(doc, "data_object_type", fte.id)
545
- n_docs_with_types_added += 1
546
-
547
- mdb.drop_collection(temp_collection_name)
548
-
549
- # Returns a tuple. The first item is the original `docs` dictionary, but with the
550
- # `data_object_set` list replaced by the list of the documents that are in the
551
- # `do_docs_map` dictionary (with their `_id` fields omitted). The second item is
552
- # the number of documents to which this function added a `data_object_type` field.
553
- return (
554
- assoc(
555
- docs, "data_object_set", [dissoc(v, "_id") for v in do_docs_map.values()]
556
- ),
557
- n_docs_with_types_added,
558
- )
559
-
560
-
561
483
  @op(required_resource_keys={"runtime_api_site_client", "mongo"})
562
484
  def perform_mongo_updates(context, json_in):
563
485
  mongo = context.resources.mongo
@@ -566,8 +488,6 @@ def perform_mongo_updates(context, json_in):
566
488
 
567
489
  docs = json_in
568
490
  docs, _ = specialize_activity_set_docs(docs)
569
- docs, n_docs_with_types_added = ensure_data_object_type(docs, mongo.db)
570
- context.log.info(f"added `data_object_type` to {n_docs_with_types_added} docs")
571
491
  context.log.debug(f"{docs}")
572
492
 
573
493
  rv = validate_json(
@@ -636,22 +556,25 @@ def add_output_run_event(context: OpExecutionContext, outputs: List[str]):
636
556
  "study_type": str,
637
557
  "gold_nmdc_instrument_mapping_file_url": str,
638
558
  "include_field_site_info": bool,
559
+ "enable_biosample_filtering": bool,
639
560
  },
640
561
  out={
641
562
  "study_id": Out(str),
642
563
  "study_type": Out(str),
643
564
  "gold_nmdc_instrument_mapping_file_url": Out(str),
644
565
  "include_field_site_info": Out(bool),
566
+ "enable_biosample_filtering": Out(bool),
645
567
  },
646
568
  )
647
569
  def get_gold_study_pipeline_inputs(
648
570
  context: OpExecutionContext,
649
- ) -> Tuple[str, str, str, bool]:
571
+ ) -> Tuple[str, str, str, bool, bool]:
650
572
  return (
651
573
  context.op_config["study_id"],
652
574
  context.op_config["study_type"],
653
575
  context.op_config["gold_nmdc_instrument_mapping_file_url"],
654
576
  context.op_config["include_field_site_info"],
577
+ context.op_config["enable_biosample_filtering"],
655
578
  )
656
579
 
657
580
 
@@ -695,6 +618,7 @@ def nmdc_schema_database_from_gold_study(
695
618
  analysis_projects: List[Dict[str, Any]],
696
619
  gold_nmdc_instrument_map_df: pd.DataFrame,
697
620
  include_field_site_info: bool,
621
+ enable_biosample_filtering: bool,
698
622
  ) -> nmdc.Database:
699
623
  client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
700
624
 
@@ -710,6 +634,7 @@ def nmdc_schema_database_from_gold_study(
710
634
  analysis_projects,
711
635
  gold_nmdc_instrument_map_df,
712
636
  include_field_site_info,
637
+ enable_biosample_filtering,
713
638
  id_minter=id_minter,
714
639
  )
715
640
  database = translator.get_database()
@@ -1110,6 +1035,8 @@ def load_ontology(context: OpExecutionContext):
1110
1035
  source_ontology=source_ontology,
1111
1036
  output_directory=output_directory,
1112
1037
  generate_reports=generate_reports,
1038
+ mongo_client=context.resources.mongo.client,
1039
+ db_name=context.resources.mongo.db.name,
1113
1040
  )
1114
1041
 
1115
1042
  loader.run_ontology_loader()
@@ -1192,8 +1119,8 @@ def _add_related_ids_to_alldocs(
1192
1119
  "has_mass_spectrometry_configuration", # a `nmdc:PlannedProcess` was influenced by its `nmdc:Configuration`.
1193
1120
  "instrument_used", # a `nmdc:PlannedProcess` was influenced by a used `nmdc:Instrument`.
1194
1121
  "uses_calibration", # a `nmdc:PlannedProcess` was influenced by `nmdc:CalibrationInformation`.
1195
- "was_generated_by", # prov:wasGeneratedBy rdfs:subPropertyOf prov:wasInfluencedBy .
1196
- "was_informed_by", # prov:wasInformedBy rdfs:subPropertyOf prov:wasInfluencedBy .
1122
+ "was_generated_by", # prov:wasGeneratedBy rdfs:subPropertyOf prov:wasInfluencedBy.
1123
+ "was_informed_by", # prov:wasInformedBy rdfs:subPropertyOf prov:wasInfluencedBy.
1197
1124
  ]
1198
1125
  # An "outbound" slot is one for which an entity in the domain "influences"
1199
1126
  # (i.e., [owl:inverseOf prov:wasInfluencedBy]) an entity in the range.
@@ -1572,16 +1499,24 @@ def post_submission_portal_biosample_ingest_record_stitching_filename(
1572
1499
  config_schema={
1573
1500
  "nmdc_study_id": str,
1574
1501
  "gold_nmdc_instrument_mapping_file_url": str,
1502
+ "include_field_site_info": bool,
1503
+ "enable_biosample_filtering": bool,
1575
1504
  },
1576
1505
  out={
1577
1506
  "nmdc_study_id": Out(str),
1578
1507
  "gold_nmdc_instrument_mapping_file_url": Out(str),
1508
+ "include_field_site_info": Out(bool),
1509
+ "enable_biosample_filtering": Out(bool),
1579
1510
  },
1580
1511
  )
1581
- def get_database_updater_inputs(context: OpExecutionContext) -> Tuple[str, str]:
1512
+ def get_database_updater_inputs(
1513
+ context: OpExecutionContext,
1514
+ ) -> Tuple[str, str, bool, bool]:
1582
1515
  return (
1583
1516
  context.op_config["nmdc_study_id"],
1584
1517
  context.op_config["gold_nmdc_instrument_mapping_file_url"],
1518
+ context.op_config["include_field_site_info"],
1519
+ context.op_config["enable_biosample_filtering"],
1585
1520
  )
1586
1521
 
1587
1522
 
@@ -1596,6 +1531,8 @@ def generate_data_generation_set_post_biosample_ingest(
1596
1531
  context: OpExecutionContext,
1597
1532
  nmdc_study_id: str,
1598
1533
  gold_nmdc_instrument_map_df: pd.DataFrame,
1534
+ include_field_site_info: bool,
1535
+ enable_biosample_filtering: bool,
1599
1536
  ) -> nmdc.Database:
1600
1537
  runtime_api_user_client: RuntimeApiUserClient = (
1601
1538
  context.resources.runtime_api_user_client
@@ -1611,6 +1548,8 @@ def generate_data_generation_set_post_biosample_ingest(
1611
1548
  gold_api_client,
1612
1549
  nmdc_study_id,
1613
1550
  gold_nmdc_instrument_map_df,
1551
+ include_field_site_info,
1552
+ enable_biosample_filtering,
1614
1553
  )
1615
1554
  database = (
1616
1555
  database_updater.generate_data_generation_set_records_from_gold_api_for_study()
@@ -1630,6 +1569,8 @@ def generate_biosample_set_for_nmdc_study_from_gold(
1630
1569
  context: OpExecutionContext,
1631
1570
  nmdc_study_id: str,
1632
1571
  gold_nmdc_instrument_map_df: pd.DataFrame,
1572
+ include_field_site_info: bool = False,
1573
+ enable_biosample_filtering: bool = False,
1633
1574
  ) -> nmdc.Database:
1634
1575
  runtime_api_user_client: RuntimeApiUserClient = (
1635
1576
  context.resources.runtime_api_user_client
@@ -1645,6 +1586,8 @@ def generate_biosample_set_for_nmdc_study_from_gold(
1645
1586
  gold_api_client,
1646
1587
  nmdc_study_id,
1647
1588
  gold_nmdc_instrument_map_df,
1589
+ include_field_site_info,
1590
+ enable_biosample_filtering,
1648
1591
  )
1649
1592
  database = database_updater.generate_biosample_set_from_gold_api_for_study()
1650
1593
 
@@ -1656,13 +1599,16 @@ def generate_biosample_set_for_nmdc_study_from_gold(
1656
1599
  "runtime_api_user_client",
1657
1600
  "runtime_api_site_client",
1658
1601
  "gold_api_client",
1659
- }
1602
+ },
1603
+ out=Out(Any),
1660
1604
  )
1661
1605
  def run_script_to_update_insdc_biosample_identifiers(
1662
1606
  context: OpExecutionContext,
1663
1607
  nmdc_study_id: str,
1664
1608
  gold_nmdc_instrument_map_df: pd.DataFrame,
1665
- ) -> Dict[str, Any]:
1609
+ include_field_site_info: bool,
1610
+ enable_biosample_filtering: bool,
1611
+ ):
1666
1612
  """Generates a MongoDB update script to add INSDC biosample identifiers to biosamples.
1667
1613
 
1668
1614
  This op uses the DatabaseUpdater to generate a script that can be used to update biosample
@@ -1674,7 +1620,7 @@ def run_script_to_update_insdc_biosample_identifiers(
1674
1620
  gold_nmdc_instrument_map_df: A dataframe mapping GOLD instrument IDs to NMDC instrument set records
1675
1621
 
1676
1622
  Returns:
1677
- A dictionary containing the MongoDB update script
1623
+ A dictionary or list of dictionaries containing the MongoDB update script(s)
1678
1624
  """
1679
1625
  runtime_api_user_client: RuntimeApiUserClient = (
1680
1626
  context.resources.runtime_api_user_client
@@ -1690,11 +1636,17 @@ def run_script_to_update_insdc_biosample_identifiers(
1690
1636
  gold_api_client,
1691
1637
  nmdc_study_id,
1692
1638
  gold_nmdc_instrument_map_df,
1639
+ include_field_site_info,
1640
+ enable_biosample_filtering,
1693
1641
  )
1694
1642
  update_script = database_updater.queries_run_script_to_update_insdc_identifiers()
1695
1643
 
1644
+ if isinstance(update_script, list):
1645
+ total_updates = sum(len(item.get("updates", [])) for item in update_script)
1646
+ else:
1647
+ total_updates = len(update_script.get("updates", []))
1696
1648
  context.log.info(
1697
- f"Generated update script for study {nmdc_study_id} with {len(update_script.get('updates', []))} updates"
1649
+ f"Generated update script for study {nmdc_study_id} with {total_updates} updates"
1698
1650
  )
1699
1651
 
1700
1652
  return update_script
@@ -18,6 +18,8 @@ class DatabaseUpdater:
18
18
  gold_api_client: GoldApiClient,
19
19
  study_id: str,
20
20
  gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
21
+ include_field_site_info: bool = False,
22
+ enable_biosample_filtering: bool = True,
21
23
  ):
22
24
  """This class serves as an API for repairing connections in the database by
23
25
  adding records that are essentially missing "links"/"connections". As we identify
@@ -39,6 +41,8 @@ class DatabaseUpdater:
39
41
  self.gold_api_client = gold_api_client
40
42
  self.study_id = study_id
41
43
  self.gold_nmdc_instrument_map_df = gold_nmdc_instrument_map_df
44
+ self.include_field_site_info = include_field_site_info
45
+ self.enable_biosample_filtering = enable_biosample_filtering
42
46
 
43
47
  @lru_cache
44
48
  def _fetch_gold_biosample(self, gold_biosample_id: str) -> List[Dict[str, Any]]:
@@ -95,6 +99,8 @@ class DatabaseUpdater:
95
99
  biosamples=all_gold_biosamples,
96
100
  projects=all_gold_projects,
97
101
  gold_nmdc_instrument_map_df=self.gold_nmdc_instrument_map_df,
102
+ include_field_site_info=self.include_field_site_info,
103
+ enable_biosample_filtering=self.enable_biosample_filtering,
98
104
  )
99
105
 
100
106
  # The GoldStudyTranslator class has some pre-processing logic which filters out
@@ -214,6 +220,8 @@ class DatabaseUpdater:
214
220
  projects=gold_sequencing_projects_for_study,
215
221
  analysis_projects=gold_analysis_projects_for_study,
216
222
  gold_nmdc_instrument_map_df=self.gold_nmdc_instrument_map_df,
223
+ include_field_site_info=self.include_field_site_info,
224
+ enable_biosample_filtering=self.enable_biosample_filtering,
217
225
  )
218
226
 
219
227
  translated_biosamples = gold_study_translator.biosamples
@@ -463,11 +463,6 @@ def claim_and_run_apply_changesheet_jobs(_context):
463
463
  yield SkipReason("; ".join(skip_notes))
464
464
 
465
465
 
466
- # TODO ensure data_object_type values from file_type_enum
467
- # see /metadata-translation/notebooks/202106_curation_updates.ipynb
468
- # for details ("Create file_type_enum collection" section).
469
-
470
-
471
466
  @sensor(job=create_objects_from_site_object_puts.to_job(**preset_normal))
472
467
  def done_object_put_ops(_context):
473
468
  client = get_runtime_api_site_client(run_config_frozen__normal_env)
@@ -574,6 +569,7 @@ def biosample_submission_ingest():
574
569
  "study_type": "research_study",
575
570
  "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
576
571
  "include_field_site_info": False,
572
+ "enable_biosample_filtering": True,
577
573
  },
578
574
  },
579
575
  "export_json_to_drs": {"config": {"username": ""}},
@@ -1018,6 +1014,8 @@ def database_records_stitching():
1018
1014
  "config": {
1019
1015
  "nmdc_study_id": "",
1020
1016
  "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
1017
+ "include_field_site_info": False,
1018
+ "enable_biosample_filtering": True,
1021
1019
  }
1022
1020
  },
1023
1021
  "export_json_to_drs": {"config": {"username": ""}},
@@ -1060,6 +1058,8 @@ def database_records_stitching():
1060
1058
  "config": {
1061
1059
  "nmdc_study_id": "",
1062
1060
  "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
1061
+ "include_field_site_info": False,
1062
+ "enable_biosample_filtering": True,
1063
1063
  }
1064
1064
  },
1065
1065
  "export_json_to_drs": {"config": {"username": ""}},
@@ -1102,6 +1102,8 @@ def database_records_stitching():
1102
1102
  "config": {
1103
1103
  "nmdc_study_id": "",
1104
1104
  "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
1105
+ "include_field_site_info": False,
1106
+ "enable_biosample_filtering": True,
1105
1107
  }
1106
1108
  },
1107
1109
  },
@@ -109,7 +109,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
109
109
  },
110
110
  )
111
111
  response.raise_for_status()
112
- return response.json()["cursor"]["firstBatch"]
112
+ return response.json()["cursor"]["batch"]
113
113
 
114
114
  def get_omics_processing_records_by_gold_project_id(self, gold_project_id: str):
115
115
  gold_project_id = normalize_gold_id(gold_project_id)
@@ -126,7 +126,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
126
126
  },
127
127
  )
128
128
  response.raise_for_status()
129
- return response.json()["cursor"]["firstBatch"]
129
+ return response.json()["cursor"]["batch"]
130
130
 
131
131
  def get_biosamples_for_study(self, study_id: str):
132
132
  # TODO: 10000 is an arbitrarily large number that has been chosen for the max_page_size param.
@@ -170,7 +170,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
170
170
  },
171
171
  )
172
172
  response.raise_for_status()
173
- return response.json()["cursor"]["firstBatch"]
173
+ return response.json()["cursor"]["batch"]
174
174
 
175
175
  def get_study(self, study_id: str):
176
176
  response = self.request(
@@ -182,7 +182,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
182
182
  },
183
183
  )
184
184
  response.raise_for_status()
185
- return response.json()["cursor"]["firstBatch"]
185
+ return response.json()["cursor"]["batch"]
186
186
 
187
187
 
188
188
  class RuntimeApiSiteClient(RuntimeApiClient):
@@ -45,6 +45,7 @@ class GoldStudyTranslator(Translator):
45
45
  analysis_projects: List[JSON_OBJECT] = [],
46
46
  gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
47
47
  include_field_site_info: bool = False,
48
+ enable_biosample_filtering: bool = True,
48
49
  *args,
49
50
  **kwargs,
50
51
  ) -> None:
@@ -53,15 +54,20 @@ class GoldStudyTranslator(Translator):
53
54
  self.study = study
54
55
  self.study_type = nmdc.StudyCategoryEnum(study_type)
55
56
  self.include_field_site_info = include_field_site_info
57
+ self.enable_biosample_filtering = enable_biosample_filtering
56
58
  # Filter biosamples to only those with `sequencingStrategy` of
57
- # "Metagenome" or "Metatranscriptome"
58
- self.biosamples = [
59
- biosample
60
- for biosample in biosamples
61
- if any(
62
- _is_valid_project(project) for project in biosample.get("projects", [])
63
- )
64
- ]
59
+ # "Metagenome" or "Metatranscriptome" if filtering is enabled
60
+ if enable_biosample_filtering:
61
+ self.biosamples = [
62
+ biosample
63
+ for biosample in biosamples
64
+ if any(
65
+ _is_valid_project(project)
66
+ for project in biosample.get("projects", [])
67
+ )
68
+ ]
69
+ else:
70
+ self.biosamples = biosamples
65
71
  # Fetch the valid projectGoldIds that are associated with filtered
66
72
  # biosamples on their `projects` field
67
73
  valid_project_ids = {
@@ -116,6 +122,9 @@ class GoldStudyTranslator(Translator):
116
122
  :param gold_entity: GOLD entity object
117
123
  :return: PersonValue corresponding to the first PI in the `contacts` field
118
124
  """
125
+ if "contacts" not in gold_entity:
126
+ return None
127
+
119
128
  pi_dict = next(
120
129
  (
121
130
  contact
@@ -169,7 +178,7 @@ class GoldStudyTranslator(Translator):
169
178
  project["ncbiBioSampleAccession"], default_prefix="biosample"
170
179
  )
171
180
  for project in biosample_projects
172
- if project["ncbiBioSampleAccession"]
181
+ if project.get("ncbiBioSampleAccession")
173
182
  ]
174
183
 
175
184
  def _get_samp_taxon_id(
@@ -47,6 +47,12 @@ DATA_URL_SET_AND_ANALYTE_TO_DATA_OBJECT_TYPE: dict[tuple[DataUrlSet, str], str]
47
47
  (INTERLEAVED, str(METATRANSCRIPTOME)): "Metatranscriptome Raw Reads",
48
48
  }
49
49
 
50
+ UNIT_OVERRIDES: dict[str, dict[str, str]] = {
51
+ "Biosample": {
52
+ "depth": "m",
53
+ }
54
+ }
55
+
50
56
 
51
57
  class EnvironmentPackage(Enum):
52
58
  r"""
@@ -475,6 +481,50 @@ class SubmissionPortalTranslator(Translator):
475
481
 
476
482
  return value
477
483
 
484
+ def _get_study_dois(self, metadata_submission) -> Union[List[nmdc.Doi], None]:
485
+ """Collect and format DOIs from submission portal schema in nmdc format DOIs
486
+
487
+ If there were no DOIs, None is returned.
488
+
489
+ :param metadata_submission: submission portal entry
490
+ :return: list of nmdc.DOI objects
491
+ """
492
+ data_dois = self._get_from(metadata_submission, ["studyForm", "dataDois"])
493
+ award_dois = self._get_from(
494
+ metadata_submission, ["multiOmicsForm", "awardDois"]
495
+ )
496
+ if data_dois and len(data_dois) > 0:
497
+ updated_data_dois = [
498
+ nmdc.Doi(
499
+ doi_category="dataset_doi",
500
+ doi_provider=doi["provider"],
501
+ doi_value=self._ensure_curie(doi["value"], default_prefix="doi"),
502
+ type="nmdc:Doi",
503
+ )
504
+ for doi in data_dois
505
+ ]
506
+ else:
507
+ updated_data_dois = []
508
+
509
+ if award_dois and len(award_dois) > 0:
510
+ updated_award_dois = [
511
+ nmdc.Doi(
512
+ doi_category="award_doi",
513
+ doi_provider=doi["provider"],
514
+ doi_value=self._ensure_curie(doi["value"], default_prefix="doi"),
515
+ type="nmdc:Doi",
516
+ )
517
+ for doi in award_dois
518
+ ]
519
+ else:
520
+ updated_award_dois = []
521
+
522
+ return_val = updated_data_dois + updated_award_dois
523
+ if len(return_val) == 0:
524
+ return_val = None
525
+
526
+ return return_val
527
+
478
528
  def _get_data_objects_from_fields(
479
529
  self,
480
530
  sample_data: JSON_OBJECT,
@@ -591,6 +641,7 @@ class SubmissionPortalTranslator(Translator):
591
641
  websites=self._get_from(
592
642
  metadata_submission, ["studyForm", "linkOutWebpage"]
593
643
  ),
644
+ associated_dois=self._get_study_dois(metadata_submission),
594
645
  )
595
646
 
596
647
  def _transform_value_for_slot(
@@ -660,6 +711,17 @@ class SubmissionPortalTranslator(Translator):
660
711
  logging.warning(f"No slot '{slot_name}' on class '{class_name}'")
661
712
  continue
662
713
 
714
+ # This step handles cases where the submission portal/schema instructs a user to
715
+ # provide a value in a specific unit. The unit cannot be parsed out of the raw value
716
+ # in these cases, so we have to manually set it via UNIT_OVERRIDES. This part can
717
+ # go away once units are encoded in the schema itself.
718
+ # See: https://github.com/microbiomedata/nmdc-schema/issues/2517
719
+ if class_name in UNIT_OVERRIDES:
720
+ # If the class has unit overrides, check if the slot is in the overrides
721
+ unit_overrides = UNIT_OVERRIDES[class_name]
722
+ if slot_name in unit_overrides:
723
+ unit = unit_overrides[slot_name]
724
+
663
725
  slot_definition = self.schema_view.induced_slot(slot_name, class_name)
664
726
  if slot_definition.multivalued:
665
727
  value_list = value
nmdc_runtime/util.py CHANGED
@@ -3,36 +3,28 @@ import mimetypes
3
3
  import os
4
4
  import pkgutil
5
5
  from collections.abc import Iterable
6
- from contextlib import AbstractContextManager
7
6
  from copy import deepcopy
8
7
  from datetime import datetime, timezone
9
8
  from functools import lru_cache
10
9
  from io import BytesIO
11
10
  from itertools import chain
12
11
  from pathlib import Path
13
- from uuid import uuid4
14
12
  from typing import Callable, List, Optional, Set, Dict
15
13
 
16
14
  import fastjsonschema
17
15
  import requests
18
16
  from frozendict import frozendict
19
- from jsonschema.validators import Draft7Validator
20
17
  from linkml_runtime import linkml_model
21
18
  from linkml_runtime.utils.schemaview import SchemaView
22
- from nmdc_schema.nmdc import Database as NMDCDatabase
23
19
  from nmdc_schema.get_nmdc_view import ViewGetter
24
- from pydantic import Field, BaseModel
25
20
  from pymongo.database import Database as MongoDatabase
26
21
  from pymongo.errors import OperationFailure
27
22
  from refscan.lib.helpers import identify_references
28
- from refscan.lib.Finder import Finder
29
23
  from refscan.lib.ReferenceList import ReferenceList
30
- from refscan.scanner import scan_outgoing_references
31
- from toolz import merge, unique
24
+ from toolz import merge
32
25
 
33
26
  from nmdc_runtime.api.core.util import sha256hash_from_file
34
27
  from nmdc_runtime.api.models.object import DrsObjectIn
35
- from typing_extensions import Annotated
36
28
 
37
29
 
38
30
  def get_names_of_classes_in_effective_range_of_slot(
@@ -499,6 +491,11 @@ def populated_schema_collection_names_with_id_field(mdb: MongoDatabase) -> List[
499
491
 
500
492
  def ensure_unique_id_indexes(mdb: MongoDatabase):
501
493
  """Ensure that any collections with an "id" field have an index on "id"."""
494
+
495
+ # Note: The pipe (i.e. `|`) operator performs a union of the two sets. In this case,
496
+ # it creates a set (i.e. `candidate_names`) consisting of the names of both
497
+ # (a) all collections in the real database, and (b) all collections that
498
+ # the NMDC schema says can contain instances of classes that have an "id" slot.
502
499
  candidate_names = (
503
500
  set(mdb.list_collection_names()) | schema_collection_names_with_id_field()
504
501
  )
@@ -533,271 +530,6 @@ def ensure_unique_id_indexes(mdb: MongoDatabase):
533
530
  raise
534
531
 
535
532
 
536
- class UpdateStatement(BaseModel):
537
- q: dict
538
- u: dict
539
- upsert: bool = False
540
- multi: bool = False
541
-
542
-
543
- class DeleteStatement(BaseModel):
544
- q: dict
545
- limit: Annotated[int, Field(ge=0, le=1)] = 1
546
-
547
-
548
- class OverlayDBError(Exception):
549
- pass
550
-
551
-
552
- class OverlayDB(AbstractContextManager):
553
- """Provides a context whereby a base Database is overlaid with a temporary one.
554
-
555
- If you need to run basic simulations of updates to a base database,
556
- you don't want to actually commit transactions to the base database.
557
-
558
- For example, to insert or replace (matching on "id") many documents into a collection in order
559
- to then validate the resulting total set of collection documents, an OverlayDB writes to
560
- an overlay collection that "shadows" the base collection during a "find" query
561
- (the "merge_find" method of an OverlayDB object): if a document with `id0` is found in the
562
- overlay collection, that id is marked as "seen" and will not also be returned when
563
- subsequently scanning the (unmodified) base-database collection.
564
-
565
- Note: The OverlayDB object does not provide a means to perform arbitrary MongoDB queries on the virtual "merged"
566
- database. Callers can access the real database via `overlay_db._bottom_db` and the overlaying database via
567
- `overlay_db._top_db` and perform arbitrary MongoDB queries on the individual databases that way. Access to
568
- the virtual "merged" database is limited to the methods of the `OverlayDB` class, which simulates the
569
- "merging" just-in-time to process the method invocation. You can see an example of this in the implementation
570
- of the `merge_find` method, which internally accesses both the real database and the overlaying database.
571
-
572
- Mongo "update" commands (as the "apply_updates" method) are simulated by first copying affected
573
- documents from a base collection to the overlay, and then applying the updates to the overlay,
574
- so that again, base collections are unmodified, and a "merge_find" call will produce a result
575
- *as if* the base collection(s) were modified.
576
-
577
- Mongo deletions (as the "delete" method) also copy affected documents from the base collection
578
- to the overlay collection, and flag them using the "_deleted" field. In this way, a `merge_find`
579
- call will match a relevant document given a suitable filter, and will mark the document's id
580
- as "seen" *without* returning the document. Thus, the result is as if the document were deleted.
581
-
582
- Usage:
583
- ````
584
- with OverlayDB(mdb) as odb:
585
- # do stuff, e.g. `odb.replace_or_insert_many(...)`
586
- ```
587
- """
588
-
589
- def __init__(self, mdb: MongoDatabase):
590
- self._bottom_db = mdb
591
- self._top_db = self._bottom_db.client.get_database(f"overlay-{uuid4()}")
592
- ensure_unique_id_indexes(self._top_db)
593
-
594
- def __enter__(self):
595
- return self
596
-
597
- def __exit__(self, exc_type, exc_value, traceback):
598
- self._bottom_db.client.drop_database(self._top_db.name)
599
-
600
- def replace_or_insert_many(self, coll_name, documents: list):
601
- try:
602
- self._top_db[coll_name].insert_many(documents)
603
- except OperationFailure as e:
604
- raise OverlayDBError(str(e.details))
605
-
606
- def apply_updates(self, coll_name, updates: list):
607
- """prepare overlay db and apply updates to it."""
608
- assert all(UpdateStatement(**us) for us in updates)
609
- for update_spec in updates:
610
- for bottom_doc in self._bottom_db[coll_name].find(update_spec["q"]):
611
- self._top_db[coll_name].insert_one(bottom_doc)
612
- try:
613
- self._top_db.command({"update": coll_name, "updates": updates})
614
- except OperationFailure as e:
615
- raise OverlayDBError(str(e.details))
616
-
617
- def delete(self, coll_name, deletes: list):
618
- """ "apply" delete command by flagging docs in overlay database"""
619
- assert all(DeleteStatement(**us) for us in deletes)
620
- for delete_spec in deletes:
621
- for bottom_doc in self._bottom_db[coll_name].find(
622
- delete_spec["q"], limit=delete_spec.get("limit", 1)
623
- ):
624
- bottom_doc["_deleted"] = True
625
- self._top_db[coll_name].insert_one(bottom_doc)
626
-
627
- def merge_find(self, coll_name, find_spec: dict):
628
- """Yield docs first from overlay and then from base db, minding deletion flags."""
629
- # ensure projection of "id" and "_deleted"
630
- if "projection" in find_spec:
631
- proj = find_spec["projection"]
632
- if isinstance(proj, dict):
633
- proj = merge(proj, {"id": 1, "_deleted": 1})
634
- elif isinstance(proj, list):
635
- proj = list(unique(proj + ["id", "_deleted"]))
636
-
637
- top_docs = self._top_db[coll_name].find(**find_spec)
638
- bottom_docs = self._bottom_db[coll_name].find(**find_spec)
639
- top_seen_ids = set()
640
- for doc in top_docs:
641
- if not doc.get("_deleted"):
642
- yield doc
643
- top_seen_ids.add(doc["id"])
644
-
645
- for doc in bottom_docs:
646
- if doc["id"] not in top_seen_ids:
647
- yield doc
648
-
649
-
650
- def validate_json(
651
- in_docs: dict, mdb: MongoDatabase, check_inter_document_references: bool = False
652
- ):
653
- r"""
654
- Checks whether the specified dictionary represents a valid instance of the `Database` class
655
- defined in the NMDC Schema. Referential integrity checking is performed on an opt-in basis.
656
-
657
- Example dictionary:
658
- {
659
- "biosample_set": [
660
- {"id": "nmdc:bsm-00-000001", ...},
661
- {"id": "nmdc:bsm-00-000002", ...}
662
- ],
663
- "study_set": [
664
- {"id": "nmdc:sty-00-000001", ...},
665
- {"id": "nmdc:sty-00-000002", ...}
666
- ]
667
- }
668
-
669
- :param in_docs: The dictionary you want to validate
670
- :param mdb: A reference to a MongoDB database
671
- :param check_inter_document_references: Whether you want this function to check whether every document that
672
- is referenced by any of the documents passed in would, indeed, exist
673
- in the database, if the documents passed in were to be inserted into
674
- the database. In other words, set this to `True` if you want this
675
- function to perform referential integrity checks.
676
- """
677
- validator = Draft7Validator(get_nmdc_jsonschema_dict())
678
- docs = deepcopy(in_docs)
679
- validation_errors = {}
680
-
681
- known_coll_names = set(nmdc_database_collection_names())
682
- for coll_name, coll_docs in docs.items():
683
- if coll_name not in known_coll_names:
684
- # FIXME: Document what `@type` is (conceptually; e.g., why this function accepts it as a collection name).
685
- # See: https://github.com/microbiomedata/nmdc-runtime/discussions/858
686
- if coll_name == "@type" and coll_docs in ("Database", "nmdc:Database"):
687
- continue
688
- else:
689
- validation_errors[coll_name] = [
690
- f"'{coll_name}' is not a known schema collection name"
691
- ]
692
- continue
693
-
694
- errors = list(validator.iter_errors({coll_name: coll_docs}))
695
- validation_errors[coll_name] = [e.message for e in errors]
696
- if coll_docs:
697
- if not isinstance(coll_docs, list):
698
- validation_errors[coll_name].append("value must be a list")
699
- elif not all(isinstance(d, dict) for d in coll_docs):
700
- validation_errors[coll_name].append(
701
- "all elements of list must be dicts"
702
- )
703
- if not validation_errors[coll_name]:
704
- try:
705
- with OverlayDB(mdb) as odb:
706
- odb.replace_or_insert_many(coll_name, coll_docs)
707
- except OverlayDBError as e:
708
- validation_errors[coll_name].append(str(e))
709
-
710
- if all(len(v) == 0 for v in validation_errors.values()):
711
- # Second pass. Try instantiating linkml-sourced dataclass
712
- in_docs.pop("@type", None)
713
- try:
714
- NMDCDatabase(**in_docs)
715
- except Exception as e:
716
- return {"result": "errors", "detail": str(e)}
717
-
718
- # Third pass (if enabled): Check inter-document references.
719
- if check_inter_document_references is True:
720
- # Prepare to use `refscan`.
721
- #
722
- # Note: We check the inter-document references in two stages, which are:
723
- # 1. For each document in the JSON payload, check whether each document it references already exists
724
- # (in the collections the schema says it can exist in) in the database. We use the
725
- # `refscan` package to do this, which returns violation details we'll use in the second stage.
726
- # 2. For each violation found in the first stage (i.e. each reference to a not-found document), we
727
- # check whether that document exists (in the collections the schema says it can exist in) in the
728
- # JSON payload. If it does, then we "waive" (i.e. discard) that violation.
729
- # The violations that remain after those two stages are the ones we return to the caller.
730
- #
731
- # Note: The reason we do not insert documents into an `OverlayDB` and scan _that_, is that the `OverlayDB`
732
- # does not provide a means to perform arbitrary queries against its virtual "merged" database. It
733
- # is not a drop-in replacement for a pymongo's `Database` class, which is the only thing that
734
- # `refscan`'s `Finder` class accepts.
735
- #
736
- finder = Finder(database=mdb)
737
- references = get_allowed_references()
738
- reference_field_names_by_source_class_name = (
739
- references.get_reference_field_names_by_source_class_name()
740
- )
741
-
742
- # Iterate over the collections in the JSON payload.
743
- for source_collection_name, documents in in_docs.items():
744
- for document in documents:
745
- # Add an `_id` field to the document, since `refscan` requires the document to have one.
746
- source_document = dict(document, _id=None)
747
- violations = scan_outgoing_references(
748
- document=source_document,
749
- schema_view=nmdc_schema_view(),
750
- reference_field_names_by_source_class_name=reference_field_names_by_source_class_name,
751
- references=references,
752
- finder=finder,
753
- collection_names=nmdc_database_collection_names(),
754
- source_collection_name=source_collection_name,
755
- user_wants_to_locate_misplaced_documents=False,
756
- )
757
-
758
- # For each violation, check whether the misplaced document is in the JSON payload, itself.
759
- for violation in violations:
760
- can_waive_violation = False
761
- # Determine which collections can contain the referenced document, based upon
762
- # the schema class of which this source document is an instance.
763
- target_collection_names = (
764
- references.get_target_collection_names(
765
- source_class_name=violation.source_class_name,
766
- source_field_name=violation.source_field_name,
767
- )
768
- )
769
- # Check whether the referenced document exists in any of those collections in the JSON payload.
770
- for json_coll_name, json_coll_docs in in_docs.items():
771
- if json_coll_name in target_collection_names:
772
- for json_coll_doc in json_coll_docs:
773
- if json_coll_doc["id"] == violation.target_id:
774
- can_waive_violation = True
775
- break # stop checking
776
- if can_waive_violation:
777
- break # stop checking
778
- if not can_waive_violation:
779
- violation_as_str = (
780
- f"Document '{violation.source_document_id}' "
781
- f"in collection '{violation.source_collection_name}' "
782
- f"has a field '{violation.source_field_name}' that "
783
- f"references a document having id "
784
- f"'{violation.target_id}', but the latter document "
785
- f"does not exist in any of the collections the "
786
- f"NMDC Schema says it can exist in."
787
- )
788
- validation_errors[source_collection_name].append(
789
- violation_as_str
790
- )
791
-
792
- # If any collection's error list is not empty, return an error response.
793
- if any(len(v) > 0 for v in validation_errors.values()):
794
- return {"result": "errors", "detail": validation_errors}
795
-
796
- return {"result": "All Okay!"}
797
- else:
798
- return {"result": "errors", "detail": validation_errors}
799
-
800
-
801
533
  def decorate_if(condition: bool = False) -> Callable:
802
534
  r"""
803
535
  Decorator that applies another decorator only when `condition` is `True`.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nmdc_runtime
3
- Version: 2.8.0
3
+ Version: 2.9.0
4
4
  Summary: A runtime system for NMDC data management and orchestration
5
5
  Home-page: https://github.com/microbiomedata/nmdc-runtime
6
6
  Author: Donny Winston
@@ -1,9 +1,9 @@
1
1
  nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- nmdc_runtime/config.py,sha256=GKmovwYD3tIiUQX-mAOcHI8NMEMLhogjHDB9I8azA4c,195
2
+ nmdc_runtime/config.py,sha256=CW6LnN8Idsbra_mZnHU-kcWsYBZWbgivqVEp8rpOMi4,1989
3
3
  nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
4
4
  nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  nmdc_runtime/mongo_util.py,sha256=7NRvqFE8W2CUcpcXAA4KElUACIdAkBehZ9TBG4k7zNE,3000
6
- nmdc_runtime/util.py,sha256=FfGNfcnHKS6Yzuwbdj0FtCcL-ks9HUjwWUfsPs1H2ao,33285
6
+ nmdc_runtime/util.py,sha256=Rw-OiQDHrz4cNX3ZdC-cgfHYUMq1qsk-_Mv81UrDlC8,19823
7
7
  nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -37,10 +37,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
37
37
  nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
39
39
  nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- nmdc_runtime/site/graphs.py,sha256=cJfLCRYH6l3SW-0MYIOihORit6Fe_gziwQ6BJaph55c,17713
41
- nmdc_runtime/site/ops.py,sha256=m9p8dlfNVpdEyu0o06cT9jMLkjZh0GGFxEQxDuDPUaA,65917
42
- nmdc_runtime/site/repository.py,sha256=ZkIykDDaFTxB4QW1Eo_w-9IywQrXXTV7Ugogf8vQ604,47439
43
- nmdc_runtime/site/resources.py,sha256=2R9X-06f9ZpDWYKltOkl_IIAScQGEEbsZF-URm4O6dM,20164
40
+ nmdc_runtime/site/graphs.py,sha256=CWbLLtoaakmNgSoaQWylXvcOY6qS7qwkTexEUDiMNfM,18295
41
+ nmdc_runtime/site/ops.py,sha256=y6bBJhAytrSqt0COkOqXVKgfSGVdgQ7uByUP8S-zUB4,63935
42
+ nmdc_runtime/site/repository.py,sha256=g0bZytvCrUjLpWuvkAzzmI16mChsrYPbWcvVFPNZFnM,47687
43
+ nmdc_runtime/site/resources.py,sha256=dLNtNa4FfSKN_6b21eItn-i8e0ZHyveoBsexl2I6zmo,20144
44
44
  nmdc_runtime/site/util.py,sha256=h70UJCT9g-I63EJn0drZjv1iaQ8LHJTbG29R9kqJ04c,1821
45
45
  nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
46
  nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
@@ -58,17 +58,17 @@ nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ
58
58
  nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
59
  nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
60
60
  nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- nmdc_runtime/site/repair/database_updater.py,sha256=gRZ-NxZzXNd-vTIuygabEUqUSiF9eL4hL2rI9Qdf2WI,20764
61
+ nmdc_runtime/site/repair/database_updater.py,sha256=a6POYZcLEl0JvnuWxPjaOJtwZjkJhhvvUg1ABhnBiP8,21268
62
62
  nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
63
  nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
64
64
  nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
65
- nmdc_runtime/site/translation/gold_translator.py,sha256=HGbWeuxppqlVfU8F5oKTYIDoC6qaftugJeWFIALB9XE,32720
65
+ nmdc_runtime/site/translation/gold_translator.py,sha256=n7PrAyZb6ODG1uaZ0cay91DygAHIefOL2qXLuukOyIM,33075
66
66
  nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
67
67
  nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=8_QF75Gf-dc2xVeO6jzTmdDrlGdh1-QrLJKG2SwUhCA,23797
68
68
  nmdc_runtime/site/translation/neon_soil_translator.py,sha256=IMeq4ABgWaSUbB_gmG8vBCMeynQSlbCUw9p2be6o8kE,38620
69
69
  nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=Js8_r6vHBW8b-_BpFySTUuYOFe7r51k8HwaNCQ7nAAg,30587
70
70
  nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
71
- nmdc_runtime/site/translation/submission_portal_translator.py,sha256=UEeqlkz_YGqcnx8vomFysetOlXxDu23q0Ryr93SZy78,41684
71
+ nmdc_runtime/site/translation/submission_portal_translator.py,sha256=d5ycQhd-I07iUeuqN0vcHvMkOHqrwB67j2Q64aFkKBw,44147
72
72
  nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
73
73
  nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
74
74
  nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -76,9 +76,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
76
76
  nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
77
77
  nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
78
78
  nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
79
- nmdc_runtime-2.8.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
80
- nmdc_runtime-2.8.0.dist-info/METADATA,sha256=B8Vhde36JVAAwdCqKkcFaTyF13D0uWL8KEQnsyJUajc,8953
81
- nmdc_runtime-2.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
- nmdc_runtime-2.8.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
83
- nmdc_runtime-2.8.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
84
- nmdc_runtime-2.8.0.dist-info/RECORD,,
79
+ nmdc_runtime-2.9.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
80
+ nmdc_runtime-2.9.0.dist-info/METADATA,sha256=4NgNI-Et3t1WLDfZPbSFT18JnMBVEuSCoFAZbm_V0xk,8953
81
+ nmdc_runtime-2.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
+ nmdc_runtime-2.9.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
83
+ nmdc_runtime-2.9.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
84
+ nmdc_runtime-2.9.0.dist-info/RECORD,,