nmdc-runtime 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (131)
  1. nmdc_runtime/Dockerfile +167 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +208 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +788 -0
  10. nmdc_runtime/api/core/util.py +109 -0
  11. nmdc_runtime/api/db/mongo.py +435 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +143 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +502 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +270 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +796 -0
  31. nmdc_runtime/api/endpoints/workflows.py +353 -0
  32. nmdc_runtime/api/entrypoint.sh +7 -0
  33. nmdc_runtime/api/main.py +425 -0
  34. nmdc_runtime/api/middleware.py +43 -0
  35. nmdc_runtime/api/models/capability.py +14 -0
  36. nmdc_runtime/api/models/id.py +92 -0
  37. nmdc_runtime/api/models/job.py +37 -0
  38. nmdc_runtime/api/models/lib/helpers.py +78 -0
  39. nmdc_runtime/api/models/metadata.py +11 -0
  40. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  41. nmdc_runtime/api/models/object.py +180 -0
  42. nmdc_runtime/api/models/object_type.py +20 -0
  43. nmdc_runtime/api/models/operation.py +66 -0
  44. nmdc_runtime/api/models/query.py +246 -0
  45. nmdc_runtime/api/models/query_continuation.py +111 -0
  46. nmdc_runtime/api/models/run.py +161 -0
  47. nmdc_runtime/api/models/site.py +87 -0
  48. nmdc_runtime/api/models/trigger.py +13 -0
  49. nmdc_runtime/api/models/user.py +140 -0
  50. nmdc_runtime/api/models/util.py +260 -0
  51. nmdc_runtime/api/models/workflow.py +15 -0
  52. nmdc_runtime/api/openapi.py +178 -0
  53. nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
  54. nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
  55. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  56. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  57. nmdc_runtime/config.py +7 -8
  58. nmdc_runtime/minter/adapters/repository.py +22 -2
  59. nmdc_runtime/minter/config.py +2 -0
  60. nmdc_runtime/minter/domain/model.py +55 -1
  61. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  62. nmdc_runtime/mongo_util.py +1 -2
  63. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  64. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  65. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  66. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  67. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  68. nmdc_runtime/site/dagster.yaml +53 -0
  69. nmdc_runtime/site/entrypoint-daemon.sh +26 -0
  70. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  71. nmdc_runtime/site/entrypoint-dagit.sh +26 -0
  72. nmdc_runtime/site/export/ncbi_xml.py +633 -13
  73. nmdc_runtime/site/export/ncbi_xml_utils.py +115 -1
  74. nmdc_runtime/site/graphs.py +8 -22
  75. nmdc_runtime/site/ops.py +147 -181
  76. nmdc_runtime/site/repository.py +2 -112
  77. nmdc_runtime/site/resources.py +16 -3
  78. nmdc_runtime/site/translation/gold_translator.py +4 -12
  79. nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
  80. nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
  81. nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
  82. nmdc_runtime/site/translation/submission_portal_translator.py +84 -68
  83. nmdc_runtime/site/translation/translator.py +63 -1
  84. nmdc_runtime/site/util.py +8 -3
  85. nmdc_runtime/site/validation/util.py +10 -5
  86. nmdc_runtime/site/workspace.yaml +13 -0
  87. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  88. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  89. nmdc_runtime/static/README.md +5 -0
  90. nmdc_runtime/static/favicon.ico +0 -0
  91. nmdc_runtime/util.py +90 -48
  92. nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
  93. nmdc_runtime-2.11.0.dist-info/RECORD +128 -0
  94. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
  95. nmdc_runtime/containers.py +0 -14
  96. nmdc_runtime/core/db/Database.py +0 -15
  97. nmdc_runtime/core/exceptions/__init__.py +0 -23
  98. nmdc_runtime/core/exceptions/base.py +0 -47
  99. nmdc_runtime/core/exceptions/token.py +0 -13
  100. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  101. nmdc_runtime/domain/users/userSchema.py +0 -37
  102. nmdc_runtime/domain/users/userService.py +0 -14
  103. nmdc_runtime/infrastructure/database/db.py +0 -3
  104. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  105. nmdc_runtime/lib/__init__.py +0 -1
  106. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  107. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  108. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  109. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  110. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  111. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  112. nmdc_runtime/site/drsobjects/registration.py +0 -131
  113. nmdc_runtime/site/translation/emsl.py +0 -43
  114. nmdc_runtime/site/translation/gold.py +0 -53
  115. nmdc_runtime/site/translation/jgi.py +0 -32
  116. nmdc_runtime/site/translation/util.py +0 -132
  117. nmdc_runtime/site/validation/jgi.py +0 -43
  118. nmdc_runtime-2.9.0.dist-info/METADATA +0 -214
  119. nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
  120. nmdc_runtime-2.9.0.dist-info/top_level.txt +0 -1
  121. /nmdc_runtime/{client → api}/__init__.py +0 -0
  122. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  123. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  124. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  125. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  126. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  127. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  128. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  129. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  130. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
  131. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,5 @@
1
1
  from io import BytesIO, StringIO
2
- from typing import Any, Dict, List, Union
2
+ from typing import Any, Dict, List
3
3
 
4
4
  from nmdc_runtime.api.endpoints.util import strip_oid
5
5
  from nmdc_runtime.minter.config import typecodes
@@ -275,6 +275,120 @@ def load_mappings(url):
275
275
  return attribute_mappings, slot_range_mappings
276
276
 
277
277
 
278
+ def check_pooling_for_biosamples(
279
+ material_processing_set: Collection, biosamples_list: List[Dict[str, Any]]
280
+ ) -> Dict[str, Dict[str, Any]]:
281
+ """Check which biosamples are part of pooling processes and return pooling information.
282
+
283
+ The way in which we check if a biosample is part of a Pooling process is by checking if
284
+ the biosample id has been asserted on the `has_input` slot/key of an `nmdc:Pooling` process
285
+ instance.
286
+
287
+ :param material_processing_set: reference to the material_processing_set collection
288
+ :param biosamples_list: list of all biosamples to check
289
+ :return: dictionary mapping biosample_id to pooling information (empty dict if not pooled)
290
+ """
291
+ result = {}
292
+ # get list of all biosample IDs that are part of a given study
293
+ biosample_lookup = {bs["id"]: bs for bs in biosamples_list}
294
+
295
+ # get list of all pooling processes
296
+ pooling_processes = list(material_processing_set.find({"type": "nmdc:Pooling"}))
297
+
298
+ # initialize all biosamples as not pooled
299
+ for biosample in biosamples_list:
300
+ result[biosample["id"]] = {}
301
+
302
+ # process each pooling process
303
+ for pooling_process in pooling_processes:
304
+ pooled_biosample_ids = pooling_process.get("has_input", [])
305
+
306
+ # get the processed sample output from the pooling process
307
+ has_output = pooling_process.get("has_output", [])
308
+ processed_sample_id = None
309
+
310
+ for output_id in has_output:
311
+ if get_classname_from_typecode(output_id) == "ProcessedSample":
312
+ processed_sample_id = output_id
313
+ break
314
+
315
+ # aggregate the values on `collection_date` and `depth` slots
316
+ # here, we are collecting the `collection_date` and `depth` values
317
+ # asserted on each of the biosamples that are part of a given pooling
318
+ # process in the following way:
319
+ # example of aggregated `collection_date`: 2017-06-05T16:50Z/2017-06-05T17:47Z
320
+ # example of aggregated `depth`: 0-10 m
321
+ collection_dates = []
322
+ depths = []
323
+
324
+ for bs_id in pooled_biosample_ids:
325
+ biosample = biosample_lookup.get(bs_id)
326
+ if not biosample:
327
+ continue
328
+
329
+ if "collection_date" in biosample:
330
+ collection_date = biosample["collection_date"]
331
+ if (
332
+ isinstance(collection_date, dict)
333
+ and "has_raw_value" in collection_date
334
+ ):
335
+ collection_dates.append(collection_date["has_raw_value"])
336
+ elif isinstance(collection_date, str):
337
+ collection_dates.append(collection_date)
338
+
339
+ if "depth" in biosample:
340
+ depth = biosample["depth"]
341
+ if isinstance(depth, dict):
342
+ if "has_numeric_value" in depth:
343
+ depths.append(depth["has_numeric_value"])
344
+ elif (
345
+ "has_minimum_numeric_value" in depth
346
+ and "has_maximum_numeric_value" in depth
347
+ ):
348
+ depths.extend(
349
+ [
350
+ depth["has_minimum_numeric_value"],
351
+ depth["has_maximum_numeric_value"],
352
+ ]
353
+ )
354
+ elif isinstance(depth, (int, float)):
355
+ depths.append(depth)
356
+
357
+ # create aggregated (forward slash separated) value for `collection_date`
358
+ aggregated_collection_date = None
359
+ if collection_dates:
360
+ sorted_dates = sorted(collection_dates)
361
+ if len(sorted_dates) > 1:
362
+ aggregated_collection_date = f"{sorted_dates[0]}/{sorted_dates[-1]}"
363
+ else:
364
+ aggregated_collection_date = sorted_dates[0]
365
+
366
+ # create aggregated (hyphen separated) value for `depth`
367
+ aggregated_depth = None
368
+ if depths:
369
+ min_depth = min(depths)
370
+ max_depth = max(depths)
371
+ if min_depth != max_depth:
372
+ aggregated_depth = f"{min_depth}-{max_depth} m"
373
+ else:
374
+ aggregated_depth = f"{min_depth} m"
375
+
376
+ # update all biosamples that are part of this pooling process
377
+ pooling_info = {
378
+ "processed_sample_id": processed_sample_id,
379
+ "pooling_process_id": pooling_process.get("id"),
380
+ "pooled_biosample_ids": pooled_biosample_ids,
381
+ "aggregated_collection_date": aggregated_collection_date,
382
+ "aggregated_depth": aggregated_depth,
383
+ }
384
+
385
+ for bs_id in pooled_biosample_ids:
386
+ if bs_id in result:
387
+ result[bs_id] = pooling_info
388
+
389
+ return result
390
+
391
+
278
392
  def validate_xml(xml, xsd_url):
279
393
  response = requests.get(xsd_url)
280
394
  response.raise_for_status()
@@ -1,7 +1,6 @@
1
- from dagster import graph, GraphIn
1
+ from dagster import graph
2
2
 
3
3
  from nmdc_runtime.site.ops import (
4
- build_merged_db,
5
4
  generate_biosample_set_for_nmdc_study_from_gold,
6
5
  nmdc_schema_database_export_filename,
7
6
  nmdc_schema_database_from_gold_study,
@@ -12,8 +11,6 @@ from nmdc_runtime.site.ops import (
12
11
  gold_projects_by_study,
13
12
  gold_study,
14
13
  poll_for_run_completion,
15
- run_etl,
16
- local_file_to_api_object,
17
14
  get_operation,
18
15
  produce_curated_db,
19
16
  delete_operations,
@@ -56,6 +53,7 @@ from nmdc_runtime.site.ops import (
56
53
  get_data_objects_from_biosamples,
57
54
  get_nucleotide_sequencing_from_biosamples,
58
55
  get_library_preparation_from_biosamples,
56
+ get_aggregated_pooled_biosamples,
59
57
  get_all_instruments,
60
58
  get_ncbi_export_pipeline_inputs,
61
59
  ncbi_submission_xml_from_nmdc_study,
@@ -70,24 +68,6 @@ from nmdc_runtime.site.ops import (
70
68
  from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
71
69
 
72
70
 
73
- @graph
74
- def gold_translation():
75
- """
76
- Translating an export of the JGI GOLD [1] SQL database to the NMDC database JSON schema.
77
-
78
- [1] Genomes OnLine Database (GOLD) <https://gold.jgi.doe.gov/>.
79
- """
80
- local_file_to_api_object(run_etl(build_merged_db()))
81
-
82
-
83
- @graph()
84
- def gold_translation_curation():
85
- # TODO
86
- # - have produce_curated_db do actual curation (see notebook), persisting to db.
87
- # - more steps in pipeline? Or handoff via run_status_sensor on DagsterRunStatus.SUCCESS.
88
- produce_curated_db(get_operation())
89
-
90
-
91
71
  @graph()
92
72
  def create_objects_from_site_object_puts():
93
73
  delete_operations(
@@ -194,6 +174,7 @@ def translate_metadata_submission_to_nmdc_schema_database():
194
174
  data_object_mapping_file_url,
195
175
  biosample_extras_file_url,
196
176
  biosample_extras_slot_mapping_file_url,
177
+ study_id,
197
178
  ) = get_submission_portal_pipeline_inputs()
198
179
 
199
180
  metadata_submission = fetch_nmdc_portal_submission_by_id(submission_id)
@@ -214,6 +195,7 @@ def translate_metadata_submission_to_nmdc_schema_database():
214
195
  biosample_extras=biosample_extras,
215
196
  biosample_extras_slot_mapping=biosample_extras_slot_mapping,
216
197
  instrument_mapping=instrument_mapping,
198
+ study_id=study_id,
217
199
  )
218
200
 
219
201
  validate_metadata(database)
@@ -234,6 +216,7 @@ def ingest_metadata_submission():
234
216
  data_object_mapping_file_url,
235
217
  biosample_extras_file_url,
236
218
  biosample_extras_slot_mapping_file_url,
219
+ study_id,
237
220
  ) = get_submission_portal_pipeline_inputs()
238
221
 
239
222
  metadata_submission = fetch_nmdc_portal_submission_by_id(submission_id)
@@ -254,6 +237,7 @@ def ingest_metadata_submission():
254
237
  biosample_extras=biosample_extras,
255
238
  biosample_extras_slot_mapping=biosample_extras_slot_mapping,
256
239
  instrument_mapping=instrument_mapping,
240
+ study_id=study_id,
257
241
  )
258
242
 
259
243
  log_database_ids(database)
@@ -493,6 +477,7 @@ def nmdc_study_to_ncbi_submission_export():
493
477
  )
494
478
  data_object_records = get_data_objects_from_biosamples(biosamples)
495
479
  library_preparation_records = get_library_preparation_from_biosamples(biosamples)
480
+ pooled_biosamples_data = get_aggregated_pooled_biosamples(biosamples)
496
481
  all_instruments = get_all_instruments()
497
482
  xml_data = ncbi_submission_xml_from_nmdc_study(
498
483
  nmdc_study,
@@ -502,6 +487,7 @@ def nmdc_study_to_ncbi_submission_export():
502
487
  data_object_records,
503
488
  library_preparation_records,
504
489
  all_instruments,
490
+ pooled_biosamples_data,
505
491
  )
506
492
  ncbi_submission_xml_asset(xml_data)
507
493