nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,42 +0,0 @@
1
- """
2
- Translates EMSL data into JSON conformant with the NMDC JSON schema
3
- """
4
- from dagster import op, graph
5
-
6
- from nmdc_runtime.lib.nmdc_etl_class import NMDC_ETL
7
- from nmdc_runtime.site.translation.util import (
8
- load_nmdc_etl_class,
9
- load_mongo_collection,
10
- preset_prod,
11
- preset_test,
12
- schema_validate,
13
- )
14
-
15
-
16
- @op
17
- def transform_emsl_omics_processing(_context, nmdc_etl: NMDC_ETL) -> tuple:
18
- return ("emsl.omics_processing_set", nmdc_etl.transform_emsl_omics_processing())
19
-
20
-
21
- @op
22
- def transform_emsl_data_object(_context, nmdc_etl: NMDC_ETL) -> tuple:
23
- return ("emsl.data_object_set", nmdc_etl.transform_emsl_data_object())
24
-
25
-
26
- @graph
27
- def emsl():
28
- # load_merged_data_source()
29
- nmdc_etl = load_nmdc_etl_class()
30
- emsl_omics_processing = transform_emsl_omics_processing(nmdc_etl)
31
- emsl_omics_processing_validated = schema_validate(emsl_omics_processing)
32
-
33
- emsl_data_object = transform_emsl_data_object(nmdc_etl)
34
- emsl_data_object_validated = schema_validate(emsl_data_object)
35
-
36
- # load data into mongo
37
- load_mongo_collection(emsl_omics_processing_validated)
38
- load_mongo_collection(emsl_data_object_validated)
39
-
40
-
41
- emsl_job = emsl.to_job(**preset_prod)
42
- test_emsl_job = emsl.to_job(name="test_emsl", **preset_test)
@@ -1,53 +0,0 @@
1
- """
2
- Translate an export of the JGI GOLD [1] study, project, and biosample data into JSON conformant with the NMDC JSON schema.
3
- [1] Genomes OnLine Database (GOLD) <https://gold.jgi.doe.gov/>.
4
- """
5
-
6
- from dagster import op, graph
7
-
8
- from nmdc_runtime.lib.nmdc_etl_class import NMDC_ETL
9
- from nmdc_runtime.site.translation.util import (
10
- load_nmdc_etl_class,
11
- load_mongo_collection,
12
- preset_prod,
13
- preset_test,
14
- schema_validate,
15
- )
16
-
17
-
18
- @op
19
- def transform_study(_context, nmdc_etl: NMDC_ETL) -> tuple:
20
- # return {"study_set": nmdc_etl.transform_study()}
21
- return ("gold.study_set", nmdc_etl.transform_study())
22
-
23
-
24
- @op
25
- def transform_gold_omics_processing(_context, nmdc_etl: NMDC_ETL) -> tuple:
26
- return ("gold.omics_processing_set", nmdc_etl.transform_omics_processing())
27
-
28
-
29
- @op
30
- def transform_biosample(_context, nmdc_etl: NMDC_ETL) -> tuple:
31
- return ("gold.biosample_set", nmdc_etl.transform_biosample())
32
-
33
-
34
- @graph
35
- def gold():
36
- nmdc_etl = load_nmdc_etl_class()
37
- gold_study = transform_study(nmdc_etl)
38
- gold_study_validated = schema_validate(gold_study)
39
-
40
- gold_omics_processing = transform_gold_omics_processing(nmdc_etl)
41
- gold_omics_processing_validated = schema_validate(gold_omics_processing)
42
-
43
- gold_biosample = transform_biosample(nmdc_etl)
44
- gold_biosample_validated = schema_validate(gold_biosample)
45
-
46
- # load data into mongo
47
- load_mongo_collection(gold_study_validated)
48
- load_mongo_collection(gold_omics_processing_validated)
49
- load_mongo_collection(gold_biosample_validated)
50
-
51
-
52
- gold_job = gold.to_job(**preset_prod)
53
- test_gold_job = gold.to_job(name="test_gold", **preset_test)
@@ -1,31 +0,0 @@
1
- """
2
- Translates JGI data into JSON conformant with the NMDC JSON schema
3
- """
4
- from dagster import op, graph
5
-
6
- from nmdc_runtime.lib.nmdc_etl_class import NMDC_ETL
7
- from nmdc_runtime.site.translation.util import (
8
- load_nmdc_etl_class,
9
- load_mongo_collection,
10
- preset_prod,
11
- preset_test,
12
- schema_validate,
13
- )
14
-
15
-
16
- @op
17
- def transform_jgi_data_object(_context, nmdc_etl: NMDC_ETL) -> tuple:
18
- # return "jgi.data_object_set", [{"foo": "bar"}] # used for testing failure
19
- return "jgi.data_object_set", nmdc_etl.transform_jgi_data_object()
20
-
21
-
22
- @graph
23
- def jgi():
24
- nmdc_etl = load_nmdc_etl_class()
25
- jgi_data_object = transform_jgi_data_object(nmdc_etl)
26
- jgi_data_object_validated = schema_validate(jgi_data_object)
27
- load_mongo_collection(jgi_data_object_validated)
28
-
29
-
30
- jgi_job = jgi.to_job(**preset_prod)
31
- test_jgi_job = jgi.to_job(name="test_jgi", **preset_test)
@@ -1,132 +0,0 @@
1
- from pathlib import Path
2
-
3
- from dagster import op, Failure, AssetMaterialization
4
- from dagster.core.definitions.events import AssetKey, Output
5
- from fastjsonschema import JsonSchemaValueException
6
-
7
- from nmdc_runtime.lib.nmdc_etl_class import NMDC_ETL
8
- from nmdc_runtime.site.resources import mongo_resource
9
- from nmdc_runtime.util import nmdc_jsonschema_validator
10
-
11
- mode_prod = {"resource_defs": {"mongo": mongo_resource}}
12
- mode_dev = {
13
- "resource_defs": {"mongo": mongo_resource}
14
- } # Connect to a real MongoDB instance for development.
15
- mode_test = {
16
- "resource_defs": {"mongo": mongo_resource}
17
- } # Connect to a real MongoDB instance for testing.
18
-
19
- config_prod = {
20
- "resources": {
21
- "mongo": {
22
- "config": {
23
- "host": {"env": "MONGO_HOST"},
24
- "username": {"env": "MONGO_USERNAME"},
25
- "password": {"env": "MONGO_PASSWORD"},
26
- "dbname": "nmdc_etl_staging",
27
- },
28
- }
29
- },
30
- "ops": {
31
- "load_nmdc_etl_class": {
32
- "config": {
33
- "data_file": str(
34
- Path(__file__).parent.parent.parent.parent.joinpath(
35
- "metadata-translation/src/data/nmdc_merged_data.tsv.zip"
36
- )
37
- ),
38
- "sssom_map_file": "",
39
- "spec_file": str(
40
- Path(__file__).parent.parent.parent.parent.joinpath(
41
- "nmdc_runtime/lib/nmdc_data_source.yaml"
42
- )
43
- ),
44
- }
45
- }
46
- },
47
- }
48
-
49
- config_test = {
50
- "resources": {
51
- "mongo": {
52
- "config": {
53
- # local docker container via docker-compose.yml
54
- "host": "mongo",
55
- "username": "admin",
56
- "password": "root",
57
- "dbname": "nmdc_etl_staging",
58
- },
59
- }
60
- },
61
- "ops": {
62
- "load_nmdc_etl_class": {
63
- "config": {
64
- "data_file": str(
65
- Path(__file__).parent.parent.parent.parent.joinpath(
66
- "metadata-translation/src/data/nmdc_merged_data.tsv.zip"
67
- )
68
- ),
69
- "sssom_map_file": "",
70
- "spec_file": str(
71
- Path(__file__).parent.parent.parent.parent.joinpath(
72
- "nmdc_runtime/lib/nmdc_data_source.yaml"
73
- )
74
- ),
75
- }
76
- }
77
- },
78
- }
79
-
80
- preset_prod = dict(**mode_prod, config=config_prod)
81
- preset_test = dict(**mode_test, config=config_test)
82
-
83
-
84
- @op
85
- def load_nmdc_etl_class(context) -> NMDC_ETL:
86
- # build instance of NMDC_ETL class
87
- etl = NMDC_ETL(
88
- merged_data_file=context.op_config["data_file"],
89
- data_source_spec_file=context.op_config["spec_file"],
90
- sssom_file="",
91
- )
92
- return etl
93
-
94
-
95
- @op(required_resource_keys={"mongo"})
96
- def load_mongo_collection(context, data: tuple):
97
- mongo_db = context.resources.mongo.db
98
- collection_name, documents = data
99
- collection = mongo_db[collection_name] # get mongo collection
100
-
101
- # drop collection if exists
102
- collection.drop()
103
-
104
- # insert data
105
- collection.insert(documents)
106
- context.log.info(f"inserted {len(documents)} documents into {collection.name}")
107
- return collection_name
108
-
109
-
110
- @op()
111
- def schema_validate(context, data: tuple):
112
- def schema_validate_asset(collection_name, status, errors):
113
- return AssetMaterialization(
114
- asset_key=AssetKey(["translation", f"{collection_name}_translation"]),
115
- description=f"{collection_name} translation validation",
116
- metadata={"status": status, "errors": errors},
117
- )
118
-
119
- collection_name, documents = data
120
- _, schema_collection_name = collection_name.split(".")
121
- try:
122
- nmdc_jsonschema_validator({schema_collection_name: documents})
123
- context.log.info(f"data for {collection_name} is valid")
124
- yield schema_validate_asset(collection_name, "valid", "none")
125
- return data # do I need a return statement and an Output?
126
- except JsonSchemaValueException as e:
127
- context.log.error(f"validation failed for {schema_collection_name} " + str(e))
128
- context.log.error(f"documents: {documents}")
129
- yield schema_validate_asset(collection_name, "not valid", str(e))
130
- raise Failure(str(e))
131
- finally:
132
- yield Output(data)
@@ -1,42 +0,0 @@
1
- """
2
- Validates data in the JGI collection in the nmdc_etl_staging database.
3
- """
4
- from dagster import op, graph
5
-
6
- from nmdc_runtime.site.ops import local_file_to_api_object
7
- from nmdc_runtime.site.validation.util import (
8
- preset_prod,
9
- preset_test,
10
- validate_mongo_collection,
11
- write_to_local_file,
12
- announce_validation_report,
13
- )
14
-
15
-
16
- @op
17
- def jgi_data_object_set_collection_name():
18
- return "jgi.data_object_set"
19
-
20
-
21
- @graph()
22
- def jgi():
23
- report = validate_mongo_collection(jgi_data_object_set_collection_name())
24
- # the below could also be a @graph and loaded as a "subgraph" by e.g. the jgi graph job.
25
- local_path = write_to_local_file(report)
26
- obj = local_file_to_api_object(local_path)
27
- announce_validation_report(report, obj)
28
-
29
-
30
- # passing the collection name via the config
31
- # problem: not sure if this is best when multiple sets need to be validated
32
- # from toolz import assoc_in
33
- # config_ops = {
34
- # "validate_mongo_collection": {"config": {"collection_name": "jgi.data_object_set"}}
35
- # }
36
- # validate_jgi_job = jgi.to_job(**assoc_in(preset_prod, ["config", "ops"], config_ops))
37
- # test_validate_jgi_job = jgi.to_job(
38
- # **assoc_in(preset_test, ["config", "ops"], config_ops)
39
- # )
40
-
41
- validate_jgi_job = jgi.to_job(**preset_prod)
42
- test_validate_jgi_job = jgi.to_job(**preset_test)
@@ -1,181 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: nmdc_runtime
3
- Version: 1.3.1
4
- Summary: A runtime system for NMDC data management and orchestration
5
- Home-page: https://github.com/microbiomedata/nmdc-runtime
6
- Author: Donny Winston
7
- Author-email: donny@polyneme.xyz
8
- Classifier: Development Status :: 3 - Alpha
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: License :: OSI Approved :: Apache Software License
11
- Requires-Python: >=3.10
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
-
15
- A runtime system for NMDC data management and orchestration.
16
-
17
- ## Service Status
18
-
19
- http://nmdcstatus.polyneme.xyz/
20
-
21
- ## How It Fits In
22
-
23
- * [issues](https://github.com/microbiomedata/issues)
24
- tracks issues related to NMDC, which may necessitate work across multiple repos.
25
-
26
- * [nmdc-schema](https://github.com/microbiomedata/nmdc-schema/)
27
- houses the LinkML schema specification, as well as generated artifacts (e.g. JSON Schema).
28
-
29
- * [nmdc-server](https://github.com/microbiomedata/nmdc-server)
30
- houses code specific to the data portal -- its database, back-end API, and front-end application.
31
-
32
- * [workflow_documentation](https://nmdc-workflow-documentation.readthedocs.io/en/latest/index.html)
33
- references workflow code spread across several repositories, that take source data and produce computed data.
34
-
35
- * This repo (nmdc-runtime)
36
- * houses code that takes source data and computed data, and transforms it
37
- to broadly accommodate downstream applications such as the data portal
38
- * manages execution of the above (i.e., lightweight data transformations) and also
39
- of computationally- and data-intensive workflows performed at other sites,
40
- ensuring that claimed jobs have access to needed configuration and data resources.
41
-
42
- ## Data exports
43
-
44
- The NMDC metadata as of 2021-10 is available here:
45
-
46
- https://drs.microbiomedata.org/ga4gh/drs/v1/objects/sys086d541
47
-
48
- The link returns a [GA4GH DRS API bundle object record](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/#_drs_datatypes), with the NMDC metadata collections (study_set, biosample_set, etc.) as contents, each a DRS API blob object.
49
-
50
- For example the blob for the study_set collection export, named "study_set.jsonl.gz", is listed with DRS API ID "sys0xsry70". Thus, it is retrievable via
51
-
52
- https://drs.microbiomedata.org/ga4gh/drs/v1/objects/sys0xsry70
53
-
54
- The returned blob object record lists https://nmdc-runtime.files.polyneme.xyz/nmdcdb-mongoexport/2021-10-14/study_set.jsonl.gz as the url for an access method.
55
-
56
- The 2021-10 exports are currently all accessible at `https://nmdc-runtime.files.polyneme.xyz/nmdcdb-mongoexport/2021-10-14/${COLLECTION_NAME}.jsonl.gz`, but the DRS API indirection allows these links to change in the future, for mirroring via other URLs, etc. So, the DRS API links should be the links you share.
57
-
58
- ## Overview
59
-
60
- The runtime features:
61
-
62
- 1. [Dagster](https://docs.dagster.io/concepts) orchestration:
63
- - dagit - a web UI to monitor and manage the running system.
64
- - dagster-daemon - a service that triggers pipeline runs based on time or external state.
65
- - PostgreSQL database - for storing run history, event logs, and scheduler state.
66
- - workspace code
67
- - Code to run is loaded into a Dagster `workspace`. This code is loaded from
68
- one or more dagster `repositories`. Each Dagster `repository` may be run with a different
69
- Python virtual environment if need be, and may be loaded from a local Python file or
70
- `pip install`ed from an external source. In our case, each Dagster `repository` is simply
71
- loaded from a Python file local to the nmdc-runtime GitHub repository, and all code is
72
- run in the same Python environment.
73
- - A Dagster repository consists of `solids` and `pipelines`,
74
- and optionally `schedules` and `sensors`.
75
- - `solids` represent individual units of computation
76
- - `pipelines` are built up from solids
77
- - `schedules` trigger recurring pipeline runs based on time
78
- - `sensors` trigger pipeline runs based on external state
79
- - Each `pipeline` can declare dependencies on any runtime `resources` or additional
80
- configuration. There are TerminusDB and MongoDB `resources` defined, as well as `preset`
81
- configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
82
- look to a set of known environment variables to load resources configurations, depending on
83
- the `mode`.
84
-
85
- 2. A [TerminusDB](https://terminusdb.com/) database supporting revision control of schema-validated
86
- data.
87
-
88
- 3. A MongoDB database supporting write-once, high-throughput internal
89
- data storage by the nmdc-runtime FastAPI instance.
90
-
91
- 4. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
92
- database, as a hub for data management and workflow automation.
93
-
94
- ## Local Development
95
-
96
- Ensure Docker (and Docker Compose) are installed; and the Docker engine is running.
97
-
98
- ```shell
99
- docker --version
100
- docker compose version
101
- docker info
102
- ```
103
-
104
- Ensure the permissions of `./mongoKeyFile` are such that only the file's owner can read or write the file.
105
-
106
- ```shell
107
- chmod 600 ./mongoKeyFile
108
- ```
109
-
110
- Ensure you have a `.env` file for the Docker services to source from. You may copy `.env.example` to
111
- `.env` (which is gitignore'd) to get started.
112
-
113
- ```shell
114
- cp .env.example .env
115
- ```
116
-
117
- Create environment variables in your shell session, based upon the contents of the `.env` file.
118
-
119
- ```shell
120
- export $(grep -v '^#' .env | xargs)
121
- ```
122
-
123
- If you are connecting to resources that require an SSH tunnel—for example, a MongoDB server that is only accessible on the NERSC network—set up the SSH tunnel.
124
-
125
- The following command could be useful to you, either directly or as a template (see `Makefile`).
126
-
127
- ```shell
128
- make nersc-mongo-tunnels
129
- ```
130
-
131
- Finally, spin up the Docker Compose stack.
132
-
133
- ```bash
134
- make up-dev
135
- ```
136
-
137
- Docker Compose is used to start local MongoDB and PostgreSQL (used by Dagster) instances, as well
138
- as a Dagster web server (dagit) and daemon (dagster-daemon).
139
-
140
- The Dagit web server is viewable at http://127.0.0.1:3000/.
141
-
142
- The FastAPI service is viewable at http://127.0.0.1:8000/ -- e.g., rendered documentation at
143
- http://127.0.0.1:8000/redoc/.
144
-
145
- ## Local Testing
146
-
147
- Tests can be found in `tests` and are run with the following commands:
148
-
149
- On an M1 Mac? May need to `export DOCKER_DEFAULT_PLATFORM=linux/amd64`.
150
-
151
- ```bash
152
- make up-test
153
- make test
154
- ```
155
-
156
- As you create Dagster solids and pipelines, add tests in `tests/` to check that your code behaves as
157
- desired and does not break over time.
158
-
159
- [For hints on how to write tests for solids and pipelines in Dagster, see their documentation
160
- tutorial on Testing](https://docs.dagster.io/tutorial/testable).
161
-
162
- ## Publish to PyPI
163
-
164
- This repository contains a GitHub Actions workflow that publishes a Python package to [PyPI](https://pypi.org/project/nmdc-runtime/).
165
-
166
- You can also _manually_ publish the Python package to PyPI by issuing the following commands in the root directory of the repository:
167
-
168
- ```
169
- rm -rf dist
170
- python -m build
171
- twine upload dist/*
172
- ```
173
-
174
- ## Links
175
-
176
- Here are links related to this repository:
177
-
178
- - Production API server: https://api.microbiomedata.org
179
- - PyPI package: https://pypi.org/project/nmdc-runtime
180
- - DockerHub image (API server): https://hub.docker.com/r/microbiomedata/nmdc-runtime-fastapi
181
- - DockerHub image (Dagster): https://hub.docker.com/r/microbiomedata/nmdc-runtime-dagster
@@ -1,81 +0,0 @@
1
- nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
3
- nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- nmdc_runtime/util.py,sha256=o74ZKOmSD79brPFAcQFsYpA6wh9287m0hDhDlIpn9VM,19872
5
- nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
8
- nmdc_runtime/core/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- nmdc_runtime/core/exceptions/__init__.py,sha256=s486odD0uhUuk9K7M5_NISOgRrUE5RNnDJSypA2Qe_I,520
10
- nmdc_runtime/core/exceptions/base.py,sha256=G5azYv0FJvbjrpQtK90BkM-KK2f534szdwrHj9N-SNo,1343
11
- nmdc_runtime/core/exceptions/token.py,sha256=7iTdfRQjfijDExd6-KJBjN7t0BGI_Kc1F6Lc-d0AsE8,293
12
- nmdc_runtime/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- nmdc_runtime/domain/users/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- nmdc_runtime/domain/users/queriesInterface.py,sha256=0DjOehnsA5oKADmRKh8NTool2zoQZaejFigXHuUGoOg,476
15
- nmdc_runtime/domain/users/userSchema.py,sha256=eVpsB5aSbT89XjPh2_m7ao8XyyinEC94hpZQIouV4uk,758
16
- nmdc_runtime/domain/users/userService.py,sha256=b-HD7N-wWQyAux_iZsXMBFrz5_j9ygRc3qsJlm-vQGI,428
17
- nmdc_runtime/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- nmdc_runtime/infrastructure/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- nmdc_runtime/infrastructure/database/db.py,sha256=djdqVxXvvJWtJUj4yariINcOuYOkQ_OiAYI_jGqOtM8,32
20
- nmdc_runtime/infrastructure/database/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- nmdc_runtime/infrastructure/database/models/user.py,sha256=gOZVsQ9uZ_JlPKUcNOiJpj4_CQ9p2BpCaegcPiJQETs,188
22
- nmdc_runtime/lib/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
- nmdc_runtime/lib/extract_nmdc_data.py,sha256=xDFPoYsgkauN48R4v-tJIF0cP_p3J-sBjnyHd0InD9Y,1177
24
- nmdc_runtime/lib/load_nmdc_data.py,sha256=KO2cIqkY3cBCVcFIwsGokZNOKntOejZVG8ecq43NjFM,3934
25
- nmdc_runtime/lib/nmdc_dataframes.py,sha256=rVTczY2Jey1yE3x3nZ-RTgtdc2XkzLtKhB_PM3FIb-E,28849
26
- nmdc_runtime/lib/nmdc_etl_class.py,sha256=tVh3rKVMkBHQE65_LhKeIjCsaCZQk_HJzbc9K4xUNCs,13522
27
- nmdc_runtime/lib/transform_nmdc_data.py,sha256=hij4lR3IMQRJQdL-rsP_I-m_WyFPsBMchV2MNFUkh0M,39906
28
- nmdc_runtime/minter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- nmdc_runtime/minter/bootstrap.py,sha256=5Ej6pJVBRryRIi0ZwEloY78Zky7iE2okF6tPwRI2axM,822
30
- nmdc_runtime/minter/config.py,sha256=mq_s0xjLZK-zwjwk3IGgnk9ZIvvejyyZ7_qZkLt3V-c,1409
31
- nmdc_runtime/minter/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- nmdc_runtime/minter/adapters/repository.py,sha256=I-jmGP38-9kPhkogrwUht_Ir0CfHA9_5ZImw5I_wbcw,8323
33
- nmdc_runtime/minter/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ53TOlDU,1416
35
- nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
37
- nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- nmdc_runtime/site/graphs.py,sha256=siHlRnD2eS9nw3Ne049TcGG6I6IYFvjgWQuuSHzEOqc,9492
39
- nmdc_runtime/site/ops.py,sha256=YzDm7Dm2sELptwTew8DTOcS3nYBH_JegXhu3wzZuuiY,32482
40
- nmdc_runtime/site/repository.py,sha256=UgY9eMnNgZxa-Y0QeDyENh4KHtxuBWkYCjxltM4mTzA,30938
41
- nmdc_runtime/site/resources.py,sha256=pQSwg1dRpL_D91gYLzzaOIDZ3qa69rPqSlsq5dS9i_M,17783
42
- nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
43
- nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
45
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py,sha256=XIFI_AI3zl0dFr-ELOEmwvT41MyRKBGFaAT3RcamTNE,4166
46
- nmdc_runtime/site/backup/nmdcdb_mongoimport.py,sha256=k6w5yscMNYoMBVkaAA9soWS0Dj2CB0FRBSFlifRO3Ro,1739
47
- nmdc_runtime/site/changesheets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- nmdc_runtime/site/changesheets/base.py,sha256=lZT6WCsEBl-FsTr7Ki8_ploT93uMiVyIWWKM36aOrRk,3171
49
- nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
51
- nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
52
- nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- nmdc_runtime/site/export/study_metadata.py,sha256=3X0Lh85WCExhamWPeAOrUIcf5JvxCabWNH6F7n5xxyU,4026
54
- nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
56
- nmdc_runtime/site/terminusdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- nmdc_runtime/site/terminusdb/generate.py,sha256=Z3c06LDx3TGw4pvPRO97caQvzc8SuhGmPIr_d5b_E9I,6144
58
- nmdc_runtime/site/terminusdb/ingest.py,sha256=WE_V4vRRnlL6hIBU1TDSUheYOBWS9d5g6FHPS64jzvM,1245
59
- nmdc_runtime/site/terminusdb/schema.py,sha256=3e39rHUSZsNbN_F0SHHNsvcEGRWtYa6O9KNj3cH3tUs,77129
60
- nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- nmdc_runtime/site/translation/emsl.py,sha256=l6Q9Jj3RNJFQNYAU_TtKTJ7cyFcR93xBRs_lLdX0bMQ,1244
62
- nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
63
- nmdc_runtime/site/translation/gold_translator.py,sha256=8i5FxrgAG4rLbM0mcCSBaZEzyReht6xwmpm4xeX4HwI,26451
64
- nmdc_runtime/site/translation/jgi.py,sha256=bh73r0uq5BT3ywXwIa1OEKKtz9LbFsSng472tdr-xtg,875
65
- nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=e_7tXFrP0PpdhqUCxXmOaFViSuG36IIMDqyj3FHLcgQ,23069
66
- nmdc_runtime/site/translation/neon_soil_translator.py,sha256=x-FfNKsIv0efgxty9v4wOxNu5nrrS-N8phx12IqfLOI,37624
67
- nmdc_runtime/site/translation/neon_utils.py,sha256=k8JYMnm-L981BTOdAMomR1CulS_Hz5v7aYxrJ94KEJc,5086
68
- nmdc_runtime/site/translation/submission_portal_translator.py,sha256=lHcrfPR5wk3BcZ0Uw5zUyWu5XRVikgOzdzSb5nFVS9I,27964
69
- nmdc_runtime/site/translation/translator.py,sha256=xM9dM-nTgSWwu5HFoUVNHf8kqk9iiH4PgWdSx4OKxEk,601
70
- nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
71
- nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
- nmdc_runtime/site/validation/emsl.py,sha256=TgckqKkFquHDLso77sn-jZRu5ZaBevGCt5p8e3AqOak,670
73
- nmdc_runtime/site/validation/gold.py,sha256=kJ1L081SZb-8qKpF731r5aQOueM206SUfUYMTTNTFMc,802
74
- nmdc_runtime/site/validation/jgi.py,sha256=lBo-FCtEYedT74CpW-Kdj512Ib963ik-4YIYmY5puDo,1298
75
- nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
76
- nmdc_runtime-1.3.1.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
77
- nmdc_runtime-1.3.1.dist-info/METADATA,sha256=EqLqcS_qe3Qmh7aGB9GKNyKyBzZ2Lbj3Op8zoQG93Y4,7424
78
- nmdc_runtime-1.3.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
79
- nmdc_runtime-1.3.1.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
80
- nmdc_runtime-1.3.1.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
81
- nmdc_runtime-1.3.1.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- nmdc_runtime
File without changes
File without changes
File without changes
File without changes
File without changes