nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_runtime/Dockerfile +177 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +212 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +777 -0
- nmdc_runtime/api/core/util.py +114 -0
- nmdc_runtime/api/db/mongo.py +436 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +206 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +817 -0
- nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +495 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +57 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +207 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/wfe_file_stages.py +122 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
- nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +56 -0
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +30 -4
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +89 -0
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +29 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +29 -0
- nmdc_runtime/site/export/ncbi_xml.py +1331 -0
- nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
- nmdc_runtime/site/export/study_metadata.py +27 -4
- nmdc_runtime/site/graphs.py +294 -45
- nmdc_runtime/site/ops.py +1008 -230
- nmdc_runtime/site/repair/database_updater.py +451 -0
- nmdc_runtime/site/repository.py +368 -133
- nmdc_runtime/site/resources.py +154 -80
- nmdc_runtime/site/translation/gold_translator.py +235 -83
- nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
- nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
- nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
- nmdc_runtime/site/translation/neon_utils.py +24 -7
- nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
- nmdc_runtime/site/translation/translator.py +73 -3
- nmdc_runtime/site/util.py +26 -7
- nmdc_runtime/site/validation/emsl.py +1 -0
- nmdc_runtime/site/validation/gold.py +1 -0
- nmdc_runtime/site/validation/util.py +16 -12
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +236 -192
- nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
- nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/terminusdb/generate.py +0 -198
- nmdc_runtime/site/terminusdb/ingest.py +0 -44
- nmdc_runtime/site/terminusdb/schema.py +0 -1671
- nmdc_runtime/site/translation/emsl.py +0 -42
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -31
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -42
- nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
- nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
- nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
- {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
nmdc_runtime/Dockerfile,sha256=X6_f-HX993Q6PH5J9mf01-3w0dSs_5tfUX7Eq7RzugE,10052
|
|
2
|
+
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
nmdc_runtime/config.py,sha256=fVxcqdXv13Fa9CSRPnFIsfmvmlos8o4SFUZcmsXfX_8,2020
|
|
4
|
+
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
nmdc_runtime/mongo_util.py,sha256=L6UxK_6f0wQw2NTKCUVKCp-QLhBudQczDLUdF5odbP8,2943
|
|
6
|
+
nmdc_runtime/util.py,sha256=okP1_LYhGiWeL7Qkq6VRVMNtKfjetXyDiAs_Q3KU1fM,21058
|
|
7
|
+
nmdc_runtime/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
nmdc_runtime/api/analytics.py,sha256=Vp-BYQZ_sqR282FLLkGUoxlieSqyXxU0DmMvjDzMVsI,3723
|
|
9
|
+
nmdc_runtime/api/entrypoint.sh,sha256=y3lZU2xPnZ3cAVK7tn0Nw-qVGElryt9fzj1IaZ5kkGw,209
|
|
10
|
+
nmdc_runtime/api/main.py,sha256=07VP_YEIIi_hPlZNch3zfQ5l7vQCfZ08j8ABuAmc15o,19023
|
|
11
|
+
nmdc_runtime/api/middleware.py,sha256=GUVN26Ym9H87gaxrBs0NAMpOoA7qQfv-7UnIJOkcQkI,1703
|
|
12
|
+
nmdc_runtime/api/openapi.py,sha256=8E522L6zvhtfM_-eyTNE_Z7EwVvLQh54QUhvvSO5bj0,10159
|
|
13
|
+
nmdc_runtime/api/boot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
nmdc_runtime/api/boot/capabilities.py,sha256=9Cp_yULUeEXN7Nz-WC5XJXTaB7fhOWOCGp8mx050qgg,291
|
|
15
|
+
nmdc_runtime/api/boot/object_types.py,sha256=JL6OZw34lKkbKJJXDIiswfLmn1tkOng4ZKF6ypqWKhs,4382
|
|
16
|
+
nmdc_runtime/api/boot/triggers.py,sha256=fLM588CBYft_no1ENN13XSO6Cj4DB90ZKJl-1UgfsYw,2723
|
|
17
|
+
nmdc_runtime/api/boot/workflows.py,sha256=UpOAMjVKagat-PCKPx7HuriLTnCbhj0EVgpk7UuLpQQ,3826
|
|
18
|
+
nmdc_runtime/api/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
nmdc_runtime/api/core/auth.py,sha256=LnQAzChhRoLOTphew8LrZKZbM8osPI69QFW41wTfoYM,7514
|
|
20
|
+
nmdc_runtime/api/core/idgen.py,sha256=ru9nAczRAEp1Koq8yXLCMdkN39dNvfTrIwWwVI2SeWo,8101
|
|
21
|
+
nmdc_runtime/api/core/metadata.py,sha256=sUp6bBWiTmOXIuf8Si617qiFzvFHBQWAjFYDZlwD-TU,27530
|
|
22
|
+
nmdc_runtime/api/core/util.py,sha256=gnz5t7Dy9uzcmDChwQqgIU7_1oYFLYBUvFD_LhmnO3g,3374
|
|
23
|
+
nmdc_runtime/api/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
nmdc_runtime/api/db/mongo.py,sha256=xeTRqsZZzmSW4SJuGy9XNNMlwuTSpsLweWNnwQxS5G4,19982
|
|
25
|
+
nmdc_runtime/api/db/s3.py,sha256=tRFEjjVXHMiUdZtRiq1ImvLza2s86TkgubDw3kchDOA,1046
|
|
26
|
+
nmdc_runtime/api/endpoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
+
nmdc_runtime/api/endpoints/capabilities.py,sha256=AkSh8wcsVSOkkjH0kzcM2TcO2o8AuKyXGt1km_VgwVs,717
|
|
28
|
+
nmdc_runtime/api/endpoints/find.py,sha256=AimO1vgq2DlO17yrPa1QCkEzZDbxyIBFxd09cTGbcOE,27063
|
|
29
|
+
nmdc_runtime/api/endpoints/jobs.py,sha256=OQImcwVuBdVXDtrEaT9awhxsFGtKsaKFr7UHTqsNjLg,7331
|
|
30
|
+
nmdc_runtime/api/endpoints/metadata.py,sha256=Msj7k5DxXUzXCZWcvy91T7AIFj1chhn_53Dsc44_qmc,9886
|
|
31
|
+
nmdc_runtime/api/endpoints/nmdcschema.py,sha256=VW-GOy12uXDnyfKZRsF6Nq13AWnJcJWFULu-js4FGK4,21163
|
|
32
|
+
nmdc_runtime/api/endpoints/object_types.py,sha256=dTbl3A9j9lyk186arA7cszTEKOY7vXWJO_aKYfFAV8s,1179
|
|
33
|
+
nmdc_runtime/api/endpoints/objects.py,sha256=GdrSWcxA87wx_-9gMR6ygxDbe2Ujd_NSA2nc955bAEs,10797
|
|
34
|
+
nmdc_runtime/api/endpoints/operations.py,sha256=iWDGoxa2w5k9tHaW0V_o6IjZZipUJqyY3OiCiYCBluA,2542
|
|
35
|
+
nmdc_runtime/api/endpoints/queries.py,sha256=2e3XiIH1dNULF42g3JHw-s0heRIjpe2rsBuIb4XhIb0,29716
|
|
36
|
+
nmdc_runtime/api/endpoints/runs.py,sha256=PWoEKCFQhWgpWklmRUjTc0UZCvDAa1i59gnPhAcZirA,3326
|
|
37
|
+
nmdc_runtime/api/endpoints/search.py,sha256=_h30mu8_Xndjggg3IllMDn5h8k92BX0ubxqRO85R0Ss,1187
|
|
38
|
+
nmdc_runtime/api/endpoints/sites.py,sha256=Cju6x1Trb25cZMc7BjumupVbpJleDf8Y3bVeuuxR4NE,6565
|
|
39
|
+
nmdc_runtime/api/endpoints/triggers.py,sha256=1DG2oEOV7zu5bT2qoeHrLNajY6K6sEGi7O7po3Bcmbk,673
|
|
40
|
+
nmdc_runtime/api/endpoints/users.py,sha256=syu_Tz05k-OAsDMhe125ofqeiCkmDMjdbRlfvRZ5_rI,7816
|
|
41
|
+
nmdc_runtime/api/endpoints/util.py,sha256=PVhapVtzUT8vPTF6hWbrHYHQt8Res2bt1A5pxXd5AAY,29538
|
|
42
|
+
nmdc_runtime/api/endpoints/wf_file_staging.py,sha256=kDclx-ijbGVz8K1HlXoIDMHXdVYlssQdDSZ_hs-3Ixk,10709
|
|
43
|
+
nmdc_runtime/api/endpoints/workflows.py,sha256=HWTnt-yrHp2DcrQT8BS_-SRQWNibkKNPVOpwqjS9QCA,14383
|
|
44
|
+
nmdc_runtime/api/endpoints/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
+
nmdc_runtime/api/endpoints/lib/helpers.py,sha256=E6pH0NtzKiSGBgIfoeukH5qeHKtxjLtCvw4F8LOacTQ,13874
|
|
46
|
+
nmdc_runtime/api/endpoints/lib/linked_instances.py,sha256=wbvjqSFMDgViRGtb30iCvEMV8BKIunw9RN6YNCUvAPs,7816
|
|
47
|
+
nmdc_runtime/api/endpoints/lib/path_segments.py,sha256=4nIy_KrYvTc80Np3ELnT94VCk2QfR-2055fMlcbBSPw,5724
|
|
48
|
+
nmdc_runtime/api/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
+
nmdc_runtime/api/models/capability.py,sha256=4__rqgLS4MCMjbaCM3e_ncR9KW001Klm34p2t_bp65k,262
|
|
50
|
+
nmdc_runtime/api/models/id.py,sha256=D8kDS-f3-OIxaNKrkhrdvyxu90ac4SDeFpVHboycDac,2724
|
|
51
|
+
nmdc_runtime/api/models/job.py,sha256=5NwbBNyz0FyY5QzdmFws4PMPYuf90UM6hdNQncUZ8Xs,1519
|
|
52
|
+
nmdc_runtime/api/models/metadata.py,sha256=mVCC0KODtKzNEYABk70jaKVoOTyZP87eCtzf96lgysw,196
|
|
53
|
+
nmdc_runtime/api/models/minter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
+
nmdc_runtime/api/models/nmdc_schema.py,sha256=C-Jb_eHr4dSVoYP-r7ldBUW-JHhCqSMtxSMT9HWKZ70,5768
|
|
55
|
+
nmdc_runtime/api/models/object.py,sha256=A-U5RkWfmEsb50x9ai5xdpHo9xy-O9mZj2gKCoBR87E,5004
|
|
56
|
+
nmdc_runtime/api/models/object_type.py,sha256=2Ejn5iCbqwqVEaOcYo4mvUJgBTDMhlmw8cLD92bWwSE,399
|
|
57
|
+
nmdc_runtime/api/models/operation.py,sha256=Nb_Ys8P_vdxL-5fcKTeNTmB9CongxK3-JWs0vhgkNq8,1606
|
|
58
|
+
nmdc_runtime/api/models/query.py,sha256=899fPzA55xyskflLXQKlIADQATsnpl8-Pu5rhrE7MvA,6739
|
|
59
|
+
nmdc_runtime/api/models/query_continuation.py,sha256=bWVX7ijk34kOCedxAWFGgSTNgpy98yp0IZni31oD3-Y,4150
|
|
60
|
+
nmdc_runtime/api/models/run.py,sha256=oikVbpqEX6dOoW4ehWz6vELkO_fhPf_GZ37KuqTk_18,4387
|
|
61
|
+
nmdc_runtime/api/models/site.py,sha256=KLLgln2KJrinUDp6ixxci1JFmcLAL4O4vEtFTZKc82U,2310
|
|
62
|
+
nmdc_runtime/api/models/trigger.py,sha256=TviQMk9-2HMZgCiaXYAF0WFnjD295jxnJLJCWsmtem4,201
|
|
63
|
+
nmdc_runtime/api/models/user.py,sha256=pufgqBDAfC2xa5y9WFgcJO0eF4bpqlZpZE3cAsBOMjw,7007
|
|
64
|
+
nmdc_runtime/api/models/util.py,sha256=koEyCBqizwHr-fzjoD_OJCN0c1n5apd3bVYdU-YWRvM,11659
|
|
65
|
+
nmdc_runtime/api/models/wfe_file_stages.py,sha256=I-RXMJWqpK18eEL1KH9nrrDgGriwIfiNl6HLBZAPv0g,4048
|
|
66
|
+
nmdc_runtime/api/models/workflow.py,sha256=etPFP_L9DcRoIAFwvMYzLLT2WlwRG6T68-7tzNzXnQ0,326
|
|
67
|
+
nmdc_runtime/api/models/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
|
+
nmdc_runtime/api/models/lib/helpers.py,sha256=k6AihKIiQ0kg2kk_qY_VNWTb96LGkazuztARhgjHr8M,2410
|
|
69
|
+
nmdc_runtime/api/swagger_ui/swagger_ui.py,sha256=lGl4sF7MXsWbX7fiauDPKUOVSVag_WeH7ja5J_KBHg0,1887
|
|
70
|
+
nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js,sha256=dpiHNsjSJpkDZ1uIV2YdjatULkTpd76Y44FbMMaDiE0,5925
|
|
71
|
+
nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js,sha256=a00v6gRTFJF_TLLAtOJjqbzY2RqjS-98qJKH-JE6qfg,17648
|
|
72
|
+
nmdc_runtime/api/swagger_ui/assets/script.js,sha256=oTEcRebeaAmKlkcPr6iaC7jDI-BJhox3jgpkdwY0LSw,13889
|
|
73
|
+
nmdc_runtime/api/swagger_ui/assets/style.css,sha256=BucritiMpu_UJdgSdDBImFpQHMQ2SaZzUd179gZ2Zx0,4565
|
|
74
|
+
nmdc_runtime/minter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
|
+
nmdc_runtime/minter/bootstrap.py,sha256=5Ej6pJVBRryRIi0ZwEloY78Zky7iE2okF6tPwRI2axM,822
|
|
76
|
+
nmdc_runtime/minter/config.py,sha256=-E1kQXTDraabrN4CENuVCHcNJafjVdiHWeUrucxBzMQ,2741
|
|
77
|
+
nmdc_runtime/minter/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
|
+
nmdc_runtime/minter/adapters/repository.py,sha256=CVVRdGclIDBgbqaOSmSl7OkOSE43kt6zJpE-etXIkOs,9267
|
|
79
|
+
nmdc_runtime/minter/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
+
nmdc_runtime/minter/domain/model.py,sha256=k1n3O3GYfhmFdjU_oHadVTeUIxORQf-VPW4Xy6S5SCQ,3599
|
|
81
|
+
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
82
|
+
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=I_lgExs6g1MRpMQdpedrnYdA1L7r_TBi4RiiD8ogrkM,4015
|
|
83
|
+
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
|
+
nmdc_runtime/site/dagster.yaml,sha256=VCFx2naLjaNzP8AYr5uvzxRIf4fC0mHm7cctcYsVrNA,1204
|
|
85
|
+
nmdc_runtime/site/entrypoint-daemon.sh,sha256=qeVlcNSTK_IJdumWqaNJxd6R4DS5aCDtxNoM7P86riI,716
|
|
86
|
+
nmdc_runtime/site/entrypoint-dagit-readonly.sh,sha256=RwHiDxIzPB9_wIvdIMRvTPcWKXl2tRpQtXITTioAvAc,559
|
|
87
|
+
nmdc_runtime/site/entrypoint-dagit.sh,sha256=IcBNsz6gBQCHJLxy7w9KGqyDUtC67b_sgk6vng4yalU,740
|
|
88
|
+
nmdc_runtime/site/graphs.py,sha256=hN3gg6aSYL9k0YHXp4BUS1VrsJH_1WCkEge2DwH9fy8,17999
|
|
89
|
+
nmdc_runtime/site/ops.py,sha256=I5ZHu0AwJd2HeA-17QfrM8ETtiRGMnvHAGlUzG1z6PE,64329
|
|
90
|
+
nmdc_runtime/site/repository.py,sha256=Kcdt293K1irWcmw7R9L-5sTEBhdoaIdgdM0mgFNrMMc,44117
|
|
91
|
+
nmdc_runtime/site/resources.py,sha256=VNbmIUQ_C25oIu_5_HXpZ_mWyGlMDWSJ59FCZK5mKKw,20987
|
|
92
|
+
nmdc_runtime/site/util.py,sha256=4h0X_fhjf3HdX6XDR8GvHgkrpxQY4OnZVtaOeXJVxJQ,1935
|
|
93
|
+
nmdc_runtime/site/workspace.yaml,sha256=RCw5i-v0T9MWuEZE-y1vEIJz6x2bpm1fyxheLxLEVLk,431
|
|
94
|
+
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
|
+
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=My2ORKVIk_Z9wzfnIuamDe3_hv4viid9ToSJDC5J4mY,2689
|
|
96
|
+
nmdc_runtime/site/backup/nmdcdb_mongoexport.py,sha256=y1x3B4-qxF5_itXOKYaix99OvDhW_PYxhLoLc4Y5E1M,4028
|
|
97
|
+
nmdc_runtime/site/backup/nmdcdb_mongoimport.py,sha256=k6w5yscMNYoMBVkaAA9soWS0Dj2CB0FRBSFlifRO3Ro,1739
|
|
98
|
+
nmdc_runtime/site/changesheets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
|
+
nmdc_runtime/site/changesheets/base.py,sha256=lZT6WCsEBl-FsTr7Ki8_ploT93uMiVyIWWKM36aOrRk,3171
|
|
100
|
+
nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv,sha256=lfAaAhnEOBbXoGmGhVoxbbJRnUOLDtWrF6ycAOtsJFg,128790
|
|
101
|
+
nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py,sha256=w7025ohUmoWxYHAl4zTFaV_Ig-CWkeeiZGlEs2J57mc,11628
|
|
102
|
+
nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py,sha256=OfaVAlhtd7DbH3Y5jHMGnDgOobWel4FnL3tP5BGJqaA,7969
|
|
103
|
+
nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
|
+
nmdc_runtime/site/export/ncbi_xml.py,sha256=kDRIv_0eeXfXlpTmK6PbvQYHd1bS69fa3RfE5XpLeIM,54933
|
|
105
|
+
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=HkE3Iufoqe-9rc6gOVVaqxpz7j98O5idL027w624UwA,15423
|
|
106
|
+
nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ0Rut4AtYc0tXA,4844
|
|
107
|
+
nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
|
+
nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
|
|
109
|
+
nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
|
+
nmdc_runtime/site/repair/database_updater.py,sha256=a6POYZcLEl0JvnuWxPjaOJtwZjkJhhvvUg1ABhnBiP8,21268
|
|
111
|
+
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
|
+
nmdc_runtime/site/translation/gold_translator.py,sha256=4AFgbJtHaVwme3a57Y6Foi-uzI8oBHUlOt3Ns7_a5_o,32879
|
|
113
|
+
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=CMoC56ymA0DKPkzqdMR4m5yYV6EcyH3tOvOiA3P6goE,23762
|
|
114
|
+
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=MMntFXwK62PdPNGpurTq5L3-pct7xAmUymRE2QqMPso,38572
|
|
115
|
+
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=_-KDZzC30dQ1y57lXEKWXE6ZfGozNHxGFvbGaj4f0Lg,30536
|
|
116
|
+
nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
|
|
117
|
+
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=bDxHIO6oR06KB1oziI8XLUDmMECHEM3jUxkoJrtIplM,48151
|
|
118
|
+
nmdc_runtime/site/translation/translator.py,sha256=WpIbuUie4h7FdNV_xkrz9AKD7sWbcxnI7jbiZJDTzTg,3677
|
|
119
|
+
nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
120
|
+
nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAVbty6Wl0,671
|
|
121
|
+
nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
|
|
122
|
+
nmdc_runtime/site/validation/util.py,sha256=aLkyXCHeB4EJm4IM0cOUKxskPXlo8B1ZoB0jYvmeXwE,3618
|
|
123
|
+
nmdc_runtime/static/NMDC_logo.svg,sha256=-13FrhQMWOg7FMprd3p6dwSOEf8xWkrHJB_P-nxGvLw,40570
|
|
124
|
+
nmdc_runtime/static/ORCID-iD_icon_vector.svg,sha256=JOKAW7lAn7Giws6aS_jBQGYG4DoPUBQ5S5mgmCSwmcA,973
|
|
125
|
+
nmdc_runtime/static/README.md,sha256=g0fitEFvZfBcSWzgjOxzDdfouBlq95xFKpAag0VPzL4,397
|
|
126
|
+
nmdc_runtime/static/favicon.ico,sha256=FsOefOmgCU9fp2f1FxKimZ8zHg7mBYj_Cv70S5VIcuI,15086
|
|
127
|
+
nmdc_runtime-2.12.0.dist-info/METADATA,sha256=iFu7WHDbOb8wSOTjbX0x8rn70TgSbl--WNZaDZt8KjY,1440
|
|
128
|
+
nmdc_runtime-2.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
129
|
+
nmdc_runtime-2.12.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
|
|
130
|
+
nmdc_runtime-2.12.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
131
|
+
nmdc_runtime-2.12.0.dist-info/RECORD,,
|
|
@@ -2,4 +2,3 @@
|
|
|
2
2
|
nmdcdb-mongodump = nmdc_runtime.site.backup.nmdcdb_mongodump:main
|
|
3
3
|
nmdcdb-mongoexport = nmdc_runtime.site.backup.nmdcdb_mongoexport:main
|
|
4
4
|
nmdcdb-mongoimport = nmdc_runtime.site.backup.nmdcdb_mongoimport:main
|
|
5
|
-
schemagen-terminusdb = nmdc_runtime.site.terminusdb.generate:cli
|
nmdc_runtime/containers.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
"""Containers module."""
|
|
2
|
-
|
|
3
|
-
from dependency_injector import containers, providers
|
|
4
|
-
|
|
5
|
-
from nmdc_runtime.domain.users.userService import UserService
|
|
6
|
-
from nmdc_runtime.infrastructure.database.impl.mongo.models.user import (
|
|
7
|
-
UserQueries,
|
|
8
|
-
)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class Container(containers.DeclarativeContainer):
|
|
12
|
-
user_queries = providers.Singleton(UserQueries)
|
|
13
|
-
|
|
14
|
-
user_service = providers.Factory(UserService, user_queries=user_queries)
|
nmdc_runtime/core/db/Database.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from contextlib import contextmanager, AbstractContextManager
|
|
2
|
-
from typing import Callable
|
|
3
|
-
import logging
|
|
4
|
-
|
|
5
|
-
from motor import motor_asyncio
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class Database:
|
|
9
|
-
def __init__(self, db_url: str) -> None:
|
|
10
|
-
self._client = motor_asyncio.AsyncIOMotorClient(db_url)
|
|
11
|
-
self._db = self._client["database"]
|
|
12
|
-
|
|
13
|
-
@contextmanager
|
|
14
|
-
def session(self):
|
|
15
|
-
return self._db
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
from .base import (
|
|
2
|
-
CustomException,
|
|
3
|
-
BadRequestException,
|
|
4
|
-
NotFoundException,
|
|
5
|
-
ForbiddenException,
|
|
6
|
-
UnprocessableEntity,
|
|
7
|
-
DuplicateValueException,
|
|
8
|
-
UnauthorizedException,
|
|
9
|
-
)
|
|
10
|
-
from .token import DecodeTokenException, ExpiredTokenException
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
__all__ = [
|
|
14
|
-
"CustomException",
|
|
15
|
-
"BadRequestException",
|
|
16
|
-
"NotFoundException",
|
|
17
|
-
"ForbiddenException",
|
|
18
|
-
"UnprocessableEntity",
|
|
19
|
-
"DuplicateValueException",
|
|
20
|
-
"UnauthorizedException",
|
|
21
|
-
"DecodeTokenException",
|
|
22
|
-
"ExpiredTokenException",
|
|
23
|
-
]
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
from http import HTTPStatus
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class CustomException(Exception):
|
|
5
|
-
code = HTTPStatus.BAD_GATEWAY
|
|
6
|
-
error_code = HTTPStatus.BAD_GATEWAY
|
|
7
|
-
message = HTTPStatus.BAD_GATEWAY.description
|
|
8
|
-
|
|
9
|
-
def __init__(self, message=None):
|
|
10
|
-
if message:
|
|
11
|
-
self.message = message
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class BadRequestException(CustomException):
|
|
15
|
-
code = HTTPStatus.BAD_REQUEST
|
|
16
|
-
error_code = HTTPStatus.BAD_REQUEST
|
|
17
|
-
message = HTTPStatus.BAD_REQUEST.description
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class NotFoundException(CustomException):
|
|
21
|
-
code = HTTPStatus.NOT_FOUND
|
|
22
|
-
error_code = HTTPStatus.NOT_FOUND
|
|
23
|
-
message = HTTPStatus.NOT_FOUND.description
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class ForbiddenException(CustomException):
|
|
27
|
-
code = HTTPStatus.FORBIDDEN
|
|
28
|
-
error_code = HTTPStatus.FORBIDDEN
|
|
29
|
-
message = HTTPStatus.FORBIDDEN.description
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class UnauthorizedException(CustomException):
|
|
33
|
-
code = HTTPStatus.UNAUTHORIZED
|
|
34
|
-
error_code = HTTPStatus.UNAUTHORIZED
|
|
35
|
-
message = HTTPStatus.UNAUTHORIZED.description
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class UnprocessableEntity(CustomException):
|
|
39
|
-
code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
40
|
-
error_code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
41
|
-
message = HTTPStatus.UNPROCESSABLE_ENTITY.description
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class DuplicateValueException(CustomException):
|
|
45
|
-
code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
46
|
-
error_code = HTTPStatus.UNPROCESSABLE_ENTITY
|
|
47
|
-
message = HTTPStatus.UNPROCESSABLE_ENTITY.description
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from nmdc_runtime.core.exceptions import CustomException
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class DecodeTokenException(CustomException):
|
|
5
|
-
code = 400
|
|
6
|
-
error_code = 10000
|
|
7
|
-
message = "token decode error"
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class ExpiredTokenException(CustomException):
|
|
11
|
-
code = 400
|
|
12
|
-
error_code = 10001
|
|
13
|
-
message = "expired token"
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
from abc import ABC
|
|
3
|
-
|
|
4
|
-
from abc import abstractmethod
|
|
5
|
-
|
|
6
|
-
from nmdc_runtime.domain.users.userSchema import UserAuth, UserUpdate, UserOut
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class IUserQueries(ABC):
|
|
10
|
-
@abstractmethod
|
|
11
|
-
async def create(self, user: UserAuth) -> UserOut:
|
|
12
|
-
"""Create new user"""
|
|
13
|
-
raise NotImplementedError
|
|
14
|
-
|
|
15
|
-
@abstractmethod
|
|
16
|
-
async def update(self, user: UserUpdate) -> UserOut:
|
|
17
|
-
"""Update user data"""
|
|
18
|
-
raise NotImplementedError
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from typing import Optional, List
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from pydantic import BaseModel, EmailStr
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class UserBase(BaseModel):
|
|
8
|
-
username: Optional[str] = None
|
|
9
|
-
email: Optional[str] = None
|
|
10
|
-
full_name: Optional[str] = None
|
|
11
|
-
site_admin: Optional[List[str]] = []
|
|
12
|
-
disabled: Optional[bool] = False
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class UserAuth(UserBase):
|
|
16
|
-
"""User register and login auth"""
|
|
17
|
-
|
|
18
|
-
username: str
|
|
19
|
-
password: str
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# Properties to receive via API on update
|
|
23
|
-
class UserUpdate(UserBase):
|
|
24
|
-
"""Updatable user fields"""
|
|
25
|
-
|
|
26
|
-
email: Optional[EmailStr] = None
|
|
27
|
-
|
|
28
|
-
# User information
|
|
29
|
-
full_name: Optional[str] = None
|
|
30
|
-
password: Optional[str] = None
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class UserOut(UserUpdate):
|
|
34
|
-
"""User fields pushed to the client"""
|
|
35
|
-
|
|
36
|
-
email: EmailStr
|
|
37
|
-
disabled: Optional[bool] = False
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
|
|
3
|
-
from nmdc_runtime.domain.users.userSchema import UserAuth, UserUpdate, UserOut
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class UserService:
|
|
7
|
-
def __init__(self, user_queries: Any) -> None:
|
|
8
|
-
self.__user_queries = user_queries
|
|
9
|
-
|
|
10
|
-
async def create_user(self, user: UserAuth) -> UserOut:
|
|
11
|
-
return await self.__user_queries.create(user)
|
|
12
|
-
|
|
13
|
-
async def update_user(self, username: str, new_user: UserUpdate) -> UserOut:
|
|
14
|
-
pass
|
nmdc_runtime/lib/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
## author: Bill Duncan
|
|
2
|
-
## summary: Contains methods for extracting data for the NMDC ETL pipeline.
|
|
3
|
-
|
|
4
|
-
## system level modules
|
|
5
|
-
import pandas as pds
|
|
6
|
-
import jq
|
|
7
|
-
import jsonasobj
|
|
8
|
-
import json
|
|
9
|
-
import zipfile
|
|
10
|
-
import yaml
|
|
11
|
-
from yaml import CLoader as Loader, CDumper as Dumper
|
|
12
|
-
from dotted_dict import DottedDict
|
|
13
|
-
from collections import namedtuple
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def extract_table(merged_df, table_name):
|
|
17
|
-
df = unpivot_dataframe(merged_df[merged_df.nmdc_data_source == table_name])
|
|
18
|
-
return df
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def unpivot_dataframe(
|
|
22
|
-
df,
|
|
23
|
-
index="nmdc_record_id",
|
|
24
|
-
columns="attribute",
|
|
25
|
-
value="value",
|
|
26
|
-
splice=["nmdc_record_id", "attribute", "value"],
|
|
27
|
-
):
|
|
28
|
-
## reshape eav structure to row-column structure
|
|
29
|
-
## see: https://www.journaldev.com/33398/pandas-melt-unmelt-pivot-function
|
|
30
|
-
if len(splice) > 0:
|
|
31
|
-
df = df[splice].pivot(index=index, columns=columns)
|
|
32
|
-
else:
|
|
33
|
-
df = df.pivot(index=index, columns=columns)
|
|
34
|
-
|
|
35
|
-
if len(df) > 0:
|
|
36
|
-
df = df[value].reset_index() # drop value hierarchical index
|
|
37
|
-
if len(df) > 0:
|
|
38
|
-
df = df.where(pds.notnull(df), None) # replace an NaN values with None
|
|
39
|
-
df.columns.name = None # remove column name attribute
|
|
40
|
-
|
|
41
|
-
return df
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
## author: Bill Duncan
|
|
2
|
-
## summary: Contains methods for saving or loading NMDC data into a resource.
|
|
3
|
-
|
|
4
|
-
import json
|
|
5
|
-
import jq
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def save_json(json_data, file_path: str):
|
|
9
|
-
## save json with changed data types
|
|
10
|
-
with open(file_path, "w") as out_file:
|
|
11
|
-
json.dump(json_data, out_file, indent=2)
|
|
12
|
-
return json_data
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def get_json_from_file(file_path: str, replace_single_quote=False):
|
|
16
|
-
## load json
|
|
17
|
-
with open(file_path, "r") as in_file:
|
|
18
|
-
if replace_single_quote: # json
|
|
19
|
-
text = in_file.read()
|
|
20
|
-
json_data = json.loads(text.replace("'", '"'))
|
|
21
|
-
else:
|
|
22
|
-
json_data = json.load(in_file)
|
|
23
|
-
return json_data
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def get_json(file_path="", replace_single_quote=False):
|
|
27
|
-
if len(file_path) > 0:
|
|
28
|
-
return get_json_from_file(file_path, replace_single_quote)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def save_nmdc_dict_as_json_to_file(nmdc_dict: dict, file_path: str):
|
|
32
|
-
with open(file_path, "w") as f:
|
|
33
|
-
json.dump(nmdc_dict, f, indent=2)
|
|
34
|
-
return json.dumps(nmdc_dict, indent=2)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def save_nmdc_dict(nmdc_dict: dict, file_path="", data_format="json"):
|
|
38
|
-
if len(file_path) > 0:
|
|
39
|
-
if "json" == data_format:
|
|
40
|
-
return save_nmdc_dict_as_json_to_file(nmdc_dict, file_path)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def make_nmdc_example_database(
|
|
44
|
-
gold_study_file="output/nmdc_etl/gold_study.json",
|
|
45
|
-
gold_omics_processing_file="output/nmdc_etl/gold_omics_processing.json",
|
|
46
|
-
gold_biosample_file="output/nmdc_etl/gold_biosample.json",
|
|
47
|
-
jgi_fastq_data_object_file="output/nmdc_etl/jgi_fastq_data_objects.json",
|
|
48
|
-
output_file="output/nmdc_example-database.json",
|
|
49
|
-
):
|
|
50
|
-
## load json files
|
|
51
|
-
biosample_json = get_json(gold_biosample_file)
|
|
52
|
-
projects_json = get_json(gold_omics_processing_file)
|
|
53
|
-
study_json = get_json(gold_study_file)
|
|
54
|
-
data_objects_json = get_json(jgi_fastq_data_object_file)
|
|
55
|
-
|
|
56
|
-
## get a list of distinct omics processing study ids, and choose the first 3 studies
|
|
57
|
-
study_ids = set(
|
|
58
|
-
jq.compile(".[] | .part_of[]").input(projects_json).all()
|
|
59
|
-
) # all returns a list
|
|
60
|
-
study_ids = list(study_ids)[0:3]
|
|
61
|
-
# study_ids =
|
|
62
|
-
|
|
63
|
-
## build a test set of studies from the study ids
|
|
64
|
-
study_test = (
|
|
65
|
-
jq.compile(
|
|
66
|
-
".[] | select( .id == ("
|
|
67
|
-
+ ", ".join('"{0}"'.format(id) for id in study_ids)
|
|
68
|
-
+ "))"
|
|
69
|
-
)
|
|
70
|
-
.input(study_json)
|
|
71
|
-
.all()
|
|
72
|
-
) # all() returns a list
|
|
73
|
-
|
|
74
|
-
## build a test set of projects from the study ids
|
|
75
|
-
## note: the jq query only selects first omics found for a given study id
|
|
76
|
-
projects_test = []
|
|
77
|
-
for id in study_ids:
|
|
78
|
-
j = (
|
|
79
|
-
jq.compile(f'[.[] | select( .part_of[]? | . == "{id}")][0]')
|
|
80
|
-
.input(projects_json)
|
|
81
|
-
.all()
|
|
82
|
-
)
|
|
83
|
-
projects_test.append(*j)
|
|
84
|
-
|
|
85
|
-
## get list of unique biossample ids from omics processing and build biosample test set
|
|
86
|
-
biosample_ids = (
|
|
87
|
-
jq.compile(".[] | .has_input[]?").input(projects_test).all()
|
|
88
|
-
) # all() returns a list
|
|
89
|
-
biosample_test = (
|
|
90
|
-
jq.compile(
|
|
91
|
-
".[] | select( .id == ("
|
|
92
|
-
+ ", ".join('"{0}"'.format(id) for id in biosample_ids)
|
|
93
|
-
+ "))"
|
|
94
|
-
)
|
|
95
|
-
.input(biosample_json)
|
|
96
|
-
.all()
|
|
97
|
-
) # all() returns a list
|
|
98
|
-
|
|
99
|
-
## get a list of data object ids and build data objects test set
|
|
100
|
-
data_objects_ids = (
|
|
101
|
-
jq.compile(".[] | .has_output[]?").input(projects_test).all()
|
|
102
|
-
) # all() returns a list
|
|
103
|
-
data_objects_test = (
|
|
104
|
-
jq.compile(
|
|
105
|
-
".[] | select( .id == ("
|
|
106
|
-
+ ", ".join('"{0}"'.format(id) for id in data_objects_ids)
|
|
107
|
-
+ "))"
|
|
108
|
-
)
|
|
109
|
-
.input(data_objects_json)
|
|
110
|
-
.all()
|
|
111
|
-
) # all() returns a list
|
|
112
|
-
|
|
113
|
-
## compile into database object
|
|
114
|
-
database = {
|
|
115
|
-
"study_set": [*study_test],
|
|
116
|
-
"omics_processing_set": [*projects_test],
|
|
117
|
-
"biosample_set": [*biosample_test],
|
|
118
|
-
"data_object_set": [*data_objects_test],
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
save_json(database, output_file)
|