dxpy 0.373.0__tar.gz → 0.375.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. {dxpy-0.373.0 → dxpy-0.375.1}/PKG-INFO +1 -1
  2. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/__init__.py +17 -0
  3. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxapplet.py +1 -1
  4. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxfile_functions.py +8 -3
  5. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/dataset_utilities.py +215 -46
  6. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/download.py +0 -2
  7. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/output_handling.py +20 -0
  8. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/column_conditions.json +8 -1
  9. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/column_conversion.json +9 -1
  10. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/filter_to_payload.py +70 -37
  11. dxpy-0.375.1/dxpy/dx_extract_utils/germline_utils.py +330 -0
  12. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/input_validation.py +143 -5
  13. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/retrieve_genotype_schema.json +16 -5
  14. dxpy-0.375.1/dxpy/dx_extract_utils/return_columns_genotype_only.json +20 -0
  15. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/ImageRef.py +2 -2
  16. dxpy-0.375.1/dxpy/nextflow/nextaur_assets.json +8 -0
  17. dxpy-0.375.1/dxpy/nextflow/nextaur_assets.staging.json +8 -0
  18. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/scripts/dx.py +48 -8
  19. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/scripts/dx_build_app.py +6 -5
  20. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/nextflow/src/nextflow.sh +47 -3
  21. dxpy-0.375.1/dxpy/toolkit_version.py +1 -0
  22. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/describe.py +11 -1
  23. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/PKG-INFO +1 -1
  24. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/SOURCES.txt +2 -0
  25. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-jobutil-new-job +6 -3
  26. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dx_bash_helpers.py +17 -47
  27. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dxclient.py +50 -16
  28. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_extract_assay.py +369 -5
  29. dxpy-0.373.0/dxpy/nextflow/nextaur_assets.json +0 -8
  30. dxpy-0.373.0/dxpy/nextflow/nextaur_assets.staging.json +0 -8
  31. dxpy-0.373.0/dxpy/toolkit_version.py +0 -1
  32. {dxpy-0.373.0 → dxpy-0.375.1}/MANIFEST.in +0 -0
  33. {dxpy-0.373.0 → dxpy-0.375.1}/Readme.md +0 -0
  34. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/api.py +0 -0
  35. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/app_builder.py +0 -0
  36. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/app_categories.py +0 -0
  37. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/asset_builder.py +0 -0
  38. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/__init__.py +0 -0
  39. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/__init__.py +0 -0
  40. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/cmd_line_options_validator.py +0 -0
  41. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/data_transformations.py +0 -0
  42. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/dataset.py +0 -0
  43. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/json_validation_by_schema.py +0 -0
  44. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/schemas/__init__.py +0 -0
  45. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/schemas/assay_filtering_conditions.py +0 -0
  46. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/schemas/assay_filtering_json_schemas.py +0 -0
  47. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/schemas/input_arguments_validation_schemas.py +0 -0
  48. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/vizclient.py +0 -0
  49. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/vizserver_filters_from_json_parser.py +0 -0
  50. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/apollo/vizserver_payload_builder.py +0 -0
  51. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/auth.py +0 -0
  52. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/download_all_inputs.py +0 -0
  53. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxanalysis.py +0 -0
  54. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxapp.py +0 -0
  55. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxapp_container_functions.py +0 -0
  56. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxdatabase.py +0 -0
  57. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxdatabase_functions.py +0 -0
  58. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxdataobject_functions.py +0 -0
  59. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxfile.py +0 -0
  60. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxglobalworkflow.py +0 -0
  61. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxjob.py +0 -0
  62. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxproject.py +0 -0
  63. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxrecord.py +0 -0
  64. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/dxworkflow.py +0 -0
  65. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/mount_all_inputs.py +0 -0
  66. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/bindings/search.py +0 -0
  67. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/__init__.py +0 -0
  68. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/cp.py +0 -0
  69. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/exec_io.py +0 -0
  70. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/help_messages.py +0 -0
  71. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/org.py +0 -0
  72. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/parsers.py +0 -0
  73. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/cli/workflow.py +0 -0
  74. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/compat.py +0 -0
  75. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/Homo_sapiens_genes_manifest.json +0 -0
  76. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/Homo_sapiens_genes_manifest_staging.json +0 -0
  77. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/Homo_sapiens_genes_manifest_staging_vep.json +0 -0
  78. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/Homo_sapiens_genes_manifest_vep.json +0 -0
  79. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/__init__.py +0 -0
  80. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/cohort_filter_payload.py +0 -0
  81. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/input_validation_somatic.py +0 -0
  82. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/retrieve_allele_schema.json +0 -0
  83. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/retrieve_annotation_schema.json +0 -0
  84. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/retrieve_bins.py +0 -0
  85. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/return_columns_allele.json +0 -0
  86. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/return_columns_annotation.json +0 -0
  87. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/return_columns_genotype.json +0 -0
  88. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dx_extract_utils/somatic_filter_payload.py +0 -0
  89. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/dxlog.py +0 -0
  90. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/exceptions.py +0 -0
  91. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/executable_builder.py +0 -0
  92. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/ImageRefFactory.py +0 -0
  93. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/__init__.py +0 -0
  94. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/awscli_assets.json +0 -0
  95. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/awscli_assets.staging.json +0 -0
  96. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/collect_images.py +0 -0
  97. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/nextflow_assets.json +0 -0
  98. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/nextflow_assets.staging.json +0 -0
  99. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/nextflow_builder.py +0 -0
  100. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/nextflow_templates.py +0 -0
  101. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/nextflow/nextflow_utils.py +0 -0
  102. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/packages/__init__.py +0 -0
  103. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/scripts/__init__.py +0 -0
  104. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/scripts/dx_app_wizard.py +0 -0
  105. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/scripts/dx_build_applet.py +0 -0
  106. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/ssh_tunnel_app_support.py +0 -0
  107. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/system_requirements.py +0 -0
  108. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/__init__.py +0 -0
  109. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/bash.py +0 -0
  110. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/python.py +0 -0
  111. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/Readme.md +0 -0
  112. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/bash/basic/dxapp.json +0 -0
  113. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/bash/basic/src/code.sh +0 -0
  114. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/bash/parallelized/dxapp.json +0 -0
  115. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/bash/parallelized/src/code.sh +0 -0
  116. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/bash/scatter-process-gather/dxapp.json +0 -0
  117. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/bash/scatter-process-gather/src/code.sh +0 -0
  118. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/nextflow/dxapp.json +0 -0
  119. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/basic/dxapp.json +0 -0
  120. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/basic/src/code.py +0 -0
  121. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/basic/test/test.py +0 -0
  122. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/parallelized/dxapp.json +0 -0
  123. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/parallelized/src/code.py +0 -0
  124. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/parallelized/test/test.py +0 -0
  125. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/scatter-process-gather/dxapp.json +0 -0
  126. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/scatter-process-gather/src/code.py +0 -0
  127. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/templates/python/scatter-process-gather/test/test.py +0 -0
  128. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/templating/utils.py +0 -0
  129. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/__init__.py +0 -0
  130. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/batch_utils.py +0 -0
  131. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/completer.py +0 -0
  132. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/config.py +0 -0
  133. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/exec_utils.py +0 -0
  134. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/executable_unbuilder.py +0 -0
  135. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/file_handle.py +0 -0
  136. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/file_load_utils.py +0 -0
  137. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/genomic_utils.py +0 -0
  138. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/job_log_client.py +0 -0
  139. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/local_exec_utils.py +0 -0
  140. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/pathmatch.py +0 -0
  141. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/pretty_print.py +0 -0
  142. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/printing.py +0 -0
  143. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/resolver.py +0 -0
  144. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/spelling_corrector.py +0 -0
  145. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/utils/version.py +0 -0
  146. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy/workflow_builder.py +0 -0
  147. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/dependency_links.txt +0 -0
  148. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/entry_points.txt +0 -0
  149. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/not-zip-safe +0 -0
  150. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/requires.txt +0 -0
  151. {dxpy-0.373.0 → dxpy-0.375.1}/dxpy.egg-info/top_level.txt +0 -0
  152. {dxpy-0.373.0 → dxpy-0.375.1}/requirements.txt +0 -0
  153. {dxpy-0.373.0 → dxpy-0.375.1}/requirements_setuptools.txt +0 -0
  154. {dxpy-0.373.0 → dxpy-0.375.1}/requirements_test.txt +0 -0
  155. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-clone-asset +0 -0
  156. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-docker +0 -0
  157. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-download-all-inputs +0 -0
  158. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-fetch-bundled-depends +0 -0
  159. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-generate-dxapp +0 -0
  160. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-jobutil-add-output +0 -0
  161. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-jobutil-dxlink +0 -0
  162. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-jobutil-get-identity-token +0 -0
  163. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-jobutil-parse-link +0 -0
  164. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-jobutil-report-error +0 -0
  165. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-log-stream +0 -0
  166. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-mount-all-inputs +0 -0
  167. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-notebook-reconnect +0 -0
  168. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-print-bash-vars +0 -0
  169. {dxpy-0.373.0 → dxpy-0.375.1}/scripts/dx-upload-all-outputs +0 -0
  170. {dxpy-0.373.0 → dxpy-0.375.1}/setup.cfg +0 -0
  171. {dxpy-0.373.0 → dxpy-0.375.1}/setup.py +0 -0
  172. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_batch.py +0 -0
  173. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_create_cohort.py +0 -0
  174. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_describe.py +0 -0
  175. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dx-docker.py +0 -0
  176. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dx_app_wizard.py +0 -0
  177. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dx_completion.py +0 -0
  178. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dx_symlink.py +0 -0
  179. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dxabs.py +0 -0
  180. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dxasset.py +0 -0
  181. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dxpy.py +0 -0
  182. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dxpy_utils.py +0 -0
  183. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_dxunpack.py +0 -0
  184. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_extract_dataset.py +0 -0
  185. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_extract_expression.py +0 -0
  186. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_extract_somatic.py +0 -0
  187. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_nextflow.py +0 -0
  188. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_nextflow_ImageRef.py +0 -0
  189. {dxpy-0.373.0 → dxpy-0.375.1}/test/test_nextflow_ImageRefFactory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dxpy
3
- Version: 0.373.0
3
+ Version: 0.375.1
4
4
  Summary: DNAnexus Platform API bindings for Python
5
5
  Home-page: https://github.com/dnanexus/dx-toolkit
6
6
  Author: Aleksandra Zalcman, Andrey Kislyuk, Anurag Biyani, Geet Duggal, Katherine Lai, Kurt Jensen, Marek Hrvol, Ohad Rodeh, Phil Sung
@@ -560,6 +560,7 @@ def DXHTTPRequest(resource, data, method='POST', headers=None, auth=True,
560
560
 
561
561
  retried_responses = []
562
562
  _url = None
563
+ redirect_url = None
563
564
  while True:
564
565
  success, time_started = True, None
565
566
  response = None
@@ -628,6 +629,13 @@ def DXHTTPRequest(resource, data, method='POST', headers=None, auth=True,
628
629
  pass
629
630
  _UPGRADE_NOTIFY = False
630
631
 
632
+ # Handle redirection manually for symlink files
633
+ if response.status // 100 == 3:
634
+ redirect_url = response.headers.get('Location')
635
+ if not redirect_url:
636
+ raise exceptions.UrllibInternalError("Location not found in redirect response", response.status)
637
+ break
638
+
631
639
  # If an HTTP code that is not in the 200 series is received and the content is JSON, parse it and throw the
632
640
  # appropriate error. Otherwise, raise the usual exception.
633
641
  if response.status // 100 != 2:
@@ -784,6 +792,15 @@ def DXHTTPRequest(resource, data, method='POST', headers=None, auth=True,
784
792
  logger.info("[%s] %s %s: Recovered after %d retries", time.ctime(), method, _url, try_index)
785
793
 
786
794
  raise AssertionError('Should never reach this line: should have attempted a retry or reraised by now')
795
+
796
+ # Make a new request to the URL specified in the Location header if we got a redirect_url
797
+ if redirect_url:
798
+ return DXHTTPRequest(redirect_url, body, method=method, headers=headers, auth=auth, timeout=timeout,
799
+ use_compression=use_compression, jsonify_data=jsonify_data,
800
+ want_full_response=want_full_response,
801
+ decode_response_body=decode_response_body, prepend_srv=prepend_srv,
802
+ session_handler=session_handler,
803
+ max_retries=max_retries, always_retry=always_retry, **kwargs)
787
804
  raise AssertionError('Should never reach this line: should never break out of loop')
788
805
 
789
806
 
@@ -242,7 +242,7 @@ class DXExecutable:
242
242
  :param preserve_job_outputs: Copy cloneable outputs of every non-reused job entering "done" state in this root execution to a folder in the project. If value is True it will place job outputs into the "intermediateJobOutputs" subfolder under the output folder for the root execution. If the value is dict, it may contains "folder" key with desired folder path. If the folder path starts with '/' it refers to an absolute path within the project, otherwise, it refers to a subfolder under root execution's output folder.
243
243
  :type preserve_job_outputs: boolean or dict
244
244
  :param detailed_job_metrics: Enable detailed job metrics for this root execution
245
- :type preserve_job_outputs: boolean
245
+ :type detailed_job_metrics: boolean
246
246
  :param extra_args: If provided, a hash of options that will be merged into the underlying JSON given for the API call
247
247
  :type extra_args: dict
248
248
  :returns: Object handler of the newly created job
@@ -191,8 +191,12 @@ def _download_symbolic_link(dxid, md5digest, project, dest_filename, symlink_max
191
191
  "Please see the documentation at https://aria2.github.io/.")
192
192
  return
193
193
 
194
- dxfile = dxpy.DXFile(dxid)
195
- url, _headers = dxfile.get_download_url(preauthenticated=True,
194
+ if isinstance(dxid, DXFile):
195
+ dxf = dxid
196
+ else:
197
+ dxf = dxpy.DXFile(dxid)
198
+
199
+ url, _headers = dxf.get_download_url(preauthenticated=True,
196
200
  duration=6*3600,
197
201
  project=project)
198
202
 
@@ -272,7 +276,8 @@ def _download_dxfile(dxid, filename, part_retry_counter,
272
276
  dxfile_desc = dxfile.describe(fields={"parts"}, default_fields=True, **kwargs)
273
277
 
274
278
  # handling of symlinked files.
275
- if 'drive' in dxfile_desc:
279
+ if 'drive' in dxfile_desc and 'parts' not in dxfile_desc \
280
+ or 'drive' in dxfile_desc and dxfile_desc["drive"] == "drive-PUBLISHED":
276
281
  if 'md5' in dxfile_desc:
277
282
  md5 = dxfile_desc['md5']
278
283
  else:
@@ -17,7 +17,7 @@
17
17
  # License for the specific language governing permissions and limitations
18
18
  # under the License.
19
19
 
20
- from __future__ import print_function, unicode_literals, division, absolute_import
20
+ from __future__ import print_function, unicode_literals, division, absolute_import, annotations
21
21
 
22
22
  import sys
23
23
  import collections
@@ -47,6 +47,21 @@ from ..exceptions import (
47
47
  )
48
48
 
49
49
  from ..dx_extract_utils.filter_to_payload import validate_JSON, final_payload
50
+ from ..dx_extract_utils.germline_utils import (
51
+ get_genotype_only_types,
52
+ add_germline_base_sql,
53
+ sort_germline_variant,
54
+ harmonize_germline_sql,
55
+ harmonize_germline_results,
56
+ get_germline_ref_payload,
57
+ get_germline_loci_payload,
58
+ update_genotype_only_ref,
59
+ get_genotype_types,
60
+ infer_genotype_type,
61
+ get_types_to_filter_out_when_infering,
62
+ filter_results
63
+ )
64
+ from ..dx_extract_utils.input_validation import inference_validation
50
65
  from ..dx_extract_utils.input_validation_somatic import validate_somatic_filter
51
66
  from ..dx_extract_utils.somatic_filter_payload import somatic_final_payload
52
67
  from ..dx_extract_utils.cohort_filter_payload import cohort_filter_payload, cohort_final_payload
@@ -65,7 +80,7 @@ from ..bindings.apollo.vizserver_payload_builder import VizPayloadBuilder
65
80
  from ..bindings.apollo.vizclient import VizClient
66
81
 
67
82
  from ..bindings.apollo.data_transformations import transform_to_expression_matrix
68
- from .output_handling import write_expression_output
83
+ from .output_handling import write_expression_output, pretty_print_json
69
84
 
70
85
  from .help_messages import EXTRACT_ASSAY_EXPRESSION_JSON_HELP, EXTRACT_ASSAY_EXPRESSION_ADDITIONAL_FIELDS_HELP
71
86
 
@@ -671,6 +686,9 @@ def get_assay_name_info(
671
686
  if a["reference_genome"]:
672
687
  selected_ref_genome = a["reference_genome"]["name"].split(".", 1)[1]
673
688
  additional_descriptor_info["genotype_type_table"] = a["entities"]["genotype"]["fields"]["type"]["mapping"]["table"]
689
+ for exclude_genotype in ("exclude_refdata", "exclude_halfref", "exclude_nocall"):
690
+ if exclude_genotype in a:
691
+ additional_descriptor_info[exclude_genotype] = a[exclude_genotype]
674
692
  elif friendly_assay_type == "somatic":
675
693
  selected_ref_genome = ""
676
694
  for a in target_assays:
@@ -685,6 +703,18 @@ def comment_fill(string, comment_string='# ', **kwargs):
685
703
  return re.sub('^', comment_string, fill(string, width_adjustment=width_adjustment, **kwargs), flags=re.MULTILINE)
686
704
 
687
705
 
706
+ def retrieve_samples(resp: dict, assay_name: str, assay_id: str) -> list:
707
+ """
708
+ Get the list of sample_ids from the sample table for the selected assay.
709
+ """
710
+ sample_payload = {
711
+ "project_context": resp["datasetRecordProject"],
712
+ "fields": [{"sample_id": "sample$sample_id"}],
713
+ "raw_filters": {"assay_filters": {"name": assay_name, "id": assay_id}},
714
+ }
715
+ return [_["sample_id"] for _ in raw_api_call(resp, sample_payload)["results"]]
716
+
717
+
688
718
  def extract_assay_germline(args):
689
719
  """
690
720
  Retrieve the selected data or generate SQL to retrieve the data from an genetic variant assay in a dataset or cohort based on provided rules.
@@ -714,6 +744,13 @@ def extract_assay_germline(args):
714
744
  elif filter_given:
715
745
  err_exit("--list-assays cannot be presented with other options.")
716
746
 
747
+ #### Validate that a retrieve options infer_ref or infer_nocall are not passed with retrieve_allele or retrieve_annotation ####
748
+ if args.infer_ref or args.infer_nocall:
749
+ if args.retrieve_allele or args.retrieve_annotation or args.sql or args.list_assays:
750
+ err_exit(
751
+ "The flags, --infer-ref and --infer-nocall, can only be used with --retrieve-genotype."
752
+ )
753
+
717
754
  #### Check if the retrieve options are passed correctly, print help if needed ####
718
755
  if args.retrieve_allele:
719
756
  if args.json_help:
@@ -749,10 +786,36 @@ def extract_assay_germline(args):
749
786
  print(
750
787
  comment_fill('Filters and respective definitions', comment_string='# ') + '\n#\n' +
751
788
  comment_fill('allele_id: ID(s) of one or more alleles for which sample genotypes will be returned. If multiple values are provided, any samples having at least one allele that match any of the values specified will be listed. For example, ["1_1000_A_T", "1_1010_C_T"], will search for samples with at least one allele matching either "1_1000_A_T" or "1_1010_C_T". String match is case insensitive.') + '\n#\n' +
789
+ comment_fill('location: Genomic position in the reference genome of the starting position of the alleles. If multiple values are provided in the list, the conditional search will be, "OR." String match is case sensitive.') + '\n#\n' +
790
+ comment_fill('allele_id and location are mutually exclusive filters.') + '\n#\n' +
752
791
  comment_fill('sample_id: Optional, one or more sample IDs for which sample genotypes will be returned. If the provided object is a cohort, this further intersects the sample ids. If a user has a list of samples more than 1,000, it is recommended to use a cohort id containing all the samples.') + '\n#\n' +
753
- comment_fill('genotype_type: Optional, one or more genotype types for which sample genotype types will be returned. One of: hom-alt (homozygous for the non-ref allele), het-ref (heterozygous with a ref allele and alt allele), het-alt (heterozygous with two distinct alt alleles), half (only one alt allele is known, second allele is unknown).') + '\n#\n' +
754
- comment_fill('JSON filter template for --retrieve-genotype', comment_string='# ') + '\n' +
755
- '{\n "sample_id": ["s1", "s2"],\n "allele_id": ["1_1000_A_T","2_1000_G_C"],\n "genotype_type": ["het-ref", "hom-alt"]\n}'
792
+ comment_fill('genotype_type: Optional, one or more genotype types for which sample genotype types will be returned.') + '\n' +
793
+ comment_fill('One of:') + '\n' +
794
+ comment_fill('\tref\t(homozygous for the reference allele\t\t\te.g. 0/0)') + '\n' +
795
+ comment_fill('\thet-ref\t(heterozygous for the ref allele and alt allele\t\te.g. 0/1)') + '\n' +
796
+ comment_fill('\thom\t(homozygous for the non-ref allele\t\t\te.g. 1/1)') + '\n' +
797
+ comment_fill('\thet-alt\t(heterozygous with two distinct alt alleles\t\te.g. 1/2)') + '\n' +
798
+ comment_fill('\thalf\t(only one allele is known, second allele is unknown\te.g. ./1)') + '\n' +
799
+ comment_fill('\tno-call\t(both alleles are unknown\t\t\t\te.g. ./.)') + '\n#\n' +
800
+ comment_fill('JSON filter templates for --retrieve-genotype', comment_string='# ') + '\n#\n' +
801
+ comment_fill('Example using location:', comment_string='# ') + '\n' +
802
+ pretty_print_json(
803
+ {
804
+ "sample_id": ["s1", "s2"],
805
+ "location": [
806
+ {"chromosome": "1", "starting_position": "10000"},
807
+ {"chromosome": "X", "starting_position": "500"},
808
+ ],
809
+ "genotype_type": ["ref", "het-ref", "hom", "het-alt", "half", "no-call"],
810
+ }
811
+ ) + '\n' +
812
+ comment_fill('Example using allele_id:', comment_string='# ') + '\n' +
813
+ pretty_print_json({
814
+ "sample_id": ["s1", "s2"],
815
+ "allele_id": ["1_1000_A_T", "2_1000_G_C"],
816
+ "genotype_type": ["het-ref", "hom", "het-alt", "half"],
817
+ }
818
+ )
756
819
  )
757
820
  sys.exit(0)
758
821
 
@@ -795,57 +858,139 @@ def extract_assay_germline(args):
795
858
 
796
859
  out_file, print_to_stdout = assign_output_method(args, resp["recordName"], "germline")
797
860
 
798
- payload = {}
861
+ filter_type = None
799
862
  if args.retrieve_allele:
800
- payload, fields_list = final_payload(
801
- full_input_dict=filter_dict,
802
- name=selected_assay_name,
803
- id=selected_assay_id,
804
- project_context=project,
805
- genome_reference=selected_ref_genome,
806
- filter_type="allele",
807
- )
863
+ filter_type = "allele"
808
864
  elif args.retrieve_annotation:
865
+ filter_type = "annotation"
866
+
867
+ if filter_type and filter_given:
809
868
  payload, fields_list = final_payload(
810
869
  full_input_dict=filter_dict,
811
870
  name=selected_assay_name,
812
871
  id=selected_assay_id,
813
872
  project_context=project,
814
873
  genome_reference=selected_ref_genome,
815
- filter_type="annotation",
874
+ filter_type=filter_type,
816
875
  )
817
- elif args.retrieve_genotype:
818
- payload, fields_list = final_payload(
876
+
877
+ add_germline_base_sql(resp, payload)
878
+
879
+ if args.sql:
880
+ sql_results = raw_query_api_call(resp, payload)
881
+
882
+ if print_to_stdout:
883
+ print(sql_results)
884
+ else:
885
+ with open(out_file, "w") as sql_file:
886
+ print(sql_results, file=sql_file)
887
+ else:
888
+ resp_raw = raw_api_call(resp, payload)
889
+ ordered_results = sorted(resp_raw["results"], key=sort_germline_variant)
890
+
891
+ csv_from_json(
892
+ out_file_name=out_file,
893
+ print_to_stdout=print_to_stdout,
894
+ sep="\t",
895
+ raw_results=ordered_results,
896
+ column_names=fields_list,
897
+ quote_char=str("|"),
898
+ )
899
+
900
+ if args.retrieve_genotype and filter_given:
901
+ exclude_refdata: bool = additional_descriptor_info.get("exclude_refdata")
902
+ exclude_halfref: bool = additional_descriptor_info.get("exclude_halfref")
903
+ exclude_nocall: bool = additional_descriptor_info.get("exclude_nocall")
904
+ inference_validation(
905
+ args.infer_nocall,
906
+ args.infer_ref,
907
+ filter_dict,
908
+ exclude_nocall,
909
+ exclude_refdata,
910
+ exclude_halfref
911
+ )
912
+ # in case of infer flags, we query all the genotypes and do the filtering post query
913
+ if args.infer_ref or args.infer_nocall:
914
+ types_to_filter_out = get_types_to_filter_out_when_infering(filter_dict.get("genotype_type", []))
915
+ filter_dict["genotype_type"] = []
916
+
917
+ # get a list of requested genotype types for the genotype table only queries
918
+ if "allele_id" in filter_dict:
919
+ genotype_only_types = []
920
+ else:
921
+ genotype_only_types = get_genotype_only_types(filter_dict,
922
+ exclude_refdata, exclude_halfref, exclude_nocall)
923
+
924
+ # get the payload for the genotype/allele table query for alternate genotype types
925
+ genotype_payload, fields_list = final_payload(
819
926
  full_input_dict=filter_dict,
820
927
  name=selected_assay_name,
821
928
  id=selected_assay_id,
822
929
  project_context=project,
823
930
  genome_reference=selected_ref_genome,
824
931
  filter_type="genotype",
932
+ order=not genotype_only_types,
825
933
  )
826
934
 
827
- if "CohortBrowser" in resp["recordTypes"]:
828
- if resp.get("baseSql"):
829
- payload["base_sql"] = resp.get("baseSql")
830
- payload["filters"] = resp["filters"]
935
+ add_germline_base_sql(resp, genotype_payload)
936
+
937
+ genotype_only_payloads = []
938
+ if genotype_only_types:
939
+ # get the payloads for the genotype table only query
940
+ # assay_filter does not support "or" so there is a separate query for each partition
941
+ for i, genotype_only_type in enumerate(genotype_only_types):
942
+ genotype_only_filter_dict = filter_dict.copy()
943
+ if genotype_only_type == "ref":
944
+ genotype_only_filter_dict["ref_yn"] = True
945
+ elif genotype_only_type == "half":
946
+ genotype_only_filter_dict["halfref_yn"] = True
947
+ elif genotype_only_type == "no-call":
948
+ genotype_only_filter_dict["nocall_yn"] = True
949
+
950
+ genotype_only_payload, _ = final_payload(
951
+ full_input_dict=genotype_only_filter_dict,
952
+ name=selected_assay_name,
953
+ id=selected_assay_id,
954
+ project_context=project,
955
+ genome_reference=selected_ref_genome,
956
+ filter_type="genotype_only",
957
+ exclude_refdata=genotype_only_type != "ref",
958
+ exclude_halfref=genotype_only_type != "half",
959
+ exclude_nocall=genotype_only_type != "no-call",
960
+ order=i == len(genotype_only_types) - 1,
961
+ )
962
+
963
+ add_germline_base_sql(resp, genotype_only_payload)
964
+
965
+ genotype_only_payloads.append(genotype_only_payload)
966
+
967
+ # get the list of requested genotype types for the genotype/allele table query
968
+ genotype_types = get_genotype_types(filter_dict)
831
969
 
832
- #### Run api call to get sql or extract data ####
833
- if filter_given:
834
970
  if args.sql:
835
- sql_results = raw_query_api_call(resp, payload)
836
- if args.retrieve_genotype:
971
+ sql_queries = []
972
+ if genotype_types:
973
+ # get the genotype/allele table query
974
+ genotype_sql_query = raw_query_api_call(resp, genotype_payload)[:-1]
837
975
  try:
838
976
  geno_table_regex = r"\b" + additional_descriptor_info["genotype_type_table"] + r"\w+"
839
- geno_table = re.search(
840
- geno_table_regex, sql_results
841
- ).group()
977
+ re.search(geno_table_regex, genotype_sql_query).group()
842
978
  except Exception:
843
- err_exit("Failed to find the table, {}, in the generated SQL".format(additional_descriptor_info["genotype_type_table"]),
844
- expected_exceptions=(AttributeError,))
845
- substr = "`" + geno_table + "`.`type`"
846
- sql_results = sql_results.replace(
847
- substr, "REPLACE(`" + geno_table + "`.`type`, 'hom', 'hom-alt')", 1
848
- )
979
+ err_exit("Failed to find the table, {}, in the generated SQL".format(
980
+ additional_descriptor_info["genotype_type_table"]), expected_exceptions=(AttributeError,))
981
+ sql_queries.append(genotype_sql_query)
982
+
983
+ # get the genotype table only query
984
+ # assay_filter does not support "or" so there is a separate query for each partition
985
+ for genotype_only_payload in genotype_only_payloads:
986
+ # join the allele table to get the ref, will join on locus_id
987
+ genotype_only_payload["fields"].append({"ref": "allele$ref"})
988
+ genotype_only_sql_query = raw_query_api_call(resp, genotype_only_payload)[:-1]
989
+ # update the query to add column in the genotype/allele table query and join on locus_id
990
+ sql_queries.append(harmonize_germline_sql(genotype_only_sql_query))
991
+
992
+ # combine the queries into a single query
993
+ sql_results = " UNION ".join(sql_queries) + ";"
849
994
 
850
995
  if print_to_stdout:
851
996
  print(sql_results)
@@ -853,18 +998,42 @@ def extract_assay_germline(args):
853
998
  with open(out_file, "w") as sql_file:
854
999
  print(sql_results, file=sql_file)
855
1000
  else:
856
- resp_raw = raw_api_call(resp, payload)
857
- if args.retrieve_genotype:
858
- for r in resp_raw["results"]:
859
- if r["genotype_type"] == "hom":
860
- r["genotype_type"] = "hom-alt"
861
-
862
- def sort_variant(d):
863
- chrom, pos = d["allele_id"].split("_")[:2]
864
- if chrom.isdigit():
865
- return int(chrom), '', int(pos)
866
- return float('inf'), chrom, int(pos)
867
- ordered_results = sorted(resp_raw["results"], key=sort_variant)
1001
+ # get the list of dictionary results for the genotype/allele table query
1002
+ ordered_results = []
1003
+ if genotype_types:
1004
+ genotype_resp_raw = raw_api_call(resp, genotype_payload)
1005
+ ordered_results.extend(genotype_resp_raw["results"])
1006
+
1007
+ # get the list of dictionary results for each genotype table only query
1008
+ for genotype_only_payload in genotype_only_payloads:
1009
+ genotype_only_resp_raw = raw_api_call(resp, genotype_only_payload)
1010
+ # add missing keys that are in the allele table part of the genotype/allele table query
1011
+ ordered_results.extend(harmonize_germline_results(genotype_only_resp_raw["results"], fields_list))
1012
+
1013
+ if genotype_only_types:
1014
+ # get the ref value from the allele table using locus ids
1015
+ # ingestion of VCF lines missing ALT is unsupported, so the locus_id will exist in the allele table
1016
+ # normalized ref values in the locus_id will match the ref value for missing ALT lines if they
1017
+ # were ingested and locus_id could be parsed for the ref value
1018
+ ref_payload = get_germline_ref_payload(ordered_results, genotype_payload)
1019
+ if ref_payload:
1020
+ locus_id_refs = raw_api_call(resp, ref_payload)
1021
+ update_genotype_only_ref(ordered_results, locus_id_refs)
1022
+
1023
+ if args.infer_ref or args.infer_nocall:
1024
+ samples = retrieve_samples(resp, selected_assay_name, selected_assay_id)
1025
+ selected_samples = set(filter_dict.get("sample_id", []))
1026
+ if selected_samples:
1027
+ samples = list(selected_samples.intersection(samples))
1028
+ loci_payload = get_germline_loci_payload(filter_dict["location"], genotype_payload)
1029
+ loci = [locus for locus in raw_api_call(resp, loci_payload)["results"]]
1030
+ type_to_infer = "ref" if args.infer_ref else "no-call"
1031
+ ordered_results = infer_genotype_type(samples, loci, ordered_results, type_to_infer)
1032
+ # Filter out genotype types that were not requested
1033
+ if len(types_to_filter_out) > 0:
1034
+ ordered_results = filter_results(ordered_results, "genotype_type", types_to_filter_out)
1035
+
1036
+ ordered_results.sort(key=sort_germline_variant)
868
1037
 
869
1038
  csv_from_json(
870
1039
  out_file_name=out_file,
@@ -23,8 +23,6 @@ import collections
23
23
  import os
24
24
  import subprocess
25
25
  import sys
26
- import tempfile
27
- import warnings
28
26
  import logging
29
27
 
30
28
  import dxpy
@@ -1,6 +1,8 @@
1
+ from __future__ import annotations
1
2
  import sys
2
3
  import csv
3
4
  import os
5
+ import json
4
6
  from ..exceptions import err_exit
5
7
 
6
8
 
@@ -160,3 +162,21 @@ def write_expression_output(
160
162
 
161
163
  else:
162
164
  error_handler("Unexpected error occurred while writing output")
165
+
166
+
167
+
168
def pretty_print_json(json_dict: dict) -> "str | None":
    """Return the provided dictionary formatted as indented JSON text.

    Args:
        json_dict: A dictionary whose contents are JSON-serializable.

    Returns:
        The JSON text with keys sorted and a 4-space indent, or None
        (after writing a warning to stderr) when json_dict is not a
        dictionary.
    """
    # Guard clause: anything other than a dict is rejected with a warning
    # rather than an exception, so callers receive None and can carry on.
    if not isinstance(json_dict, dict):
        print("WARNING: Invalid JSON provided.", file=sys.stderr)
        return None

    return json.dumps(json_dict, sort_keys=True, indent=4)
@@ -10,6 +10,13 @@
10
10
  "allele_id": "in",
11
11
  "sample_id": "in"
12
12
  },
13
+ "genotype_only": {
14
+ "genotype_type": "in",
15
+ "sample_id": "in",
16
+ "ref_yn": "is",
17
+ "halfref_yn": "is",
18
+ "nocall_yn": "is"
19
+ },
13
20
  "annotation": {
14
21
  "allele_id": "in",
15
22
  "gene_name": "in",
@@ -20,4 +27,4 @@
20
27
  "hgvs_c": "in",
21
28
  "hgvs_p": "in"
22
29
  }
23
- }
30
+ }
@@ -10,6 +10,14 @@
10
10
  "sample_id": "genotype$sample_id",
11
11
  "allele_id": "allele$a_id"
12
12
  },
13
+ "genotype_only": {
14
+ "genotype_type": "genotype$type",
15
+ "sample_id": "genotype$sample_id",
16
+ "allele_id": "genotype$a_id",
17
+ "ref_yn": "genotype$ref_yn",
18
+ "halfref_yn": "genotype$halfref_yn",
19
+ "nocall_yn": "genotype$nocall_yn"
20
+ },
13
21
  "annotation": {
14
22
  "allele_id": "allele$a_id",
15
23
  "gene_name": "annotation$gene_name",
@@ -20,4 +28,4 @@
20
28
  "hgvs_c": "annotation$hgvs_c",
21
29
  "hgvs_p": "annotation$hgvs_p"
22
30
  }
23
- }
31
+ }