dpdispatcher 0.5.8__tar.gz → 0.5.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dpdispatcher might be problematic. Click here for more details.

Files changed (234) hide show
  1. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.pre-commit-config.yaml +4 -4
  2. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/Dockerfile +1 -1
  3. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/PKG-INFO +8 -1
  4. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/README.md +6 -0
  5. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/batch.md +6 -0
  6. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/context.md +8 -0
  7. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/install.md +6 -0
  8. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/__init__.py +4 -0
  9. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/_version.py +2 -2
  10. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dp_cloud_server.py +7 -0
  11. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dp_cloud_server_context.py +10 -7
  12. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpcloudserver/client.py +22 -9
  13. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/fugaku.py +1 -3
  14. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/hdfs_cli.py +4 -12
  15. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/hdfs_context.py +1 -4
  16. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/lsf.py +2 -6
  17. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/machine.py +1 -3
  18. dpdispatcher-0.5.10/dpdispatcher/openapi.py +198 -0
  19. dpdispatcher-0.5.10/dpdispatcher/openapi_context.py +259 -0
  20. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/pbs.py +4 -12
  21. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/slurm.py +2 -6
  22. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/submission.py +9 -19
  23. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher.egg-info/PKG-INFO +8 -1
  24. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher.egg-info/SOURCES.txt +3 -0
  25. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher.egg-info/requires.txt +6 -0
  26. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/pyproject.toml +2 -1
  27. dpdispatcher-0.5.10/tests/jsons/machine_openapi.json +17 -0
  28. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/ci-docker.yml +0 -0
  29. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/machines.yml +0 -0
  30. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/mirror_gitee.yml +0 -0
  31. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/publish_conda.yml +0 -0
  32. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/pyright.yml +0 -0
  33. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/release.yml +0 -0
  34. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.github/workflows/test.yml +0 -0
  35. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/.gitignore +0 -0
  36. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/CONTRIBUTING.md +0 -0
  37. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/LICENSE +0 -0
  38. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/LICENSE +0 -0
  39. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/README.md +0 -0
  40. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/pbs/docker-compose.yml +0 -0
  41. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/pbs/start-pbs.sh +0 -0
  42. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/pbs.sh +0 -0
  43. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/slurm/docker-compose.yml +0 -0
  44. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/slurm/register_cluster.sh +0 -0
  45. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/slurm/start-slurm.sh +0 -0
  46. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/slurm.sh +0 -0
  47. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/ssh/docker-compose.yml +0 -0
  48. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/ssh/start-ssh.sh +0 -0
  49. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/ssh.sh +0 -0
  50. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/ci/ssh_rsync.sh +0 -0
  51. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/codecov.yml +0 -0
  52. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/conda/conda_build_config.yaml +0 -0
  53. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/conda/meta.yaml +0 -0
  54. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/.gitignore +0 -0
  55. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/Makefile +0 -0
  56. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/conf.py +0 -0
  57. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/credits.rst +0 -0
  58. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/dpdispatcher_on_yarn.md +0 -0
  59. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/examples/expanse.md +0 -0
  60. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/examples/g16.md +0 -0
  61. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/examples/shell.md +0 -0
  62. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/getting-started.md +0 -0
  63. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/index.rst +0 -0
  64. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/machine.rst +0 -0
  65. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/make.bat +0 -0
  66. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/requirements.txt +0 -0
  67. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/resources.rst +0 -0
  68. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/doc/task.rst +0 -0
  69. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/JobStatus.py +0 -0
  70. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/arginfo.py +0 -0
  71. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/base_context.py +0 -0
  72. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/distributed_shell.py +0 -0
  73. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpcloudserver/__init__.py +0 -0
  74. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpcloudserver/config.py +0 -0
  75. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpcloudserver/retcode.py +0 -0
  76. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpcloudserver/temp_test.py +0 -0
  77. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpcloudserver/zip_file.py +0 -0
  78. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/dpdisp.py +0 -0
  79. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/lazy_local_context.py +0 -0
  80. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/local_context.py +0 -0
  81. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/shell.py +0 -0
  82. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/ssh_context.py +0 -0
  83. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher/utils.py +0 -0
  84. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher.egg-info/dependency_links.txt +0 -0
  85. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher.egg-info/entry_points.txt +0 -0
  86. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/dpdispatcher.egg-info/top_level.txt +0 -0
  87. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/machine/expanse.json +0 -0
  88. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/machine/lazy_local.json +0 -0
  89. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/machine/mandu.json +0 -0
  90. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/resources/expanse_cpu.json +0 -0
  91. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/resources/mandu.json +0 -0
  92. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/task/deepmd-kit.json +0 -0
  93. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/examples/task/g16.json +0 -0
  94. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/scripts/script_gen_dargs_docs.py +0 -0
  95. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/scripts/script_gen_dargs_json.py +0 -0
  96. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/setup.cfg +0 -0
  97. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/.gitignore +0 -0
  98. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/__init__.py +0 -0
  99. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/batch.json +0 -0
  100. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/context.py +0 -0
  101. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/debug_test_class_submission_init.py +0 -0
  102. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_ali_ehpc.py +0 -0
  103. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_dp_cloud_server.py +0 -0
  104. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_lazy_ali_ehpc.py +0 -0
  105. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_lsf.py +0 -0
  106. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_shell.py +0 -0
  107. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_slurm.py +0 -0
  108. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/devel_test_ssh_ali_ehpc.py +0 -0
  109. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/graph.pb +0 -0
  110. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/job.json +0 -0
  111. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine.json +0 -0
  112. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_ali_ehpc.json +0 -0
  113. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_center.json +0 -0
  114. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_diffenert.json +0 -0
  115. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_dp_cloud_server.json +0 -0
  116. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_fugaku.json +0 -0
  117. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_if_cuda_multi_devices.json +0 -0
  118. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_lazy_local_lsf.json +0 -0
  119. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_lazy_local_slurm.json +0 -0
  120. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_lazylocal_shell.json +0 -0
  121. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_local_fugaku.json +0 -0
  122. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_local_shell.json +0 -0
  123. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_lsf.json +0 -0
  124. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_slurm.json +0 -0
  125. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/machine_yarn.json +0 -0
  126. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/resources.json +0 -0
  127. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/submission.json +0 -0
  128. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/jsons/task.json +0 -0
  129. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/lsf/context.py +0 -0
  130. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/lsf/test_dispatcher.py +0 -0
  131. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/lsf/test_lsf_local.py +0 -0
  132. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/old/test_dispatcher_utils.py +0 -0
  133. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/old/test_lazy_local_context.py +0 -0
  134. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/old/test_local_context.py +0 -0
  135. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/old/test_local_session.py +0 -0
  136. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/old/test_ssh_context.py +0 -0
  137. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/pbs/context.py +0 -0
  138. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/pbs/test_dispatcher.py +0 -0
  139. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/pbs/test_pbs_local.py +0 -0
  140. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/sample_class.py +0 -0
  141. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/script_gen_json.py +0 -0
  142. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/shell/context.py +0 -0
  143. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/shell/test_dispatcher.py +0 -0
  144. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/shell/test_shell_local.py +0 -0
  145. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/shell/test_shell_ssh.py +0 -0
  146. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm/context.py +0 -0
  147. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm/test_dispatcher.py +0 -0
  148. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm/test_dispatcher_lazy_local.py +0 -0
  149. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm/test_slurm_lazy_local.py +0 -0
  150. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm/test_slurm_local.py +0 -0
  151. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm/test_slurm_ssh.py +0 -0
  152. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/slurm_test.env +0 -0
  153. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_argcheck.py +0 -0
  154. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_job.py +0 -0
  155. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_machine.py +0 -0
  156. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_machine_dispatch.py +0 -0
  157. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_resources.py +0 -0
  158. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_submission.py +0 -0
  159. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_submission_init.py +0 -0
  160. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_class_task.py +0 -0
  161. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-1/conf.lmp +0 -0
  162. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-1/input.lammps +0 -0
  163. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-1/some_dir/some_file +0 -0
  164. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-2/conf.lmp +0 -0
  165. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-2/input.lammps +0 -0
  166. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-3/conf.lmp +0 -0
  167. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-3/input.lammps +0 -0
  168. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-4/conf.lmp +0 -0
  169. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/bct-4/input.lammps +0 -0
  170. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/dir with space/file with space +0 -0
  171. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/graph.pb +0 -0
  172. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_context_dir/0_md/some_dir/some_file +0 -0
  173. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_group_size.py +0 -0
  174. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_context.py +0 -0
  175. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-1/conf.lmp +0 -0
  176. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-1/input.lammps +0 -0
  177. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-2/conf.lmp +0 -0
  178. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-2/input.lammps +0 -0
  179. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-3/conf.lmp +0 -0
  180. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-3/input.lammps +0 -0
  181. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-4/conf.lmp +0 -0
  182. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/bct-4/input.lammps +0 -0
  183. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_hdfs_dir/0_md/graph.pb +0 -0
  184. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_if_cuda_multi_devices/test_dir/test.txt +0 -0
  185. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_import_classes.py +0 -0
  186. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lazy_local_context.py +0 -0
  187. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_local_context.py +0 -0
  188. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-1/conf.lmp +0 -0
  189. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-1/input.lammps +0 -0
  190. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-2/conf.lmp +0 -0
  191. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-2/input.lammps +0 -0
  192. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-3/conf.lmp +0 -0
  193. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-3/input.lammps +0 -0
  194. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-4/conf.lmp +0 -0
  195. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/bct-4/input.lammps +0 -0
  196. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/graph.pb +0 -0
  197. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_dir/0_md/submission.json +0 -0
  198. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_lsf_script_generation.py +0 -0
  199. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-1/conf.lmp +0 -0
  200. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-1/input.lammps +0 -0
  201. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-2/conf.lmp +0 -0
  202. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-2/input.lammps +0 -0
  203. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-3/conf.lmp +0 -0
  204. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-3/input.lammps +0 -0
  205. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-4/conf.lmp +0 -0
  206. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/bct-4/input.lammps +0 -0
  207. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_pbs_dir/0_md/graph.pb +0 -0
  208. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_retry.py +0 -0
  209. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_run_submission.py +0 -0
  210. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_run_submission_ratio_unfinished.py +0 -0
  211. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_cuda_multi_devices.py +0 -0
  212. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival.py +0 -0
  213. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/fail_dir/mock_fail_task.txt +0 -0
  214. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/parent_dir/dir with space/example.txt +0 -0
  215. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/parent_dir/dir1/example.txt +0 -0
  216. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/parent_dir/dir2/example.txt +0 -0
  217. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/parent_dir/dir3/example.txt +0 -0
  218. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/parent_dir/dir4/example.txt +0 -0
  219. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/parent_dir/graph.pb +0 -0
  220. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_shell_trival_dir/recover_dir/mock_recover_task.txt +0 -0
  221. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-1/conf.lmp +0 -0
  222. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-1/input.lammps +0 -0
  223. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-2/conf.lmp +0 -0
  224. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-2/input.lammps +0 -0
  225. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-3/conf.lmp +0 -0
  226. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-3/input.lammps +0 -0
  227. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-4/conf.lmp +0 -0
  228. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/bct-4/input.lammps +0 -0
  229. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/d3c842c5b9476e48f7145b370cd330372b9293e1.json +0 -0
  230. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/graph.pb +0 -0
  231. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_dir/0_md/submission.json +0 -0
  232. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_slurm_script_generation.py +0 -0
  233. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_ssh_context.py +0 -0
  234. {dpdispatcher-0.5.8 → dpdispatcher-0.5.10}/tests/test_work_path/.gitkeep +0 -0
@@ -17,12 +17,12 @@ repos:
17
17
  - id: check-toml
18
18
  # Python
19
19
  - repo: https://github.com/psf/black
20
- rev: 23.3.0
20
+ rev: 23.7.0
21
21
  hooks:
22
22
  - id: black-jupyter
23
- - repo: https://github.com/charliermarsh/ruff-pre-commit
23
+ - repo: https://github.com/astral-sh/ruff-pre-commit
24
24
  # Ruff version.
25
- rev: v0.0.275
25
+ rev: v0.0.278
26
26
  hooks:
27
27
  - id: ruff
28
28
  args: ["--fix"]
@@ -34,6 +34,6 @@ repos:
34
34
  args: ["--write"]
35
35
  # Python inside docs
36
36
  - repo: https://github.com/asottile/blacken-docs
37
- rev: 1.14.0
37
+ rev: 1.15.0
38
38
  hooks:
39
39
  - id: blacken-docs
@@ -2,4 +2,4 @@ FROM python:3.11
2
2
 
3
3
  WORKDIR /data/dpdispatcher
4
4
  COPY ./ ./
5
- RUN pip install .[cloudserver]
5
+ RUN pip install .[bohrium]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dpdispatcher
3
- Version: 0.5.8
3
+ Version: 0.5.10
4
4
  Summary: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish
5
5
  Author: DeepModeling
6
6
  License: GNU LESSER GENERAL PUBLIC LICENSE
@@ -186,6 +186,7 @@ Requires-Python: >=3.7
186
186
  Description-Content-Type: text/markdown
187
187
  Provides-Extra: docs
188
188
  Provides-Extra: cloudserver
189
+ Provides-Extra: bohrium
189
190
  Provides-Extra: test
190
191
  License-File: LICENSE
191
192
 
@@ -210,6 +211,12 @@ DPDispatcher can be installed by `pip`:
210
211
  pip install dpdispatcher
211
212
  ```
212
213
 
214
+ To add [Bohrium](https://bohrium.dp.tech/) support, execute
215
+
216
+ ```bash
217
+ pip install dpdispatcher[bohrium]
218
+ ```
219
+
213
220
  ## Usage
214
221
 
215
222
  See [Getting Started](https://dpdispatcher.readthedocs.io/en/latest/getting-started.html) for usage.
@@ -19,6 +19,12 @@ DPDispatcher can be installed by `pip`:
19
19
  pip install dpdispatcher
20
20
  ```
21
21
 
22
+ To add [Bohrium](https://bohrium.dp.tech/) support, execute
23
+
24
+ ```bash
25
+ pip install dpdispatcher[bohrium]
26
+ ```
27
+
22
28
  ## Usage
23
29
 
24
30
  See [Getting Started](https://dpdispatcher.readthedocs.io/en/latest/getting-started.html) for usage.
@@ -70,3 +70,9 @@ Read [Support DPDispatcher on Yarn](dpdispatcher_on_yarn.md) for details.
70
70
  [Fujitsu cloud service](https://doc.cloud.global.fujitsu.com/lib/common/jp/hpc-user-manual/) is a job scheduling system used by Fujitsu's HPCs such as Fugaku, ITO and K computer. It should be noted that although the same job scheduling system is used, there are some differences in the details, so the Fugaku class cannot be directly used for other HPCs.
71
71
 
72
72
  Read Fujitsu cloud service documentation for details.
73
+
74
+
75
+ ## OpenAPI
76
+
77
+ {dargs:argument}`batch_type <resources/batch_type>`: `OpenAPI`
78
+ OpenAPI is a new way to submit jobs to Bohrium. It uses an [AccessKey](https://bohrium.dp.tech/personal/setting) instead of a username and password. Read Bohrium documentation for details.
@@ -42,3 +42,11 @@ To use Bohrium, one needs to provide necessary parameters in {dargs:argument}`re
42
42
 
43
43
  The Hadoop Distributed File System (HDFS) is a distributed file system.
44
44
  Read [Support DPDispatcher on Yarn](dpdispatcher_on_yarn.md) for details.
45
+
46
+
47
+ ## OpenAPI
48
+
49
+ {dargs:argument}`context_type <machine/context_type>`: `OpenAPI`
50
+
51
+ OpenAPI is a new way to submit jobs to Bohrium. It uses an [AccessKey](https://bohrium.dp.tech/personal/setting) instead of a username and password. Read Bohrium documentation for details.
52
+ To use OpenAPI, one needs to provide necessary parameters in {dargs:argument}`remote_profile <machine[OpenAPIContext]/remote_profile>`.
@@ -6,3 +6,9 @@ DPDispatcher can installed by `pip`:
6
6
  ```bash
7
7
  pip install dpdispatcher
8
8
  ```
9
+
10
+ To add [Bohrium](https://bohrium.dp.tech/) support, execute
11
+
12
+ ```bash
13
+ pip install dpdispatcher[bohrium]
14
+ ```
@@ -49,6 +49,8 @@ from .lazy_local_context import LazyLocalContext
49
49
  from .local_context import LocalContext
50
50
  from .lsf import LSF
51
51
  from .machine import Machine
52
+ from .openapi import OpenAPI
53
+ from .openapi_context import OpenAPIContext
52
54
  from .pbs import PBS, Torque
53
55
  from .shell import Shell
54
56
  from .slurm import Slurm
@@ -77,6 +79,8 @@ __all__ = [
77
79
  "__version__",
78
80
  "DistributedShell",
79
81
  "DpCloudServer",
82
+ "OpenAPI",
83
+ "OpenAPIContext",
80
84
  "DpCloudServerContext",
81
85
  "HDFSContext",
82
86
  "LazyLocalContext",
@@ -1,4 +1,4 @@
1
1
  # file generated by setuptools_scm
2
2
  # don't change, don't track in version control
3
- __version__ = version = '0.5.8'
4
- __version_tuple__ = version_tuple = (0, 5, 8)
3
+ __version__ = version = '0.5.10'
4
+ __version_tuple__ = version_tuple = (0, 5, 10)
@@ -31,6 +31,13 @@ class Bohrium(Machine):
31
31
  phone = context.remote_profile.get("phone", None)
32
32
  username = context.remote_profile.get("username", None)
33
33
  password = context.remote_profile.get("password", None)
34
+
35
+ ticket = os.environ.get("BOHR_TICKET", None)
36
+ if ticket:
37
+ self.api = Client(ticket=ticket)
38
+ self.group_id = None
39
+ return
40
+
34
41
  if email is None and username is not None:
35
42
  raise DeprecationWarning(
36
43
  "username is no longer support in current version, "
@@ -21,7 +21,7 @@ DP_CLOUD_SERVER_HOME_DIR = os.path.join(
21
21
  os.path.expanduser("~"), ".dpdispatcher/", "dp_cloud_server/"
22
22
  )
23
23
  ENDPOINT = "http://oss-cn-shenzhen.aliyuncs.com"
24
- BUCKET_NAME = "dpcloudserver"
24
+ BUCKET_NAME = os.environ.get("BUCKET_NAME", "dpcloudserver")
25
25
 
26
26
 
27
27
  class BohriumContext(BaseContext):
@@ -39,9 +39,16 @@ class BohriumContext(BaseContext):
39
39
  self.init_remote_root = remote_root
40
40
  self.temp_local_root = os.path.abspath(local_root)
41
41
  self.remote_profile = remote_profile
42
+ ticket = os.environ.get("BOHR_TICKET", None)
42
43
  email = remote_profile.get("email", None)
43
44
  phone = remote_profile.get("phone", None)
44
45
  password = remote_profile.get("password")
46
+ os.makedirs(DP_CLOUD_SERVER_HOME_DIR, exist_ok=True)
47
+
48
+ if ticket is not None:
49
+ self.api = Client(ticket=ticket)
50
+ return
51
+
45
52
  if email is None and phone is None:
46
53
  raise ValueError(
47
54
  "can not find email/phone number in remote_profile, please check your machine file."
@@ -57,8 +64,6 @@ class BohriumContext(BaseContext):
57
64
 
58
65
  self.api = Client(account, password)
59
66
 
60
- os.makedirs(DP_CLOUD_SERVER_HOME_DIR, exist_ok=True)
61
-
62
67
  @classmethod
63
68
  def load_from_dict(cls, context_dict):
64
69
  local_root = context_dict["local_root"]
@@ -256,9 +261,7 @@ class BohriumContext(BaseContext):
256
261
  return os.path.isfile(os.path.join(DP_CLOUD_SERVER_HOME_DIR, fname))
257
262
 
258
263
  def clean(self):
259
- submission_file_name = "{submission_hash}.json".format(
260
- submission_hash=self.submission.submission_hash
261
- )
264
+ submission_file_name = f"{self.submission.submission_hash}.json"
262
265
  submission_json = os.path.join(DP_CLOUD_SERVER_HOME_DIR, submission_file_name)
263
266
  os.remove(submission_json)
264
267
  return True
@@ -288,7 +291,7 @@ class BohriumContext(BaseContext):
288
291
  dict,
289
292
  [
290
293
  Argument("email", str, optional=True, doc="Email"),
291
- Argument("password", str, optional=False, doc="Password"),
294
+ Argument("password", str, optional=True, doc="Password"),
292
295
  Argument(
293
296
  "program_id",
294
297
  int,
@@ -25,7 +25,9 @@ class RequestInfoException(Exception):
25
25
 
26
26
 
27
27
  class Client:
28
- def __init__(self, email=None, password=None, debug=False, base_url=API_HOST):
28
+ def __init__(
29
+ self, email=None, password=None, debug=False, ticket=None, base_url=API_HOST
30
+ ):
29
31
  self.debug = debug
30
32
  self.debug = os.getenv("LBG_CLI_DEBUG_PRINT", debug)
31
33
  self.config = {}
@@ -35,6 +37,7 @@ class Client:
35
37
  self.config["password"] = password
36
38
  self.base_url = base_url
37
39
  self.last_log_offset = 0
40
+ self.ticket = ticket
38
41
 
39
42
  def post(self, url, data=None, header=None, params=None, retry=5):
40
43
  return self._req(
@@ -51,19 +54,26 @@ class Client:
51
54
  header = {}
52
55
  if not self.token:
53
56
  self.refresh_token()
57
+ self.ticket = os.environ.get("BOHR_TICKET", "")
54
58
  header["Authorization"] = f"jwt {self.token}"
59
+ header["Brm-Ticket"] = self.ticket
55
60
  resp_code = None
56
61
  err = None
57
62
  for i in range(retry):
58
63
  resp = None
59
- if method == "GET":
60
- resp = requests.get(url, params=params, headers=header)
61
- else:
62
- if self.debug:
63
- print(data)
64
- resp = requests.post(url, json=data, params=params, headers=header)
65
- if self.debug:
66
- print(resp.text)
64
+ try:
65
+ if method == "GET":
66
+ resp = requests.get(url, params=params, headers=header)
67
+ else:
68
+ if self.debug:
69
+ print(data)
70
+ resp = requests.post(url, json=data, params=params, headers=header)
71
+ except Exception as e:
72
+ dlog.error(f"request({i}) error {e}", i, stack_info=ENABLE_STACK)
73
+ err = e
74
+ time.sleep(1 * i)
75
+ continue
76
+
67
77
  resp_code = resp.status_code
68
78
  if not resp.ok:
69
79
  if self.debug:
@@ -96,6 +106,9 @@ class Client:
96
106
  self.user_id = resp["user_id"]
97
107
 
98
108
  def refresh_token(self, retry=3):
109
+ self.ticket = os.environ.get("BOHR_TICKET", "")
110
+ if self.ticket:
111
+ return
99
112
  url = "/account/login"
100
113
  post_data = {"email": self.config["email"], "password": self.config["password"]}
101
114
  resp_code = None
@@ -24,9 +24,7 @@ class Fugaku(Machine):
24
24
  ] = f'#PJM -L "node={resources.number_node}" '
25
25
  fugaku_script_header_dict[
26
26
  "fugaku_ntasks_per_node_line"
27
- ] = '#PJM --mpi "max-proc-per-node={cpu_per_node}"'.format(
28
- cpu_per_node=resources.cpu_per_node
29
- )
27
+ ] = f'#PJM --mpi "max-proc-per-node={resources.cpu_per_node}"'
30
28
  fugaku_script_header_dict[
31
29
  "queue_name_line"
32
30
  ] = f'#PJM -L "rscgrp={resources.queue_name}"'
@@ -90,9 +90,7 @@ class HDFS:
90
90
  raise RuntimeError(
91
91
  "try to access local_path[{}] " "but failed".format(local_path)
92
92
  )
93
- cmd = "hadoop fs -copyFromLocal -f {local} {remote}".format(
94
- local=local_path, remote=to_uri
95
- )
93
+ cmd = f"hadoop fs -copyFromLocal -f {local_path} {to_uri}"
96
94
  try:
97
95
  ret, out, err = run_cmd_with_all_output(cmd)
98
96
  if ret == 0:
@@ -106,9 +104,7 @@ class HDFS:
106
104
  )
107
105
  except Exception as e:
108
106
  raise RuntimeError(
109
- "Cannot copy local[{}] to remote[{}] with cmd[{}]".format(
110
- local_path, to_uri, cmd
111
- )
107
+ f"Cannot copy local[{local_path}] to remote[{to_uri}] with cmd[{cmd}]"
112
108
  ) from e
113
109
 
114
110
  @staticmethod
@@ -118,9 +114,7 @@ class HDFS:
118
114
  remote = from_uri
119
115
  elif isinstance(from_uri, list) or isinstance(from_uri, tuple):
120
116
  remote = " ".join(from_uri)
121
- cmd = "hadoop fs -copyToLocal {remote} {local}".format(
122
- remote=remote, local=local_path
123
- )
117
+ cmd = f"hadoop fs -copyToLocal {remote} {local_path}"
124
118
 
125
119
  try:
126
120
  ret, out, err = run_cmd_with_all_output(cmd)
@@ -135,9 +129,7 @@ class HDFS:
135
129
  )
136
130
  except Exception as e:
137
131
  raise RuntimeError(
138
- "Cannot copy remote[{}] to local[{}] with cmd[{}]".format(
139
- from_uri, local_path, cmd
140
- )
132
+ f"Cannot copy remote[{from_uri}] to local[{local_path}] with cmd[{cmd}]"
141
133
  ) from e
142
134
 
143
135
  @staticmethod
@@ -137,10 +137,7 @@ class HDFSContext(BaseContext):
137
137
  if os.path.exists(gz_dir):
138
138
  shutil.rmtree(gz_dir, ignore_errors=True)
139
139
  os.mkdir(os.path.join(self.local_root, "tmp"))
140
- rfile_tgz = "{}/{}_*_download.tar.gz".format(
141
- self.remote_root,
142
- submission.submission_hash,
143
- )
140
+ rfile_tgz = f"{self.remote_root}/{submission.submission_hash}_*_download.tar.gz"
144
141
  lfile_tgz = "%s/tmp/" % (self.local_root)
145
142
  HDFS.copy_to_local(rfile_tgz, lfile_tgz)
146
143
 
@@ -31,12 +31,8 @@ class LSF(Machine):
31
31
  "lsf_nodes_line": "#BSUB -n {number_cores}".format(
32
32
  number_cores=resources.number_node * resources.cpu_per_node
33
33
  ),
34
- "lsf_ptile_line": "#BSUB -R 'span[ptile={cpu_per_node}]'".format(
35
- cpu_per_node=resources.cpu_per_node
36
- ),
37
- "lsf_partition_line": "#BSUB -q {queue_name}".format(
38
- queue_name=resources.queue_name
39
- ),
34
+ "lsf_ptile_line": f"#BSUB -R 'span[ptile={resources.cpu_per_node}]'",
35
+ "lsf_partition_line": f"#BSUB -q {resources.queue_name}",
40
36
  }
41
37
  gpu_usage_flag = resources.kwargs.get("gpu_usage", False)
42
38
  gpu_new_syntax_flag = resources.kwargs.get("gpu_new_syntax", False)
@@ -208,9 +208,7 @@ class Machine(metaclass=ABCMeta):
208
208
 
209
209
  def check_if_recover(self, submission):
210
210
  submission_hash = submission.submission_hash
211
- submission_file_name = "{submission_hash}.json".format(
212
- submission_hash=submission_hash
213
- )
211
+ submission_file_name = f"{submission_hash}.json"
214
212
  if_recover = self.context.check_file_exists(submission_file_name)
215
213
  return if_recover
216
214
 
@@ -0,0 +1,198 @@
1
+ import os
2
+ import shutil
3
+ import time
4
+
5
+ try:
6
+ from bohriumsdk.client import Client
7
+ from bohriumsdk.job import Job
8
+ from bohriumsdk.storage import Storage
9
+ from bohriumsdk.util import Util
10
+ except ModuleNotFoundError:
11
+ found_bohriumsdk = False
12
+ else:
13
+ found_bohriumsdk = True
14
+
15
+ from dpdispatcher import dlog
16
+ from dpdispatcher.JobStatus import JobStatus
17
+ from dpdispatcher.machine import Machine
18
+
19
+ shell_script_header_template = """
20
+ #!/bin/bash -l
21
+ """
22
+
23
+
24
class OpenAPI(Machine):
    """Machine backend that submits and tracks jobs through ``bohriumsdk``.

    The instance keeps the dispatcher ``context``, a private copy of its
    ``remote_profile`` and three SDK handles (``Client``, ``Job``,
    ``Storage``) that the methods below use for submission, status polling
    and result download.
    """

    def __init__(self, context):
        """Bind the backend to *context* and create the SDK handles.

        Raises
        ------
        ModuleNotFoundError
            If the optional ``bohriumsdk`` package could not be imported.
        """
        if not found_bohriumsdk:
            raise ModuleNotFoundError(
                "bohriumsdk not installed. Install dpdispatcher with `pip install dpdispatcher[bohrium]`"
            )
        self.context = context
        # Work on a copy so later changes never leak back into the context.
        self.remote_profile = context.remote_profile.copy()

        self.grouped = self.remote_profile.get("grouped", True)
        self.client = Client()
        self.job = Job(client=self.client)
        self.storage = Storage(client=self.client)
        self.group_id = None

    def gen_script(self, job):
        """Return the job script produced by the base ``Machine``."""
        return super().gen_script(job)

    def gen_script_header(self, job):
        """Return the fixed shell header; *job* is not consulted."""
        return shell_script_header_template

    def gen_local_script(self, job):
        """Write the job script through the context and return its file name."""
        script_str = self.gen_script(job)
        script_file_name = job.script_file_name
        self.context.write_local_file(fname=script_file_name, write_str=script_str)
        return script_file_name

    def _gen_backward_files_list(self, job):
        """Return the de-duplicated backward-file paths of every task in *job*.

        Each entry is ``task_work_path`` joined with one of the task's
        ``backward_files``. The order of the returned list is unspecified.
        """
        unique_paths = {
            os.path.join(task.task_work_path, b_f)
            for task in job.job_task_list
            for b_f in task.backward_files
        }
        return list(unique_paths)

    def do_submit(self, job):
        """Generate the local script, submit *job* and return its job id.

        The id stored on ``job.job_id`` is the ``"jobId"`` field of the
        insert response (``0`` when the field is absent), and the job state
        is set to ``JobStatus.waiting``.
        """
        self.gen_local_script(job)

        project_id = self.remote_profile.get("project_id", 0)
        first_task = job.job_task_list[0]

        openapi_params = {
            "oss_path": job.upload_path,
            "input_file_type": 3,
            "input_file_method": 1,
            "job_type": "container",
            "job_name": self.remote_profile.get("job_name", "DP-GEN"),
            "project_id": project_id,
            "scass_type": self.remote_profile.get("machine_type", ""),
            "cmd": f"bash {job.script_file_name}",
            # Only the first task's log file is registered as the job log.
            "log_files": os.path.join(first_task.task_work_path, first_task.outlog),
            "out_files": self._gen_backward_files_list(job),
            "platform": self.remote_profile.get("platform", "ali"),
            "image_address": self.remote_profile.get("image_address", ""),
            "job_id": job.job_id,
        }

        data = self.job.insert(**openapi_params)

        job.job_id = data.get("jobId", 0)  # type: ignore
        job.job_state = JobStatus.waiting
        return job.job_id

    def _get_job_detail(self, job_id, group_id):
        """Return the job detail for *job_id*.

        *group_id* is only used to build the failure message.

        Raises
        ------
        AssertionError
            If the detail query returns ``None``.
        """
        check_return = self.job.detail(job_id)
        assert check_return is not None, (
            f"Failed to retrieve tasks information. To resubmit this job, please "
            f"try again, if this problem still exists please delete the submission "
            f"file and try again.\nYou can check submission.submission_hash in the "
            f'previous log or type `grep -rl "{job_id}:job_group_id:{group_id}" '
            f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
            f"You can try with command:\n "
            f'rm $(grep -rl "{job_id}:job_group_id:{group_id}" ~/.dpdispatcher/dp_cloud_server/)'
        )
        return check_return

    def check_status(self, job):
        """Query the state of *job* and return it as a ``JobStatus``.

        A job whose id is the empty string is reported as unsubmitted. When
        the detail response has no usable ``"status"`` entry the query is
        repeated once after 60 seconds. A finished job has its results
        downloaded; when ``output_log`` is set in the remote profile the
        job log is printed for finished and running jobs.

        Raises
        ------
        RuntimeError
            If the status is still missing after the retry.
        """
        if job.job_id == "":
            return JobStatus.unsubmitted
        job_id = job.job_id
        group_id = getattr(job, "jgid", None)

        # A missing key raises KeyError on a dict and TypeError on a list or
        # None, so catching IndexError alone would never reach the retry.
        missing_status = (KeyError, IndexError, TypeError)

        check_return = self._get_job_detail(job_id, group_id)
        try:
            dp_job_status = check_return["status"]  # type: ignore
        except missing_status:
            dlog.error(
                f"cannot find job information in bohrium for job {job.job_id}. check_return:{check_return}; retry one more time after 60 seconds"
            )
            time.sleep(60)
            retry_return = self._get_job_detail(job_id, group_id)
            try:
                dp_job_status = retry_return["status"]  # type: ignore
            except missing_status as e:
                raise RuntimeError(
                    f"cannot find job information in bohrium for job {job.job_id} {check_return} {retry_return}"
                ) from e

        job_state = self.map_dp_job_state(dp_job_status)
        if job_state == JobStatus.finished:
            job_log = self.job.log(job_id)
            if self.remote_profile.get("output_log"):
                print(job_log, end="")
            self._download_job(job)
        elif self.remote_profile.get("output_log") and job_state == JobStatus.running:
            job_log = self.job.log(job_id)
            print(job_log, end="")
        return job_state

    def _download_job(self, job):
        """Download and unpack the result archive of *job*.

        The URL of the first entry under ``jobFiles``/``outFiles`` in the
        job detail is downloaded to ``<local_root>/<job_hash>_back.zip`` and
        unzipped into ``local_root``. The archive is then moved into
        ``<local_root>/backup``; a failure of that move is logged, not
        raised. Nothing happens when the URL is empty.
        """
        data = self.job.detail(job.job_id)
        job_url = data["jobFiles"]["outFiles"][0]["url"]  # type: ignore
        if not job_url:
            return
        local_root = self.context.local_root
        result_filename = job.job_hash + "_back.zip"
        target_result_zip = os.path.join(local_root, result_filename)
        self.storage.download_from_url(job_url, target_result_zip)
        Util.unzip_file(target_result_zip, out_dir=local_root)
        try:
            backup_dir = os.path.join(local_root, "backup")
            os.makedirs(backup_dir, exist_ok=True)
            shutil.move(target_result_zip, os.path.join(backup_dir, result_filename))
        except (OSError, shutil.Error) as e:
            dlog.exception("unable to backup file, " + str(e))

    def check_finish_tag(self, job):
        """Return whether the ``<job_hash>_job_tag_finished`` file exists."""
        job_tag_finished = job.job_hash + "_job_tag_finished"
        # Placeholders are required: logging formats the message as
        # ``msg % args`` and fails when args have nothing to fill.
        dlog.info("check if job finished: %s %s", job.job_id, job_tag_finished)
        return self.context.check_file_exists(job_tag_finished)

    def check_if_recover(self, submission):
        """Always return ``False``; *submission* is not consulted."""
        return False

    @staticmethod
    def map_dp_job_state(status):
        """Translate a raw status code into a ``JobStatus``.

        A value that already is a ``JobStatus`` is returned unchanged. An
        unrecognised code is logged and mapped to ``JobStatus.unknown``.
        """
        if isinstance(status, JobStatus):
            return status
        map_dict = {
            -1: JobStatus.terminated,
            0: JobStatus.waiting,
            1: JobStatus.running,
            2: JobStatus.finished,
            3: JobStatus.waiting,
            4: JobStatus.running,
            5: JobStatus.terminated,
            6: JobStatus.running,
            9: JobStatus.waiting,
        }
        if status not in map_dict:
            dlog.error(f"unknown job status {status}")
            return JobStatus.unknown
        return map_dict[status]