dpdispatcher 0.6.6__tar.gz → 0.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (258)
  1. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.pre-commit-config.yaml +1 -1
  2. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/PKG-INFO +1 -1
  3. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/pbs/start-pbs.sh +1 -1
  4. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/pbs.sh +2 -1
  5. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm/register_cluster.sh +1 -1
  6. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm/start-slurm.sh +1 -1
  7. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm.sh +2 -1
  8. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh/start-ssh.sh +1 -1
  9. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh.sh +1 -1
  10. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh_rsync.sh +1 -1
  11. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/conf.py +5 -1
  12. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/context.md +4 -3
  13. dpdispatcher-0.6.7/doc/env.md +35 -0
  14. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/index.rst +1 -0
  15. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/_version.py +2 -2
  16. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/base_context.py +61 -1
  17. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/dp_cloud_server_context.py +5 -0
  18. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/hdfs_context.py +5 -0
  19. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/lazy_local_context.py +0 -17
  20. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/local_context.py +57 -31
  21. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/openapi_context.py +5 -0
  22. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/ssh_context.py +12 -35
  23. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machine.py +12 -0
  24. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/JH_UniScheduler.py +1 -1
  25. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/distributed_shell.py +2 -2
  26. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/lsf.py +1 -1
  27. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/pbs.py +11 -8
  28. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/shell.py +9 -8
  29. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/slurm.py +18 -21
  30. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/submission.py +3 -3
  31. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/PKG-INFO +1 -1
  32. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/SOURCES.txt +1 -0
  33. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_argcheck.py +4 -1
  34. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_script_generation.py +5 -0
  35. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run_submission.py +26 -1
  36. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.git_archival.txt +0 -0
  37. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.gitattributes +0 -0
  38. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/dependabot.yml +0 -0
  39. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/ci-docker.yml +0 -0
  40. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/machines.yml +0 -0
  41. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/mirror_gitee.yml +0 -0
  42. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/publish_conda.yml +0 -0
  43. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/pyright.yml +0 -0
  44. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/release.yml +0 -0
  45. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/test-bohrium.yml +0 -0
  46. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/test.yml +0 -0
  47. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.gitignore +0 -0
  48. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.readthedocs.yaml +0 -0
  49. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/CONTRIBUTING.md +0 -0
  50. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/Dockerfile +0 -0
  51. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/LICENSE +0 -0
  52. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/README.md +0 -0
  53. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/LICENSE +0 -0
  54. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/README.md +0 -0
  55. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/pbs/docker-compose.yml +0 -0
  56. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm/docker-compose.yml +0 -0
  57. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh/docker-compose.yml +0 -0
  58. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/codecov.yml +0 -0
  59. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/conda/conda_build_config.yaml +0 -0
  60. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/conda/meta.yaml +0 -0
  61. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/.gitignore +0 -0
  62. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/Makefile +0 -0
  63. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/batch.md +0 -0
  64. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/cli.rst +0 -0
  65. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/credits.rst +0 -0
  66. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/dpdispatcher_on_yarn.md +0 -0
  67. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/expanse.md +0 -0
  68. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/g16.md +0 -0
  69. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/shell.md +0 -0
  70. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/template.md +0 -0
  71. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/getting-started.md +0 -0
  72. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/install.md +0 -0
  73. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/machine.rst +0 -0
  74. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/make.bat +0 -0
  75. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/pep723.rst +0 -0
  76. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/requirements.txt +0 -0
  77. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/resources.rst +0 -0
  78. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/run.md +0 -0
  79. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/task.rst +0 -0
  80. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/__init__.py +0 -0
  81. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/__main__.py +0 -0
  82. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/arginfo.py +0 -0
  83. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/__init__.py +0 -0
  84. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dlog.py +0 -0
  85. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dpcloudserver/__init__.py +0 -0
  86. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dpcloudserver/client.py +0 -0
  87. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dpdisp.py +0 -0
  88. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/__init__.py +0 -0
  89. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/gui.py +0 -0
  90. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/run.py +0 -0
  91. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/submission.py +0 -0
  92. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/__init__.py +0 -0
  93. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/dp_cloud_server.py +0 -0
  94. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/fugaku.py +0 -0
  95. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/openapi.py +0 -0
  96. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/run.py +0 -0
  97. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/__init__.py +0 -0
  98. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/__init__.py +0 -0
  99. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/client.py +0 -0
  100. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/config.py +0 -0
  101. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/retcode.py +0 -0
  102. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/zip_file.py +0 -0
  103. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/hdfs_cli.py +0 -0
  104. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/job_status.py +0 -0
  105. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/record.py +0 -0
  106. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/utils.py +0 -0
  107. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/dependency_links.txt +0 -0
  108. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/entry_points.txt +0 -0
  109. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/requires.txt +0 -0
  110. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/top_level.txt +0 -0
  111. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/dpdisp_run.py +0 -0
  112. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/machine/expanse.json +0 -0
  113. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/machine/lazy_local.json +0 -0
  114. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/machine/mandu.json +0 -0
  115. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/expanse_cpu.json +0 -0
  116. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/mandu.json +0 -0
  117. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/template.slurm +0 -0
  118. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/tiger.json +0 -0
  119. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/task/deepmd-kit.json +0 -0
  120. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/task/g16.json +0 -0
  121. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/pyproject.toml +0 -0
  122. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/scripts/script_gen_dargs_docs.py +0 -0
  123. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/scripts/script_gen_dargs_json.py +0 -0
  124. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/setup.cfg +0 -0
  125. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/.gitignore +0 -0
  126. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/__init__.py +0 -0
  127. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/batch.json +0 -0
  128. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/context.py +0 -0
  129. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/debug_test_class_submission_init.py +0 -0
  130. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_JH_UniScheduler.py +0 -0
  131. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_ali_ehpc.py +0 -0
  132. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_dp_cloud_server.py +0 -0
  133. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_lazy_ali_ehpc.py +0 -0
  134. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_lsf.py +0 -0
  135. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_shell.py +0 -0
  136. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_slurm.py +0 -0
  137. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_ssh_ali_ehpc.py +0 -0
  138. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/graph.pb +0 -0
  139. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/hello_world.py +0 -0
  140. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/job.json +0 -0
  141. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine.json +0 -0
  142. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_JH_UniScheduler.json +0 -0
  143. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_ali_ehpc.json +0 -0
  144. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_center.json +0 -0
  145. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_diffenert.json +0 -0
  146. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_dp_cloud_server.json +0 -0
  147. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_fugaku.json +0 -0
  148. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_if_cuda_multi_devices.json +0 -0
  149. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazy_local_jh_unischeduler.json +0 -0
  150. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazy_local_lsf.json +0 -0
  151. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazy_local_slurm.json +0 -0
  152. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazylocal_shell.json +0 -0
  153. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_local_fugaku.json +0 -0
  154. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_local_shell.json +0 -0
  155. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lsf.json +0 -0
  156. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_openapi.json +0 -0
  157. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_slurm.json +0 -0
  158. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_yarn.json +0 -0
  159. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/resources.json +0 -0
  160. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/submission.json +0 -0
  161. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/task.json +0 -0
  162. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/sample_class.py +0 -0
  163. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/script_gen_json.py +0 -0
  164. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/slurm_test.env +0 -0
  165. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_JH_UniScheduler_script_generation.py +0 -0
  166. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_job.py +0 -0
  167. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_machine.py +0 -0
  168. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_machine_dispatch.py +0 -0
  169. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_resources.py +0 -0
  170. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_submission.py +0 -0
  171. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_submission_init.py +0 -0
  172. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_task.py +0 -0
  173. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_cli.py +0 -0
  174. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-1/conf.lmp +0 -0
  175. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-1/input.lammps +0 -0
  176. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-1/some_dir/some_file +0 -0
  177. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-2/conf.lmp +0 -0
  178. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-2/input.lammps +0 -0
  179. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-3/conf.lmp +0 -0
  180. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-3/input.lammps +0 -0
  181. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-4/conf.lmp +0 -0
  182. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-4/input.lammps +0 -0
  183. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/dir with space/file with space +0 -0
  184. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/graph.pb +0 -0
  185. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/some_dir/some_file +0 -0
  186. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_examples.py +0 -0
  187. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_group_size.py +0 -0
  188. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_gui.py +0 -0
  189. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_context.py +0 -0
  190. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-1/conf.lmp +0 -0
  191. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-1/input.lammps +0 -0
  192. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-2/conf.lmp +0 -0
  193. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-2/input.lammps +0 -0
  194. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-3/conf.lmp +0 -0
  195. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-3/input.lammps +0 -0
  196. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-4/conf.lmp +0 -0
  197. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-4/input.lammps +0 -0
  198. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/graph.pb +0 -0
  199. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_if_cuda_multi_devices/test_dir/test.txt +0 -0
  200. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_import_classes.py +0 -0
  201. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-1/conf.lmp +0 -0
  202. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-1/input.lammps +0 -0
  203. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-2/conf.lmp +0 -0
  204. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-2/input.lammps +0 -0
  205. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-3/conf.lmp +0 -0
  206. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-3/input.lammps +0 -0
  207. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-4/conf.lmp +0 -0
  208. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-4/input.lammps +0 -0
  209. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/graph.pb +0 -0
  210. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lazy_local_context.py +0 -0
  211. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_local_context.py +0 -0
  212. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-1/conf.lmp +0 -0
  213. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-1/input.lammps +0 -0
  214. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-2/conf.lmp +0 -0
  215. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-2/input.lammps +0 -0
  216. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-3/conf.lmp +0 -0
  217. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-3/input.lammps +0 -0
  218. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-4/conf.lmp +0 -0
  219. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-4/input.lammps +0 -0
  220. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/graph.pb +0 -0
  221. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/submission.json +0 -0
  222. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-1/conf.lmp +0 -0
  223. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-1/input.lammps +0 -0
  224. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-2/conf.lmp +0 -0
  225. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-2/input.lammps +0 -0
  226. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-3/conf.lmp +0 -0
  227. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-3/input.lammps +0 -0
  228. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-4/conf.lmp +0 -0
  229. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-4/input.lammps +0 -0
  230. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/graph.pb +0 -0
  231. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_retry.py +0 -0
  232. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run.py +0 -0
  233. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run_submission_bohrium.py +0 -0
  234. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run_submission_ratio_unfinished.py +0 -0
  235. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_cuda_multi_devices.py +0 -0
  236. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival.py +0 -0
  237. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/fail_dir/mock_fail_task.txt +0 -0
  238. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir with space/example.txt +0 -0
  239. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir1/example.txt +0 -0
  240. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir2/example.txt +0 -0
  241. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir3/example.txt +0 -0
  242. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir4/example.txt +0 -0
  243. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/graph.pb +0 -0
  244. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/recover_dir/mock_recover_task.txt +0 -0
  245. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-1/conf.lmp +0 -0
  246. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-1/input.lammps +0 -0
  247. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-2/conf.lmp +0 -0
  248. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-2/input.lammps +0 -0
  249. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-3/conf.lmp +0 -0
  250. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-3/input.lammps +0 -0
  251. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-4/conf.lmp +0 -0
  252. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-4/input.lammps +0 -0
  253. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/d3c842c5b9476e48f7145b370cd330372b9293e1.json +0 -0
  254. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/graph.pb +0 -0
  255. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/submission.json +0 -0
  256. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_script_generation.py +0 -0
  257. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_ssh_context.py +0 -0
  258. {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_work_path/.gitkeep +0 -0

.pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
  # Python
  - repo: https://github.com/astral-sh/ruff-pre-commit
  # Ruff version.
- rev: v0.5.4
+ rev: v0.6.2
  hooks:
  - id: ruff
  args: ["--fix"]

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dpdispatcher
- Version: 0.6.6
+ Version: 0.6.7
  Summary: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish
  Author: DeepModeling
  License: GNU LESSER GENERAL PUBLIC LICENSE

ci/pbs/start-pbs.sh
@@ -1,6 +1,6 @@
  #!/bin/bash

- docker-compose up -d --no-build
+ docker compose up -d --no-build
  while [ `docker exec -u pbsuser pbs_master pbsnodes -a | grep "Mom = pbs_slave" | wc -l` -ne 2 ]
  do
  echo "Waiting for PBS slave nodes to become available";

ci/pbs.sh
@@ -2,13 +2,14 @@
  set -e

  cd ./ci/pbs
- docker-compose pull
+ docker compose pull
  ./start-pbs.sh
  cd -

  docker exec pbs_master /bin/bash -c "chmod -R 777 /shared_space"
  docker exec pbs_master /bin/bash -c "chown -R pbsuser:pbsuser /home/pbsuser"

+ docker exec pbs_master /bin/bash -c "yum install -y procps"
  docker exec pbs_master /bin/bash -c "cd /dpdispatcher && pip install uv && uv pip install --system .[test] coverage && chown -R pbsuser ."
  docker exec -u pbsuser pbs_master /bin/bash -c "cd /dpdispatcher && coverage run --source=./dpdispatcher -m unittest -v && coverage report"
  docker exec -u pbsuser --env-file <(env | grep GITHUB) pbs_master /bin/bash -c "cd /dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov"

ci/slurm/register_cluster.sh
@@ -2,4 +2,4 @@
  set -e

  docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" && \
- docker-compose restart slurmdbd slurmctld
+ docker compose restart slurmdbd slurmctld

ci/slurm/start-slurm.sh
@@ -1,6 +1,6 @@
  #!/bin/bash

- docker-compose up -d --no-build
+ docker compose up -d --no-build

  while [ `./register_cluster.sh 2>&1 | grep "sacctmgr: error" | wc -l` -ne 0 ]
  do

ci/slurm.sh
@@ -2,9 +2,10 @@
  set -e

  cd ./ci/slurm
- docker-compose pull
+ docker compose pull
  ./start-slurm.sh
  cd -

+ docker exec slurmctld /bin/bash -c "yum install -y procps"
  docker exec slurmctld /bin/bash -c "cd dpdispatcher && pip install uv && uv pip install --system .[test] coverage && coverage run --source=./dpdispatcher -m unittest -v && coverage report"
  docker exec --env-file <(env | grep -e GITHUB -e CODECOV) slurmctld /bin/bash -c "cd dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov"

ci/ssh/start-ssh.sh
@@ -1,6 +1,6 @@
  #!/bin/bash

- docker-compose up -d --no-build
+ docker compose up -d --no-build

  docker exec server /bin/bash -c "ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N \"\" && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"
  docker exec server /bin/bash -c "mkdir -p /dpdispatcher_working"

ci/ssh.sh
@@ -2,7 +2,7 @@
  set -e

  cd ./ci/ssh
- docker-compose pull
+ docker compose pull
  ./start-ssh.sh
  cd -

ci/ssh_rsync.sh
@@ -2,7 +2,7 @@
  set -e

  cd ./ci/ssh
- docker-compose pull
+ docker compose pull
  ./start-ssh.sh
  cd -

doc/conf.py
@@ -21,7 +21,7 @@ from datetime import date

  project = "DPDispatcher"
  copyright = "2020-%d, Deep Modeling" % date.today().year
- author = "Deep Modeling"
+ author = "DeepModeling"


  # -- General configuration ---------------------------------------------------

@@ -97,3 +97,7 @@ intersphinx_mapping = {
  "python": ("https://docs.python.org/", None),
  "dargs": ("https://docs.deepmodeling.com/projects/dargs/en/latest/", None),
  }
+
+ myst_enable_extensions = [
+ "colon_fence",
+ ]

doc/context.md
@@ -9,16 +9,17 @@ One needs to set {dargs:argument}`context_type <machine/context_type>` to one of

  `LazyLocal` directly runs jobs in the local server and local directory.

- Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environmental variables. Therefore, it's advisable to explicitly set the environmental variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
+ Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.

  ## Local

  {dargs:argument}`context_type <machine/context_type>`: `Local`

  `Local` runs jobs in the local server, but in a different directory.
- Files will be copied to the remote directory before jobs start and copied back after jobs finish.
+ Files will be symlinked to the remote directory before jobs start and copied back after jobs finish.
+ If the local directory is not accessible with the [batch system](./batch.md), turn off {dargs:argument}`symlink <machine[LocalContext]/remote_profile/symlink>`, and then files on the local directory will be copied to the remote directory.

- Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environmental variables. Therefore, it's advisable to explicitly set the environmental variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.
+ Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.

  ## SSH

doc/env.md (new file)
@@ -0,0 +1,35 @@
+ # Environment variables
+
+ When launching a job, dpdispatcher sets the following environment variables according to the resources, in addition to user-defined environment variables:
+
+ :::{envvar} DPDISPATCHER_NUMBER_NODE
+
+ The number of nodes required for each job.
+
+ :::
+
+ :::{envvar} DPDISPATCHER_CPU_PER_NODE
+
+ CPU numbers of each node assigned to each job.
+
+ :::
+
+ :::{envvar} DPDISPATCHER_GPU_PER_NODE
+
+ GPU numbers of each node assigned to each job.
+
+ :::
+
+ :::{envvar} DPDISPATCHER_QUEUE_NAME
+
+ The queue name of batch job scheduler system.
+
+ :::
+
+ :::{envvar} DPDISPATCHER_GROUP_SIZE
+
+ The number of tasks in a job. 0 means infinity.
+
+ :::
+
+ These environment variables can be used in the {dargs:argument}`command <task/command>`, for example, `mpirun -n ${DPDISPATCHER_CPU_PER_NODE} xx.run`.
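
As an illustration of that last point, a task that consumes one of these variables in its command could be declared as in the sketch below; the command and file names are placeholders (borrowed from the test fixtures elsewhere in this diff), not something this release defines:

    # hypothetical task spec; the shell expands DPDISPATCHER_CPU_PER_NODE at run time
    task = {
        "command": "mpirun -n ${DPDISPATCHER_CPU_PER_NODE} lmp -i input.lammps",
        "task_work_path": "bct-1/",
        "forward_files": ["conf.lmp", "input.lammps"],
        "backward_files": ["log.lammps"],
    }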

doc/index.rst
@@ -22,6 +22,7 @@ DPDispatcher will monitor (poke) until these jobs finish and download the result
  machine
  resources
  task
+ env
  run
  cli
  api/api

dpdispatcher/_version.py
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.6.6'
- __version_tuple__ = version_tuple = (0, 6, 6)
+ __version__ = version = '0.6.7'
+ __version_tuple__ = version_tuple = (0, 6, 7)

dpdispatcher/base_context.py
@@ -1,5 +1,5 @@
  from abc import ABCMeta, abstractmethod
- from typing import List, Tuple
+ from typing import Any, List, Tuple

  from dargs import Argument

@@ -73,6 +73,66 @@ class BaseContext(metaclass=ABCMeta):
  def check_finish(self, proc):
  raise NotImplementedError("abstract method")

+ def block_checkcall(self, cmd, asynchronously=False) -> Tuple[Any, Any, Any]:
+ """Run command with arguments. Wait for command to complete.
+
+ Parameters
+ ----------
+ cmd : str
+ The command to run.
+ asynchronously : bool, optional, default=False
+ Run command asynchronously. If True, `nohup` will be used to run the command.
+
+ Returns
+ -------
+ stdin
+ standard inout
+ stdout
+ standard output
+ stderr
+ standard error
+
+ Raises
+ ------
+ RuntimeError
+ when the return code is not zero
+ """
+ if asynchronously:
+ cmd = f"nohup {cmd} >/dev/null &"
+ exit_status, stdin, stdout, stderr = self.block_call(cmd)
+ if exit_status != 0:
+ raise RuntimeError(
+ "Get error code %d in calling %s with job: %s . message: %s"
+ % (
+ exit_status,
+ cmd,
+ self.submission.submission_hash,
+ stderr.read().decode("utf-8"),
+ )
+ )
+ return stdin, stdout, stderr
+
+ @abstractmethod
+ def block_call(self, cmd) -> Tuple[int, Any, Any, Any]:
+ """Run command with arguments. Wait for command to complete.
+
+ Parameters
+ ----------
+ cmd : str
+ The command to run.
+
+ Returns
+ -------
+ exit_status
+ exit code
+ stdin
+ standard inout
+ stdout
+ standard output
+ stderr
+ standard error
+ """
+
  @classmethod
  def machine_arginfo(cls) -> Argument:
  """Generate the machine arginfo.

dpdispatcher/contexts/dp_cloud_server_context.py
@@ -335,6 +335,11 @@ class BohriumContext(BaseContext):
  )
  ]

+ def block_call(self, cmd):
+ raise RuntimeError(
+ "Unsupported method. You may use an unsupported combination of the machine and the context."
+ )
+

  DpCloudServerContext = BohriumContext
  LebesgueContext = BohriumContext

dpdispatcher/contexts/hdfs_context.py
@@ -244,3 +244,8 @@ class HDFSContext(BaseContext):

  def read_file(self, fname):
  return HDFS.read_hdfs_file(os.path.join(self.remote_root, fname))
+
+ def block_call(self, cmd):
+ raise RuntimeError(
+ "Unsupported method. You may use an unsupported combination of the machine and the context."
+ )

dpdispatcher/contexts/lazy_local_context.py
@@ -112,23 +112,6 @@ class LazyLocalContext(BaseContext):
  # else:
  # raise RuntimeError('do not find download file ' + fname)

- def block_checkcall(self, cmd):
- # script_dir = os.path.join(self.local_root, self.submission.work_base)
- # os.chdir(script_dir)
- proc = sp.Popen(
- cmd, cwd=self.local_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE
- )
- o, e = proc.communicate()
- stdout = SPRetObj(o)
- stderr = SPRetObj(e)
- code = proc.returncode
- if code != 0:
- raise RuntimeError(
- "Get error code %d in locally calling %s with job: %s ",
- (code, cmd, self.submission.submission_hash),
- )
- return None, stdout, stderr
-
  def block_call(self, cmd):
  proc = sp.Popen(
  cmd, cwd=self.local_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE
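
With this removal, LazyLocalContext (like LocalContext and SSHContext below) relies on the block_checkcall implementation it now inherits from BaseContext, shown above. A rough usage sketch of the inherited contract, illustrative only, where ctx stands for any concrete context instance:

    # block_call returns (exit_status, stdin, stdout, stderr) and does not raise on failure
    exit_status, stdin, stdout, stderr = ctx.block_call("ls")
    if exit_status != 0:
        print(stderr.read().decode("utf-8"))

    # block_checkcall wraps block_call and raises RuntimeError on a nonzero exit code
    stdin, stdout, stderr = ctx.block_checkcall("ls")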

dpdispatcher/contexts/local_context.py
@@ -3,6 +3,9 @@ import shutil
  import subprocess as sp
  from glob import glob
  from subprocess import TimeoutExpired
+ from typing import List
+
+ from dargs import Argument

  from dpdispatcher.base_context import BaseContext
  from dpdispatcher.dlog import dlog

@@ -60,6 +63,7 @@ class LocalContext(BaseContext):
  self.temp_local_root = os.path.abspath(local_root)
  self.temp_remote_root = os.path.abspath(remote_root)
  self.remote_profile = remote_profile
+ self.symlink = remote_profile.get("symlink", True)

  @classmethod
  def load_from_dict(cls, context_dict):

@@ -83,6 +87,25 @@ class LocalContext(BaseContext):
  self.temp_remote_root, submission.submission_hash
  )

+ def _copy_from_local_to_remote(self, local_path, remote_path):
+ if not os.path.exists(local_path):
+ raise FileNotFoundError(
+ f"cannot find uploaded file {os.path.join(local_path)}"
+ )
+ if os.path.exists(remote_path):
+ os.remove(remote_path)
+ _check_file_path(remote_path)
+
+ if self.symlink:
+ # ensure the file exist
+ os.symlink(local_path, remote_path)
+ elif os.path.isfile(local_path):
+ shutil.copyfile(local_path, remote_path)
+ elif os.path.isdir(local_path):
+ shutil.copytree(local_path, remote_path)
+ else:
+ raise ValueError(f"Unknown file type: {local_path}")
+
  def upload(self, submission):
  os.makedirs(self.remote_root, exist_ok=True)
  for ii in submission.belonging_tasks:

@@ -103,14 +126,9 @@ class LocalContext(BaseContext):
  file_list.extend(rel_file_list)

  for jj in file_list:
- if not os.path.exists(os.path.join(local_job, jj)):
- raise FileNotFoundError(
- "cannot find upload file " + os.path.join(local_job, jj)
- )
- if os.path.exists(os.path.join(remote_job, jj)):
- os.remove(os.path.join(remote_job, jj))
- _check_file_path(os.path.join(remote_job, jj))
- os.symlink(os.path.join(local_job, jj), os.path.join(remote_job, jj))
+ self._copy_from_local_to_remote(
+ os.path.join(local_job, jj), os.path.join(remote_job, jj)
+ )

  local_job = self.local_root
  remote_job = self.remote_root

@@ -128,14 +146,9 @@ class LocalContext(BaseContext):
  file_list.extend(rel_file_list)

  for jj in file_list:
- if not os.path.exists(os.path.join(local_job, jj)):
- raise FileNotFoundError(
- "cannot find upload file " + os.path.join(local_job, jj)
- )
- if os.path.exists(os.path.join(remote_job, jj)):
- os.remove(os.path.join(remote_job, jj))
- _check_file_path(os.path.join(remote_job, jj))
- os.symlink(os.path.join(local_job, jj), os.path.join(remote_job, jj))
+ self._copy_from_local_to_remote(
+ os.path.join(local_job, jj), os.path.join(remote_job, jj)
+ )

  def download(
  self, submission, check_exists=False, mark_failure=True, back_error=False

@@ -288,21 +301,6 @@ class LocalContext(BaseContext):
  # no nothing in the case of linked files
  pass

- def block_checkcall(self, cmd):
- proc = sp.Popen(
- cmd, cwd=self.remote_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE
- )
- o, e = proc.communicate()
- stdout = SPRetObj(o)
- stderr = SPRetObj(e)
- code = proc.returncode
- if code != 0:
- raise RuntimeError(
- f"Get error code {code} in locally calling {cmd} with job: {self.submission.submission_hash}"
- f"\nStandard error: {stderr}"
- )
- return None, stdout, stderr
-
  def block_call(self, cmd):
  proc = sp.Popen(
  cmd, cwd=self.remote_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE

@@ -351,3 +349,31 @@ class LocalContext(BaseContext):
  stdout = None
  stderr = None
  return ret, stdout, stderr
+
+ @classmethod
+ def machine_subfields(cls) -> List[Argument]:
+ """Generate the machine subfields.
+
+ Returns
+ -------
+ list[Argument]
+ machine subfields
+ """
+ doc_remote_profile = "The information used to maintain the local machine."
+ return [
+ Argument(
+ "remote_profile",
+ dict,
+ optional=True,
+ doc=doc_remote_profile,
+ sub_fields=[
+ Argument(
+ "symlink",
+ bool,
+ optional=True,
+ default=True,
+ doc="Whether to use symbolic links to replace copy. This option should be turned off if the local directory is not accessible on the Batch system.",
+ ),
+ ],
+ )
+ ]
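
For illustration, a machine specification that turns the new option off might look like the sketch below; the scheduler and paths are placeholders, only the remote_profile "symlink" key is defined by this release (see also doc/context.md above):

    # hypothetical machine spec for the Local context with symlinking disabled
    machine = {
        "batch_type": "Slurm",                    # placeholder scheduler
        "context_type": "Local",
        "local_root": "./",
        "remote_root": "/scratch/dpdispatcher",   # placeholder path
        "remote_profile": {
            # copy files instead of creating symbolic links, e.g. when the
            # batch system cannot follow links back into local_root
            "symlink": False,
        },
    }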

dpdispatcher/contexts/openapi_context.py
@@ -258,3 +258,8 @@ class OpenAPIContext(BaseContext):
  dir_to_be_removed = os.path.join(local_root, "backup")
  if os.path.exists(dir_to_be_removed):
  shutil.rmtree(dir_to_be_removed)
+
+ def block_call(self, cmd):
+ raise RuntimeError(
+ "Unsupported method. You may use an unsupported combination of the machine and the context."
+ )

dpdispatcher/contexts/ssh_context.py
@@ -44,6 +44,7 @@ class SSHSession:
  totp_secret=None,
  tar_compress=True,
  look_for_keys=True,
+ execute_command=None,
  ):
  self.hostname = hostname
  self.username = username

@@ -56,6 +57,7 @@ class SSHSession:
  self.ssh = None
  self.tar_compress = tar_compress
  self.look_for_keys = look_for_keys
+ self.execute_command = execute_command
  self._keyboard_interactive_auth = False
  self._setup_ssh()

@@ -237,6 +239,8 @@ class SSHSession:
  self.ssh._transport = ts # type: ignore
  # reset sftp
  self._sftp = None
+ if self.execute_command is not None:
+ self.exec_command(self.execute_command)

  def inter_handler(self, title, instructions, prompt_list):
  """inter_handler: the callback for paramiko.transport.auth_interactive.

@@ -338,6 +342,7 @@ class SSHSession:
  doc_look_for_keys = (
  "enable searching for discoverable private key files in ~/.ssh/"
  )
+ doc_execute_command = "execute command after ssh connection is established."
  ssh_remote_profile_args = [
  Argument("hostname", str, optional=False, doc=doc_hostname),
  Argument("username", str, optional=False, doc=doc_username),

@@ -379,6 +384,13 @@ class SSHSession:
  default=True,
  doc=doc_look_for_keys,
  ),
+ Argument(
+ "execute_command",
+ str,
+ optional=True,
+ default=None,
+ doc=doc_execute_command,
+ ),
  ]
  ssh_remote_profile_format = Argument(
  "ssh_session", dict, ssh_remote_profile_args

@@ -755,41 +767,6 @@ class SSHContext(BaseContext):
  tar_compress=self.remote_profile.get("tar_compress", None),
  )

- def block_checkcall(self, cmd, asynchronously=False, stderr_whitelist=None):
- """Run command with arguments. Wait for command to complete. If the return code
- was zero then return, otherwise raise RuntimeError.
-
- Parameters
- ----------
- cmd : str
- The command to run.
- asynchronously : bool, optional, default=False
- Run command asynchronously. If True, `nohup` will be used to run the command.
- stderr_whitelist : list of str, optional, default=None
- If not None, the stderr will be checked against the whitelist. If the stderr
- contains any of the strings in the whitelist, the command will be considered
- successful.
- """
- assert self.remote_root is not None
- self.ssh_session.ensure_alive()
- if asynchronously:
- cmd = f"nohup {cmd} >/dev/null &"
- stdin, stdout, stderr = self.ssh_session.exec_command(
- (f"cd {shlex.quote(self.remote_root)} ;") + cmd
- )
- exit_status = stdout.channel.recv_exit_status()
- if exit_status != 0:
- raise RuntimeError(
- "Get error code %d in calling %s through ssh with job: %s . message: %s"
- % (
- exit_status,
- cmd,
- self.submission.submission_hash,
- stderr.read().decode("utf-8"),
- )
- )
- return stdin, stdout, stderr
-
  def block_call(self, cmd):
  assert self.remote_root is not None
  self.ssh_session.ensure_alive()
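
For illustration, a sketch of an SSH machine specification using the new key; the host, user, paths, scheduler, and the command itself are placeholders, only "execute_command" is introduced by this diff:

    # hypothetical machine spec; execute_command runs once, right after the
    # SSH connection is established
    machine = {
        "batch_type": "Slurm",               # placeholder scheduler
        "context_type": "SSH",
        "local_root": "./",
        "remote_root": "/home/user/work",    # placeholder path
        "remote_profile": {
            "hostname": "login.example.org",         # placeholder
            "username": "user",                      # placeholder
            "execute_command": "module load slurm",  # placeholder command
        },
    }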

dpdispatcher/machine.py
@@ -161,6 +161,9 @@ class Machine(metaclass=ABCMeta):
  machine_dict["remote_profile"] = self.context.remote_profile
  else:
  machine_dict["remote_profile"] = {}
+ # normalize the dict
+ base = self.arginfo()
+ machine_dict = base.normalize_value(machine_dict, trim_pattern="_*")
  return machine_dict

  def __eq__(self, other):

@@ -265,6 +268,15 @@ class Machine(metaclass=ABCMeta):

  export_envs_part = ""
  envs = job.resources.envs
+ envs = {
+ # export resources information to the environment variables
+ "DPDISPATCHER_NUMBER_NODE": job.resources.number_node,
+ "DPDISPATCHER_CPU_PER_NODE": job.resources.cpu_per_node,
+ "DPDISPATCHER_GPU_PER_NODE": job.resources.gpu_per_node,
+ "DPDISPATCHER_QUEUE_NAME": job.resources.queue_name,
+ "DPDISPATCHER_GROUP_SIZE": job.resources.group_size,
+ **envs,
+ }
  for k, v in envs.items():
  if isinstance(v, list):
  for each_value in v:

dpdispatcher/machines/JH_UniScheduler.py
@@ -105,7 +105,7 @@ class JH_UniScheduler(Machine):
  elif ret != 0:
  # just retry when any unknown error raised.
  raise RetrySignal(
- "Get error code %d in checking status through ssh with job: %s . message: %s"
+ "Get error code %d in checking status with job: %s . message: %s"
  % (ret, job.job_hash, err_str)
  )
  status_out = stdout.read().decode("utf-8").split("\n")

dpdispatcher/machines/distributed_shell.py
@@ -181,8 +181,8 @@ class DistributedShell(Machine):
  if ret != 0:
  err_str = stderr.decode("utf-8")
  raise RuntimeError(
- "Command squeue fails to execute, error message:%s\nreturn code %d\n"
- % (err_str, ret)
+ "Command %s fails to execute, error message:%s\nreturn code %d\n"
+ % (cmd, err_str, ret)
  )
  job_id = int(stdout.decode("utf-8").strip())

dpdispatcher/machines/lsf.py
@@ -129,7 +129,7 @@ class LSF(Machine):
  elif ret != 0:
  # just retry when any unknown error raised.
  raise RetrySignal(
- "Get error code %d in checking status through ssh with job: %s . message: %s"
+ "Get error code %d in checking status with job: %s . message: %s"
  % (ret, job.job_hash, err_str)
  )
  status_out = stdout.read().decode("utf-8").split("\n")