dpdispatcher 0.6.6__tar.gz → 0.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dpdispatcher might be problematic.
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.pre-commit-config.yaml +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/PKG-INFO +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/pbs/start-pbs.sh +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/pbs.sh +2 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm/register_cluster.sh +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm/start-slurm.sh +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm.sh +2 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh/start-ssh.sh +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh.sh +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh_rsync.sh +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/conf.py +5 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/context.md +4 -3
- dpdispatcher-0.6.7/doc/env.md +35 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/index.rst +1 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/_version.py +2 -2
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/base_context.py +61 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/dp_cloud_server_context.py +5 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/hdfs_context.py +5 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/lazy_local_context.py +0 -17
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/local_context.py +57 -31
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/openapi_context.py +5 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/ssh_context.py +12 -35
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machine.py +12 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/JH_UniScheduler.py +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/distributed_shell.py +2 -2
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/lsf.py +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/pbs.py +11 -8
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/shell.py +9 -8
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/slurm.py +18 -21
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/submission.py +3 -3
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/PKG-INFO +1 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/SOURCES.txt +1 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_argcheck.py +4 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_script_generation.py +5 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run_submission.py +26 -1
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.git_archival.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.gitattributes +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/dependabot.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/ci-docker.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/machines.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/mirror_gitee.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/publish_conda.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/pyright.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/release.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/test-bohrium.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.github/workflows/test.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.gitignore +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/.readthedocs.yaml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/CONTRIBUTING.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/Dockerfile +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/LICENSE +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/README.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/LICENSE +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/README.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/pbs/docker-compose.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/slurm/docker-compose.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/ci/ssh/docker-compose.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/codecov.yml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/conda/conda_build_config.yaml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/conda/meta.yaml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/.gitignore +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/Makefile +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/batch.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/cli.rst +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/credits.rst +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/dpdispatcher_on_yarn.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/expanse.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/g16.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/shell.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/examples/template.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/getting-started.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/install.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/machine.rst +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/make.bat +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/pep723.rst +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/requirements.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/resources.rst +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/run.md +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/doc/task.rst +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/__main__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/arginfo.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/contexts/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dlog.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dpcloudserver/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dpcloudserver/client.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/dpdisp.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/gui.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/run.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/entrypoints/submission.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/dp_cloud_server.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/fugaku.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/machines/openapi.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/run.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/client.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/config.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/retcode.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/dpcloudserver/zip_file.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/hdfs_cli.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/job_status.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/record.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher/utils/utils.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/dependency_links.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/entry_points.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/requires.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/dpdispatcher.egg-info/top_level.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/dpdisp_run.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/machine/expanse.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/machine/lazy_local.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/machine/mandu.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/expanse_cpu.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/mandu.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/template.slurm +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/resources/tiger.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/task/deepmd-kit.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/examples/task/g16.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/pyproject.toml +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/scripts/script_gen_dargs_docs.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/scripts/script_gen_dargs_json.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/setup.cfg +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/.gitignore +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/__init__.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/batch.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/context.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/debug_test_class_submission_init.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_JH_UniScheduler.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_ali_ehpc.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_dp_cloud_server.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_lazy_ali_ehpc.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_lsf.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_shell.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_slurm.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/devel_test_ssh_ali_ehpc.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/hello_world.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/job.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_JH_UniScheduler.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_ali_ehpc.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_center.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_diffenert.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_dp_cloud_server.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_fugaku.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_if_cuda_multi_devices.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazy_local_jh_unischeduler.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazy_local_lsf.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazy_local_slurm.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lazylocal_shell.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_local_fugaku.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_local_shell.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_lsf.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_openapi.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_slurm.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/machine_yarn.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/resources.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/submission.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/jsons/task.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/sample_class.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/script_gen_json.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/slurm_test.env +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_JH_UniScheduler_script_generation.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_job.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_machine.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_machine_dispatch.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_resources.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_submission.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_submission_init.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_class_task.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_cli.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-1/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-1/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-1/some_dir/some_file +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-2/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-2/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-3/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-3/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-4/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/bct-4/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/dir with space/file with space +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_context_dir/0_md/some_dir/some_file +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_examples.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_group_size.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_gui.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_context.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-1/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-1/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-2/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-2/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-3/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-3/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-4/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/bct-4/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_hdfs_dir/0_md/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_if_cuda_multi_devices/test_dir/test.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_import_classes.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-1/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-1/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-2/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-2/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-3/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-3/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-4/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/bct-4/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_jh_unischeduler/0_md/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lazy_local_context.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_local_context.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-1/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-1/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-2/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-2/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-3/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-3/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-4/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/bct-4/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_lsf_dir/0_md/submission.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-1/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-1/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-2/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-2/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-3/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-3/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-4/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/bct-4/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_pbs_dir/0_md/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_retry.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run_submission_bohrium.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_run_submission_ratio_unfinished.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_cuda_multi_devices.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/fail_dir/mock_fail_task.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir with space/example.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir1/example.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir2/example.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir3/example.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/dir4/example.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/parent_dir/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_shell_trival_dir/recover_dir/mock_recover_task.txt +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-1/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-1/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-2/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-2/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-3/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-3/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-4/conf.lmp +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/bct-4/input.lammps +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/d3c842c5b9476e48f7145b370cd330372b9293e1.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/graph.pb +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_dir/0_md/submission.json +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_slurm_script_generation.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_ssh_context.py +0 -0
- {dpdispatcher-0.6.6 → dpdispatcher-0.6.7}/tests/test_work_path/.gitkeep +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dpdispatcher
-Version: 0.6.6
+Version: 0.6.7
 Summary: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish
 Author: DeepModeling
 License: GNU LESSER GENERAL PUBLIC LICENSE

ci/pbs.sh
@@ -2,13 +2,14 @@
 set -e

 cd ./ci/pbs
-docker
+docker compose pull
 ./start-pbs.sh
 cd -

 docker exec pbs_master /bin/bash -c "chmod -R 777 /shared_space"
 docker exec pbs_master /bin/bash -c "chown -R pbsuser:pbsuser /home/pbsuser"

+docker exec pbs_master /bin/bash -c "yum install -y procps"
 docker exec pbs_master /bin/bash -c "cd /dpdispatcher && pip install uv && uv pip install --system .[test] coverage && chown -R pbsuser ."
 docker exec -u pbsuser pbs_master /bin/bash -c "cd /dpdispatcher && coverage run --source=./dpdispatcher -m unittest -v && coverage report"
 docker exec -u pbsuser --env-file <(env | grep GITHUB) pbs_master /bin/bash -c "cd /dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov"

ci/slurm.sh
@@ -2,9 +2,10 @@
 set -e

 cd ./ci/slurm
-docker
+docker compose pull
 ./start-slurm.sh
 cd -

+docker exec slurmctld /bin/bash -c "yum install -y procps"
 docker exec slurmctld /bin/bash -c "cd dpdispatcher && pip install uv && uv pip install --system .[test] coverage && coverage run --source=./dpdispatcher -m unittest -v && coverage report"
 docker exec --env-file <(env | grep -e GITHUB -e CODECOV) slurmctld /bin/bash -c "cd dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov"

ci/ssh/start-ssh.sh
@@ -1,6 +1,6 @@
 #!/bin/bash

-docker
+docker compose up -d --no-build

 docker exec server /bin/bash -c "ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N \"\" && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"
 docker exec server /bin/bash -c "mkdir -p /dpdispatcher_working"

doc/conf.py
@@ -21,7 +21,7 @@ from datetime import date

 project = "DPDispatcher"
 copyright = "2020-%d, Deep Modeling" % date.today().year
-author = "
+author = "DeepModeling"


 # -- General configuration ---------------------------------------------------
@@ -97,3 +97,7 @@ intersphinx_mapping = {
     "python": ("https://docs.python.org/", None),
     "dargs": ("https://docs.deepmodeling.com/projects/dargs/en/latest/", None),
 }
+
+myst_enable_extensions = [
+    "colon_fence",
+]

doc/context.md
@@ -9,16 +9,17 @@ One needs to set {dargs:argument}`context_type <machine/context_type>` to one of

 `LazyLocal` directly runs jobs in the local server and local directory.

-Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current
+Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.

 ## Local

 {dargs:argument}`context_type <machine/context_type>`: `Local`

 `Local` runs jobs in the local server, but in a different directory.
-Files will be
+Files will be symlinked to the remote directory before jobs start and copied back after jobs finish.
+If the local directory is not accessible with the [batch system](./batch.md), turn off {dargs:argument}`symlink <machine[LocalContext]/remote_profile/symlink>`, and then files on the local directory will be copied to the remote directory.

-Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current
+Since [`bash -l`](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) is used in the shebang line of the submission scripts, the [login shell startup files](https://www.gnu.org/software/bash/manual/bash.html#Invoking-Bash) will be executed, potentially overriding the current environment variables. Therefore, it's advisable to explicitly set the environment variables using {dargs:argument}`envs <resources/envs>` or {dargs:argument}`source_list <resources/source_list>`.

 ## SSH

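The advice added above — pin the environment through `envs` or `source_list` instead of relying on what the login-shell startup files leave behind — is easiest to see in a resources block. A minimal sketch; the values and paths below are illustrative, not taken from the package:

```python
# Illustrative resources block: pin the environment explicitly instead of
# relying on whatever the login-shell startup files set or reset.
resources = {
    "number_node": 1,
    "cpu_per_node": 4,
    "gpu_per_node": 0,
    "queue_name": "normal",
    "group_size": 5,
    # hypothetical setup script and variable; adjust to the actual cluster
    "source_list": ["/opt/software/env.sh"],
    "envs": {"OMP_NUM_THREADS": "4"},
}
```
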
doc/env.md (new file)
@@ -0,0 +1,35 @@
+# Environment variables
+
+When launching a job, dpdispatcher sets the following environment variables according to the resources, in addition to user-defined environment variables:
+
+:::{envvar} DPDISPATCHER_NUMBER_NODE
+
+The number of nodes required for each job.
+
+:::
+
+:::{envvar} DPDISPATCHER_CPU_PER_NODE
+
+CPU numbers of each node assigned to each job.
+
+:::
+
+:::{envvar} DPDISPATCHER_GPU_PER_NODE
+
+GPU numbers of each node assigned to each job.
+
+:::
+
+:::{envvar} DPDISPATCHER_QUEUE_NAME
+
+The queue name of batch job scheduler system.
+
+:::
+
+:::{envvar} DPDISPATCHER_GROUP_SIZE
+
+The number of tasks in a job. 0 means infinity.
+
+:::
+
+These environment variables can be used in the {dargs:argument}`command <task/command>`, for example, `mpirun -n ${DPDISPATCHER_CPU_PER_NODE} xx.run`.

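The closing example in the new doc/env.md (`mpirun -n ${DPDISPATCHER_CPU_PER_NODE} xx.run`) plugs straight into a task definition. A minimal sketch, assuming a hypothetical `xx.run` binary, task directory, and file names:

```python
from dpdispatcher import Task

# The command is expanded by the remote shell, so the DPDISPATCHER_* variables
# injected by dpdispatcher can size the MPI run at execution time.
task = Task(
    command="mpirun -n ${DPDISPATCHER_CPU_PER_NODE} ./xx.run",
    task_work_path="bct-1/",        # hypothetical task directory
    forward_files=["conf.lmp"],     # hypothetical input file
    backward_files=["log"],         # hypothetical output file
)
```
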
dpdispatcher/base_context.py
@@ -1,5 +1,5 @@
 from abc import ABCMeta, abstractmethod
-from typing import List, Tuple
+from typing import Any, List, Tuple

 from dargs import Argument

@@ -73,6 +73,66 @@ class BaseContext(metaclass=ABCMeta):
     def check_finish(self, proc):
         raise NotImplementedError("abstract method")

+    def block_checkcall(self, cmd, asynchronously=False) -> Tuple[Any, Any, Any]:
+        """Run command with arguments. Wait for command to complete.
+
+        Parameters
+        ----------
+        cmd : str
+            The command to run.
+        asynchronously : bool, optional, default=False
+            Run command asynchronously. If True, `nohup` will be used to run the command.
+
+        Returns
+        -------
+        stdin
+            standard inout
+        stdout
+            standard output
+        stderr
+            standard error
+
+        Raises
+        ------
+        RuntimeError
+            when the return code is not zero
+        """
+        if asynchronously:
+            cmd = f"nohup {cmd} >/dev/null &"
+        exit_status, stdin, stdout, stderr = self.block_call(cmd)
+        if exit_status != 0:
+            raise RuntimeError(
+                "Get error code %d in calling %s with job: %s . message: %s"
+                % (
+                    exit_status,
+                    cmd,
+                    self.submission.submission_hash,
+                    stderr.read().decode("utf-8"),
+                )
+            )
+        return stdin, stdout, stderr
+
+    @abstractmethod
+    def block_call(self, cmd) -> Tuple[int, Any, Any, Any]:
+        """Run command with arguments. Wait for command to complete.
+
+        Parameters
+        ----------
+        cmd : str
+            The command to run.
+
+        Returns
+        -------
+        exit_status
+            exit code
+        stdin
+            standard inout
+        stdout
+            standard output
+        stderr
+            standard error
+        """
+
     @classmethod
     def machine_arginfo(cls) -> Argument:
         """Generate the machine arginfo.

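This refactor turns `block_checkcall` into shared base-class logic built on an abstract `block_call` hook. The toy class below mirrors that contract without touching dpdispatcher's real class hierarchy; it is a sketch of the pattern, not the package's implementation:

```python
import io
import subprocess as sp


class ToyContext:
    """Stand-in mimicking the new contract: a concrete block_call() hook plus a
    shared block_checkcall() that raises when the exit status is non-zero."""

    def block_call(self, cmd):
        proc = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = proc.communicate()
        # Wrap stderr so it exposes .read(), which the shared error path decodes.
        return proc.returncode, None, io.BytesIO(out), io.BytesIO(err)

    def block_checkcall(self, cmd):
        exit_status, stdin, stdout, stderr = self.block_call(cmd)
        if exit_status != 0:
            raise RuntimeError(
                f"Get error code {exit_status} in calling {cmd}: "
                + stderr.read().decode("utf-8")
            )
        return stdin, stdout, stderr


if __name__ == "__main__":
    _, out, _ = ToyContext().block_checkcall("echo hello")
    print(out.read().decode())  # hello
```
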
dpdispatcher/contexts/dp_cloud_server_context.py
@@ -335,6 +335,11 @@ class BohriumContext(BaseContext):
             )
         ]

+    def block_call(self, cmd):
+        raise RuntimeError(
+            "Unsupported method. You may use an unsupported combination of the machine and the context."
+        )
+

 DpCloudServerContext = BohriumContext
 LebesgueContext = BohriumContext

dpdispatcher/contexts/hdfs_context.py
@@ -244,3 +244,8 @@ class HDFSContext(BaseContext):

     def read_file(self, fname):
         return HDFS.read_hdfs_file(os.path.join(self.remote_root, fname))
+
+    def block_call(self, cmd):
+        raise RuntimeError(
+            "Unsupported method. You may use an unsupported combination of the machine and the context."
+        )

dpdispatcher/contexts/lazy_local_context.py
@@ -112,23 +112,6 @@ class LazyLocalContext(BaseContext):
         # else:
         #     raise RuntimeError('do not find download file ' + fname)

-    def block_checkcall(self, cmd):
-        # script_dir = os.path.join(self.local_root, self.submission.work_base)
-        # os.chdir(script_dir)
-        proc = sp.Popen(
-            cmd, cwd=self.local_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE
-        )
-        o, e = proc.communicate()
-        stdout = SPRetObj(o)
-        stderr = SPRetObj(e)
-        code = proc.returncode
-        if code != 0:
-            raise RuntimeError(
-                "Get error code %d in locally calling %s with job: %s ",
-                (code, cmd, self.submission.submission_hash),
-            )
-        return None, stdout, stderr
-
     def block_call(self, cmd):
         proc = sp.Popen(
             cmd, cwd=self.local_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE

dpdispatcher/contexts/local_context.py
@@ -3,6 +3,9 @@ import shutil
 import subprocess as sp
 from glob import glob
 from subprocess import TimeoutExpired
+from typing import List
+
+from dargs import Argument

 from dpdispatcher.base_context import BaseContext
 from dpdispatcher.dlog import dlog
@@ -60,6 +63,7 @@ class LocalContext(BaseContext):
         self.temp_local_root = os.path.abspath(local_root)
         self.temp_remote_root = os.path.abspath(remote_root)
         self.remote_profile = remote_profile
+        self.symlink = remote_profile.get("symlink", True)

     @classmethod
     def load_from_dict(cls, context_dict):
@@ -83,6 +87,25 @@
             self.temp_remote_root, submission.submission_hash
         )

+    def _copy_from_local_to_remote(self, local_path, remote_path):
+        if not os.path.exists(local_path):
+            raise FileNotFoundError(
+                f"cannot find uploaded file {os.path.join(local_path)}"
+            )
+        if os.path.exists(remote_path):
+            os.remove(remote_path)
+        _check_file_path(remote_path)
+
+        if self.symlink:
+            # ensure the file exist
+            os.symlink(local_path, remote_path)
+        elif os.path.isfile(local_path):
+            shutil.copyfile(local_path, remote_path)
+        elif os.path.isdir(local_path):
+            shutil.copytree(local_path, remote_path)
+        else:
+            raise ValueError(f"Unknown file type: {local_path}")
+
     def upload(self, submission):
         os.makedirs(self.remote_root, exist_ok=True)
         for ii in submission.belonging_tasks:
@@ -103,14 +126,9 @@
             file_list.extend(rel_file_list)

             for jj in file_list:
-
-
-
-                )
-                if os.path.exists(os.path.join(remote_job, jj)):
-                    os.remove(os.path.join(remote_job, jj))
-                _check_file_path(os.path.join(remote_job, jj))
-                os.symlink(os.path.join(local_job, jj), os.path.join(remote_job, jj))
+                self._copy_from_local_to_remote(
+                    os.path.join(local_job, jj), os.path.join(remote_job, jj)
+                )

         local_job = self.local_root
         remote_job = self.remote_root
@@ -128,14 +146,9 @@
         file_list.extend(rel_file_list)

         for jj in file_list:
-
-
-
-            )
-            if os.path.exists(os.path.join(remote_job, jj)):
-                os.remove(os.path.join(remote_job, jj))
-            _check_file_path(os.path.join(remote_job, jj))
-            os.symlink(os.path.join(local_job, jj), os.path.join(remote_job, jj))
+            self._copy_from_local_to_remote(
+                os.path.join(local_job, jj), os.path.join(remote_job, jj)
+            )

     def download(
         self, submission, check_exists=False, mark_failure=True, back_error=False
@@ -288,21 +301,6 @@
                 # no nothing in the case of linked files
                 pass

-    def block_checkcall(self, cmd):
-        proc = sp.Popen(
-            cmd, cwd=self.remote_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE
-        )
-        o, e = proc.communicate()
-        stdout = SPRetObj(o)
-        stderr = SPRetObj(e)
-        code = proc.returncode
-        if code != 0:
-            raise RuntimeError(
-                f"Get error code {code} in locally calling {cmd} with job: {self.submission.submission_hash}"
-                f"\nStandard error: {stderr}"
-            )
-        return None, stdout, stderr
-
     def block_call(self, cmd):
         proc = sp.Popen(
             cmd, cwd=self.remote_root, shell=True, stdout=sp.PIPE, stderr=sp.PIPE
@@ -351,3 +349,31 @@
         stdout = None
         stderr = None
         return ret, stdout, stderr
+
+    @classmethod
+    def machine_subfields(cls) -> List[Argument]:
+        """Generate the machine subfields.
+
+        Returns
+        -------
+        list[Argument]
+            machine subfields
+        """
+        doc_remote_profile = "The information used to maintain the local machine."
+        return [
+            Argument(
+                "remote_profile",
+                dict,
+                optional=True,
+                doc=doc_remote_profile,
+                sub_fields=[
+                    Argument(
+                        "symlink",
+                        bool,
+                        optional=True,
+                        default=True,
+                        doc="Whether to use symbolic links to replace copy. This option should be turned off if the local directory is not accessible on the Batch system.",
+                    ),
+                ],
+            )
+        ]

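The new `remote_profile.symlink` knob for `LocalContext` (default `True`, per the `Argument` definition above) is what the updated doc/context.md refers to. A sketch of a machine block that turns it off; the batch type and paths are placeholders, not values from the package:

```python
# Illustrative machine block: keep LocalContext but copy files instead of
# symlinking them, for the case where the batch system cannot see the local
# directory. Batch type and paths are placeholders.
machine = {
    "batch_type": "Slurm",
    "context_type": "LocalContext",
    "local_root": "./work",
    "remote_root": "/scratch/dpdispatcher",
    "remote_profile": {"symlink": False},  # default is True (symlink as before)
}
```
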
dpdispatcher/contexts/openapi_context.py
@@ -258,3 +258,8 @@ class OpenAPIContext(BaseContext):
         dir_to_be_removed = os.path.join(local_root, "backup")
         if os.path.exists(dir_to_be_removed):
             shutil.rmtree(dir_to_be_removed)
+
+    def block_call(self, cmd):
+        raise RuntimeError(
+            "Unsupported method. You may use an unsupported combination of the machine and the context."
+        )

dpdispatcher/contexts/ssh_context.py
@@ -44,6 +44,7 @@ class SSHSession:
         totp_secret=None,
         tar_compress=True,
         look_for_keys=True,
+        execute_command=None,
     ):
         self.hostname = hostname
         self.username = username
@@ -56,6 +57,7 @@
         self.ssh = None
         self.tar_compress = tar_compress
         self.look_for_keys = look_for_keys
+        self.execute_command = execute_command
         self._keyboard_interactive_auth = False
         self._setup_ssh()

@@ -237,6 +239,8 @@
         self.ssh._transport = ts  # type: ignore
         # reset sftp
         self._sftp = None
+        if self.execute_command is not None:
+            self.exec_command(self.execute_command)

     def inter_handler(self, title, instructions, prompt_list):
         """inter_handler: the callback for paramiko.transport.auth_interactive.
@@ -338,6 +342,7 @@
         doc_look_for_keys = (
             "enable searching for discoverable private key files in ~/.ssh/"
         )
+        doc_execute_command = "execute command after ssh connection is established."
         ssh_remote_profile_args = [
             Argument("hostname", str, optional=False, doc=doc_hostname),
             Argument("username", str, optional=False, doc=doc_username),
@@ -379,6 +384,13 @@
                 default=True,
                 doc=doc_look_for_keys,
             ),
+            Argument(
+                "execute_command",
+                str,
+                optional=True,
+                default=None,
+                doc=doc_execute_command,
+            ),
         ]
         ssh_remote_profile_format = Argument(
             "ssh_session", dict, ssh_remote_profile_args
@@ -755,41 +767,6 @@
             tar_compress=self.remote_profile.get("tar_compress", None),
         )

-    def block_checkcall(self, cmd, asynchronously=False, stderr_whitelist=None):
-        """Run command with arguments. Wait for command to complete. If the return code
-        was zero then return, otherwise raise RuntimeError.
-
-        Parameters
-        ----------
-        cmd : str
-            The command to run.
-        asynchronously : bool, optional, default=False
-            Run command asynchronously. If True, `nohup` will be used to run the command.
-        stderr_whitelist : list of str, optional, default=None
-            If not None, the stderr will be checked against the whitelist. If the stderr
-            contains any of the strings in the whitelist, the command will be considered
-            successful.
-        """
-        assert self.remote_root is not None
-        self.ssh_session.ensure_alive()
-        if asynchronously:
-            cmd = f"nohup {cmd} >/dev/null &"
-        stdin, stdout, stderr = self.ssh_session.exec_command(
-            (f"cd {shlex.quote(self.remote_root)} ;") + cmd
-        )
-        exit_status = stdout.channel.recv_exit_status()
-        if exit_status != 0:
-            raise RuntimeError(
-                "Get error code %d in calling %s through ssh with job: %s . message: %s"
-                % (
-                    exit_status,
-                    cmd,
-                    self.submission.submission_hash,
-                    stderr.read().decode("utf-8"),
-                )
-            )
-        return stdin, stdout, stderr
-
     def block_call(self, cmd):
         assert self.remote_root is not None
         self.ssh_session.ensure_alive()

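The new `execute_command` key in the SSH `remote_profile` runs one shell command right after the SSH connection is (re)established. A sketch of how it might be wired into a machine block; the host name, paths, and the command itself are hypothetical:

```python
# Illustrative SSH machine block: execute_command runs once per established
# SSH connection, before any job-related commands. All values are made up.
machine = {
    "batch_type": "Slurm",
    "context_type": "SSHContext",
    "local_root": "./work",
    "remote_root": "/home/user/dpdispatcher_work",
    "remote_profile": {
        "hostname": "login.example.org",
        "username": "user",
        "execute_command": "kinit -kt ~/user.keytab user",
    },
}
```
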
dpdispatcher/machine.py
@@ -161,6 +161,9 @@ class Machine(metaclass=ABCMeta):
             machine_dict["remote_profile"] = self.context.remote_profile
         else:
             machine_dict["remote_profile"] = {}
+        # normalize the dict
+        base = self.arginfo()
+        machine_dict = base.normalize_value(machine_dict, trim_pattern="_*")
         return machine_dict

     def __eq__(self, other):
@@ -265,6 +268,15 @@

         export_envs_part = ""
         envs = job.resources.envs
+        envs = {
+            # export resources information to the environment variables
+            "DPDISPATCHER_NUMBER_NODE": job.resources.number_node,
+            "DPDISPATCHER_CPU_PER_NODE": job.resources.cpu_per_node,
+            "DPDISPATCHER_GPU_PER_NODE": job.resources.gpu_per_node,
+            "DPDISPATCHER_QUEUE_NAME": job.resources.queue_name,
+            "DPDISPATCHER_GROUP_SIZE": job.resources.group_size,
+            **envs,
+        }
         for k, v in envs.items():
             if isinstance(v, list):
                 for each_value in v:

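In the merge above, the injected `DPDISPATCHER_*` defaults are listed first and the user's `envs` are unpacked last, so a user-supplied value for the same key wins. A small standalone illustration of that ordering:

```python
# Standalone illustration of the merge order: injected defaults first,
# user-defined envs unpacked last, so the user's value wins on a key clash.
user_envs = {"DPDISPATCHER_CPU_PER_NODE": "32", "OMP_NUM_THREADS": "8"}
merged = {
    "DPDISPATCHER_NUMBER_NODE": 1,
    "DPDISPATCHER_CPU_PER_NODE": 16,
    **user_envs,
}
print(merged["DPDISPATCHER_CPU_PER_NODE"])  # 32 -- user value overrides the default
print(merged["DPDISPATCHER_NUMBER_NODE"])   # 1
```
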
dpdispatcher/machines/JH_UniScheduler.py
@@ -105,7 +105,7 @@ class JH_UniScheduler(Machine):
         elif ret != 0:
             # just retry when any unknown error raised.
             raise RetrySignal(
-                "Get error code %d in checking status
+                "Get error code %d in checking status with job: %s . message: %s"
                 % (ret, job.job_hash, err_str)
             )
         status_out = stdout.read().decode("utf-8").split("\n")

dpdispatcher/machines/distributed_shell.py
@@ -181,8 +181,8 @@ class DistributedShell(Machine):
         if ret != 0:
             err_str = stderr.decode("utf-8")
             raise RuntimeError(
-                "Command
-                % (err_str, ret)
+                "Command %s fails to execute, error message:%s\nreturn code %d\n"
+                % (cmd, err_str, ret)
             )
         job_id = int(stdout.decode("utf-8").strip())

dpdispatcher/machines/lsf.py
@@ -129,7 +129,7 @@ class LSF(Machine):
         elif ret != 0:
             # just retry when any unknown error raised.
             raise RetrySignal(
-                "Get error code %d in checking status
+                "Get error code %d in checking status with job: %s . message: %s"
                 % (ret, job.job_hash, err_str)
             )
         status_out = stdout.read().decode("utf-8").split("\n")