hpc-runner 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpc_runner-0.3.2/.github/workflows/ci.yml +57 -0
- hpc_runner-0.3.2/.pre-commit-config.yaml +24 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/CLAUDE.md +40 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/PKG-INFO +7 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/defaults/config.toml +7 -1
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/configuration.rst +1 -1
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/pyproject.toml +3 -2
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/_version.py +2 -2
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/config.py +2 -2
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/main.py +8 -3
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/run.py +24 -9
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/status.py +0 -1
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/submit.py +0 -2
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/config.py +8 -2
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/descriptors.py +9 -3
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/job.py +6 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/job_array.py +2 -1
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/resources.py +2 -1
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/__init__.py +2 -2
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/base.py +31 -17
- hpc_runner-0.3.2/src/hpc_runner/schedulers/local/scheduler.py +296 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/local/templates/job.sh.j2 +17 -4
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/sge/args.py +14 -14
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/sge/parser.py +4 -4
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/sge/scheduler.py +76 -78
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/sge/templates/batch.sh.j2 +0 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/sge/templates/interactive.sh.j2 +0 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/app.py +14 -25
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/components/filter_bar.py +2 -4
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/components/filter_popup.py +13 -8
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/components/job_table.py +5 -9
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/providers/jobs.py +3 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/screens/confirm.py +3 -1
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/screens/log_viewer.py +1 -3
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/snapshot.py +7 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/workflow/pipeline.py +2 -1
- hpc_runner-0.3.2/tests/modulefiles/dummy_lib/2.5 +4 -0
- hpc_runner-0.3.2/tests/modulefiles/dummy_tool/1.0 +4 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_cli/test_run.py +119 -14
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_core/test_config.py +62 -5
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_schedulers/test_local.py +95 -1
- hpc_runner-0.3.0/.github/workflows/ci.yml +0 -30
- hpc_runner-0.3.0/src/hpc_runner/schedulers/local/scheduler.py +0 -383
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/.github/workflows/docs.yml +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/.github/workflows/publish.yml +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/.gitignore +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/README.md +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/HPC_MONITOR_TUI_PLAN.md +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/Makefile +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/TEXTUAL_STYLING_COOKBOOK.md +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/_static/.gitkeep +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/_templates/.gitkeep +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/cli.rst +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/conf.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/getting_started.rst +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/index.rst +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/programmatic_api.rst +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/docs/source/sge.rst +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/sourceme +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/cancel.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/cli/monitor.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/exceptions.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/job_info.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/result.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/core/types.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/py.typed +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/detection.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/local/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/schedulers/sge/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/templates/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/templates/engine.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/components/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/components/detail_panel.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/providers/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/screens/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/screens/job_details.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/tui/styles/monitor.tcss +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/workflow/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/src/hpc_runner/workflow/dependency.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/conftest.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_cli/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_core/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_core/test_job.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_core/test_resources.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_schedulers/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_schedulers/test_detection.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_schedulers/test_sge.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_tui/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_tui/test_app_snapshot.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_tui/test_detail_panel.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_tui/test_job_table.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_workflow/__init__.py +0 -0
- {hpc_runner-0.3.0 → hpc_runner-0.3.2}/tests/test_workflow/test_pipeline.py +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
with:
|
|
15
|
+
fetch-depth: 0
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: '3.12'
|
|
21
|
+
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: pip install -e ".[dev]"
|
|
24
|
+
|
|
25
|
+
- name: Ruff lint
|
|
26
|
+
run: ruff check src/hpc_runner
|
|
27
|
+
|
|
28
|
+
- name: Ruff format
|
|
29
|
+
run: ruff format --check src/hpc_runner
|
|
30
|
+
|
|
31
|
+
- name: Mypy
|
|
32
|
+
run: mypy src/hpc_runner
|
|
33
|
+
|
|
34
|
+
test:
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
strategy:
|
|
37
|
+
matrix:
|
|
38
|
+
python-version: ['3.10', '3.11', '3.12']
|
|
39
|
+
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v4
|
|
42
|
+
with:
|
|
43
|
+
fetch-depth: 0 # Full history for hatch-vcs
|
|
44
|
+
|
|
45
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
46
|
+
uses: actions/setup-python@v5
|
|
47
|
+
with:
|
|
48
|
+
python-version: ${{ matrix.python-version }}
|
|
49
|
+
|
|
50
|
+
- name: Install environment-modules
|
|
51
|
+
run: sudo apt-get update && sudo apt-get install -y environment-modules
|
|
52
|
+
|
|
53
|
+
- name: Install dependencies
|
|
54
|
+
run: pip install -e ".[dev]"
|
|
55
|
+
|
|
56
|
+
- name: Run tests
|
|
57
|
+
run: pytest -v
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v2.3.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: check-yaml
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: trailing-whitespace
|
|
8
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
9
|
+
rev: v0.15.0
|
|
10
|
+
hooks:
|
|
11
|
+
- id: ruff
|
|
12
|
+
args: [--fix]
|
|
13
|
+
- id: ruff-format
|
|
14
|
+
|
|
15
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
16
|
+
rev: v1.19.1
|
|
17
|
+
hooks:
|
|
18
|
+
- id: mypy
|
|
19
|
+
additional_dependencies:
|
|
20
|
+
- "rich-click>=1.7"
|
|
21
|
+
- "jinja2>=3.0"
|
|
22
|
+
- "textual>=6.11"
|
|
23
|
+
pass_filenames: false
|
|
24
|
+
args: [src/hpc_runner]
|
|
@@ -2,6 +2,46 @@
|
|
|
2
2
|
|
|
3
3
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
4
|
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
Provides a front end for submitting jobs to an HPC cluster using SGE, Slurm, LSF, etc. The aim is to enable two things:
|
|
8
|
+
|
|
9
|
+
1. Abstract away the intricacies of scheduler CLI args and allow the user to submit jobs based on tool name or job type.
|
|
10
|
+
2. Use Environment Modules to construct a clean environment for a job and avoid the 'works for me' problem.
|
|
11
|
+
|
|
12
|
+
### Job/Type Abstraction
|
|
13
|
+
|
|
14
|
+
Usage model:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Launch an xterm on the cluster via SGE:
|
|
18
|
+
qsub -q <queue> -N <job_name> -cwd -V -l <resources> xterm
|
|
19
|
+
|
|
20
|
+
# Launch an xterm on the cluster via hpc-runner:
|
|
21
|
+
hpc run xterm
|
|
22
|
+
|
|
23
|
+
# Or launch a script that uses a particular tool using a job type:
|
|
24
|
+
hpc run --type xcelium run_sim.sh
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Configuration file:
|
|
28
|
+
|
|
29
|
+
* use a TOML based config file to define defaults for jobs
|
|
30
|
+
* define tool flows and specify all scheduler arguments
|
|
31
|
+
* tool flows are detected via the first argument passed to 'hpc run'
|
|
32
|
+
* define job types for flows that don't pass the tool
|
|
33
|
+
* when using makefiles or scripts, the runner can't extract the tool name from the command line
|
|
34
|
+
|
|
35
|
+
### Consistent Environment
|
|
36
|
+
|
|
37
|
+
One of the common pitfalls of HPC flows is that what works for one user doesn't always work for another. Using
|
|
38
|
+
Environment Modules means that common tool flows use a fixed tool version for all users.
|
|
39
|
+
|
|
40
|
+
To accomplish this, the scheduler must purge all modules as part of its setup script and load any modules defined
|
|
41
|
+
for the flow. Each tool or type defined in the configuration file must also define the set of module files that must
|
|
42
|
+
be loaded.
|
|
43
|
+
|
|
44
|
+
|
|
5
45
|
## Build & Development Commands
|
|
6
46
|
|
|
7
47
|
```bash
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hpc-runner
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Unified HPC job submission across multiple schedulers
|
|
5
5
|
Project-URL: Homepage, https://github.com/sjalloq/hpc-runner
|
|
6
6
|
Project-URL: Repository, https://github.com/sjalloq/hpc-runner
|
|
@@ -28,22 +28,24 @@ Provides-Extra: all
|
|
|
28
28
|
Requires-Dist: build; extra == 'all'
|
|
29
29
|
Requires-Dist: furo>=2024.0.0; extra == 'all'
|
|
30
30
|
Requires-Dist: hatch-vcs; extra == 'all'
|
|
31
|
-
Requires-Dist: mypy; extra == 'all'
|
|
31
|
+
Requires-Dist: mypy>=1.19; extra == 'all'
|
|
32
|
+
Requires-Dist: pre-commit; extra == 'all'
|
|
32
33
|
Requires-Dist: pytest-asyncio; extra == 'all'
|
|
33
34
|
Requires-Dist: pytest-cov; extra == 'all'
|
|
34
35
|
Requires-Dist: pytest>=7.0; extra == 'all'
|
|
35
|
-
Requires-Dist: ruff; extra == 'all'
|
|
36
|
+
Requires-Dist: ruff>=0.15; extra == 'all'
|
|
36
37
|
Requires-Dist: sphinx>=7.0; extra == 'all'
|
|
37
38
|
Requires-Dist: twine; extra == 'all'
|
|
38
39
|
Provides-Extra: dev
|
|
39
40
|
Requires-Dist: build; extra == 'dev'
|
|
40
41
|
Requires-Dist: furo>=2024.0.0; extra == 'dev'
|
|
41
42
|
Requires-Dist: hatch-vcs; extra == 'dev'
|
|
42
|
-
Requires-Dist: mypy; extra == 'dev'
|
|
43
|
+
Requires-Dist: mypy>=1.19; extra == 'dev'
|
|
44
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
43
45
|
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
44
46
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
45
47
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
46
|
-
Requires-Dist: ruff; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff>=0.15; extra == 'dev'
|
|
47
49
|
Requires-Dist: sphinx>=7.0; extra == 'dev'
|
|
48
50
|
Requires-Dist: twine; extra == 'dev'
|
|
49
51
|
Description-Content-Type: text/markdown
|
|
@@ -17,7 +17,7 @@ time_resource = "h_rt"
|
|
|
17
17
|
merge_output = true
|
|
18
18
|
|
|
19
19
|
# Module handling
|
|
20
|
-
purge_modules =
|
|
20
|
+
purge_modules = true # Run 'module purge' before loading modules
|
|
21
21
|
silent_modules = false # Use -s flag for silent module operations
|
|
22
22
|
module_init_script = "" # Site-specific module init (empty = auto-detect)
|
|
23
23
|
|
|
@@ -25,6 +25,12 @@ module_init_script = "" # Site-specific module init (empty = auto-detect)
|
|
|
25
25
|
expand_makeflags = true # Expand $NSLOTS in MAKEFLAGS
|
|
26
26
|
unset_vars = [] # Environment variables to unset (e.g., ["https_proxy"])
|
|
27
27
|
|
|
28
|
+
# Local scheduler settings
|
|
29
|
+
[schedulers.local]
|
|
30
|
+
purge_modules = true
|
|
31
|
+
silent_modules = false
|
|
32
|
+
module_init_script = ""
|
|
33
|
+
|
|
28
34
|
# Slurm-specific settings (for future use)
|
|
29
35
|
[schedulers.slurm]
|
|
30
36
|
# Default Slurm settings
|
|
@@ -100,7 +100,7 @@ Save as ``hpc-runner.toml`` (or ``~/.config/hpc-runner/config.toml``):
|
|
|
100
100
|
time_resource = "h_rt"
|
|
101
101
|
|
|
102
102
|
merge_output = true
|
|
103
|
-
purge_modules =
|
|
103
|
+
purge_modules = true
|
|
104
104
|
silent_modules = false
|
|
105
105
|
module_init_script = ""
|
|
106
106
|
expand_makeflags = true
|
|
@@ -39,13 +39,14 @@ dev = [
|
|
|
39
39
|
"pytest>=7.0",
|
|
40
40
|
"pytest-cov",
|
|
41
41
|
"pytest-asyncio",
|
|
42
|
-
"mypy",
|
|
43
|
-
"ruff",
|
|
42
|
+
"mypy>=1.19",
|
|
43
|
+
"ruff>=0.15",
|
|
44
44
|
"sphinx>=7.0",
|
|
45
45
|
"furo>=2024.0.0",
|
|
46
46
|
"build",
|
|
47
47
|
"twine",
|
|
48
48
|
"hatch-vcs",
|
|
49
|
+
"pre-commit",
|
|
49
50
|
]
|
|
50
51
|
all = [
|
|
51
52
|
"hpc-runner[dev]",
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3,
|
|
31
|
+
__version__ = version = '0.3.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -62,7 +62,7 @@ def init(ctx: Context, global_config: bool) -> None:
|
|
|
62
62
|
return
|
|
63
63
|
|
|
64
64
|
# Write default config
|
|
65
|
-
default_config =
|
|
65
|
+
default_config = """# hpc-runner configuration
|
|
66
66
|
#
|
|
67
67
|
# This file is safe to commit to a project repo (for shared defaults).
|
|
68
68
|
# For a per-user config, run: hpc config init --global
|
|
@@ -92,7 +92,7 @@ merge_output = true
|
|
|
92
92
|
# [types.gpu]
|
|
93
93
|
# queue = "gpu"
|
|
94
94
|
# resources = [{name = "gpu", value = 1}]
|
|
95
|
-
|
|
95
|
+
"""
|
|
96
96
|
|
|
97
97
|
config_path.write_text(default_config)
|
|
98
98
|
console.print(f"[green]Created {config_path}[/green]")
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Main CLI entry point using rich-click."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
3
4
|
from pathlib import Path
|
|
5
|
+
from typing import TypeVar
|
|
4
6
|
|
|
5
7
|
import rich_click as click
|
|
6
8
|
from rich.console import Console
|
|
@@ -11,6 +13,7 @@ click.rich_click.SHOW_ARGUMENTS = True
|
|
|
11
13
|
# Global console for Rich output
|
|
12
14
|
console = Console()
|
|
13
15
|
|
|
16
|
+
|
|
14
17
|
# Context object to pass state between commands
|
|
15
18
|
class Context:
|
|
16
19
|
def __init__(self) -> None:
|
|
@@ -18,7 +21,9 @@ class Context:
|
|
|
18
21
|
self.scheduler: str | None = None
|
|
19
22
|
self.verbose: bool = False
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
|
|
25
|
+
F = TypeVar("F", bound=Callable[..., object])
|
|
26
|
+
pass_context: Callable[[F], F] = click.make_pass_decorator(Context, ensure=True) # type: ignore[assignment]
|
|
22
27
|
|
|
23
28
|
|
|
24
29
|
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
|
|
@@ -68,8 +73,8 @@ from hpc_runner.cli.status import status # noqa: E402
|
|
|
68
73
|
|
|
69
74
|
cli.add_command(run)
|
|
70
75
|
cli.add_command(status)
|
|
71
|
-
cli.add_command(cancel)
|
|
72
|
-
cli.add_command(config_cmd, name="config")
|
|
76
|
+
cli.add_command(cancel) # type: ignore[has-type]
|
|
77
|
+
cli.add_command(config_cmd, name="config") # type: ignore[has-type]
|
|
73
78
|
cli.add_command(monitor)
|
|
74
79
|
|
|
75
80
|
|
|
@@ -11,6 +11,7 @@ from hpc_runner.cli.main import Context, pass_context
|
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
13
|
from hpc_runner.core.job import Job
|
|
14
|
+
from hpc_runner.schedulers.base import BaseScheduler
|
|
14
15
|
|
|
15
16
|
console = Console()
|
|
16
17
|
|
|
@@ -34,10 +35,15 @@ console = Console()
|
|
|
34
35
|
@click.option("--job-type", "job_type", help="Job type from config")
|
|
35
36
|
@click.option("--module", "modules", multiple=True, help="Modules to load (repeatable)")
|
|
36
37
|
@click.option("--stderr", help="Separate stderr file (default: merged)")
|
|
37
|
-
@click.option("--
|
|
38
|
+
@click.option("--stdout", "stdout", help="Stdout file path pattern")
|
|
38
39
|
@click.option("--array", help="Array job specification (e.g., 1-100)")
|
|
39
40
|
@click.option("--depend", help="Job dependency specification")
|
|
40
|
-
@click.option(
|
|
41
|
+
@click.option(
|
|
42
|
+
"--inherit-env/--no-inherit-env",
|
|
43
|
+
"inherit_env",
|
|
44
|
+
default=True,
|
|
45
|
+
help="Inherit environment variables",
|
|
46
|
+
)
|
|
41
47
|
@click.option("--interactive", is_flag=True, help="Run interactively (srun/qrsh)")
|
|
42
48
|
@click.option("--local", is_flag=True, help="Run locally (no scheduler)")
|
|
43
49
|
@click.option("--dry-run", "dry_run", is_flag=True, help="Show what would be submitted")
|
|
@@ -58,7 +64,7 @@ def run(
|
|
|
58
64
|
job_type: str | None,
|
|
59
65
|
modules: tuple[str, ...],
|
|
60
66
|
stderr: str | None,
|
|
61
|
-
|
|
67
|
+
stdout: str | None,
|
|
62
68
|
array: str | None,
|
|
63
69
|
depend: str | None,
|
|
64
70
|
inherit_env: bool,
|
|
@@ -128,8 +134,8 @@ def run(
|
|
|
128
134
|
job.modules = list(modules)
|
|
129
135
|
if stderr:
|
|
130
136
|
job.stderr = stderr
|
|
131
|
-
if
|
|
132
|
-
job.stdout =
|
|
137
|
+
if stdout:
|
|
138
|
+
job.stdout = stdout
|
|
133
139
|
if depend:
|
|
134
140
|
job.dependency = depend
|
|
135
141
|
|
|
@@ -233,7 +239,10 @@ def _parse_args(args: tuple[str, ...]) -> tuple[list[str], list[str]]:
|
|
|
233
239
|
|
|
234
240
|
|
|
235
241
|
def _show_dry_run(
|
|
236
|
-
job: "Job",
|
|
242
|
+
job: "Job",
|
|
243
|
+
scheduler: "BaseScheduler",
|
|
244
|
+
scheduler_args: list[str],
|
|
245
|
+
interactive: bool = False,
|
|
237
246
|
) -> None:
|
|
238
247
|
"""Display what would be submitted."""
|
|
239
248
|
mode = "interactive" if interactive else "batch"
|
|
@@ -252,15 +261,21 @@ def _show_dry_run(
|
|
|
252
261
|
console.print(f"\n[bold]Scheduler passthrough args:[/bold] {' '.join(scheduler_args)}")
|
|
253
262
|
|
|
254
263
|
console.print("\n[bold]Generated script:[/bold]")
|
|
255
|
-
if interactive
|
|
256
|
-
script = scheduler.
|
|
264
|
+
if interactive:
|
|
265
|
+
script = scheduler.generate_interactive_script(job, "/tmp/example_script.sh")
|
|
257
266
|
else:
|
|
258
267
|
script = scheduler.generate_script(job)
|
|
259
268
|
syntax = Syntax(script, "bash", theme="monokai", line_numbers=True)
|
|
260
269
|
console.print(syntax)
|
|
261
270
|
|
|
262
271
|
|
|
263
|
-
def _handle_array_job(
|
|
272
|
+
def _handle_array_job(
|
|
273
|
+
job: "Job",
|
|
274
|
+
array_spec: str,
|
|
275
|
+
scheduler: "BaseScheduler",
|
|
276
|
+
dry_run: bool,
|
|
277
|
+
verbose: bool,
|
|
278
|
+
) -> None:
|
|
264
279
|
"""Handle array job submission."""
|
|
265
280
|
from hpc_runner.core.job_array import JobArray
|
|
266
281
|
|
|
@@ -5,7 +5,6 @@ from __future__ import annotations
|
|
|
5
5
|
import sys
|
|
6
6
|
from typing import Final
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
_GLOBAL_FLAGS: Final[set[str]] = {"--config", "--scheduler", "--verbose", "-h", "--help"}
|
|
10
9
|
|
|
11
10
|
|
|
@@ -69,4 +68,3 @@ def main() -> None:
|
|
|
69
68
|
|
|
70
69
|
global_opts, rest = _split_global_flags(argv)
|
|
71
70
|
cli.main(args=[*global_opts, "run", *rest], prog_name="submit")
|
|
72
|
-
|
|
@@ -10,7 +10,7 @@ from typing import Any
|
|
|
10
10
|
if sys.version_info >= (3, 11):
|
|
11
11
|
import tomllib
|
|
12
12
|
else:
|
|
13
|
-
import tomli as tomllib
|
|
13
|
+
import tomli as tomllib # type: ignore[import-not-found]
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@dataclass
|
|
@@ -68,7 +68,13 @@ def _merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
|
|
|
68
68
|
if value and value[0] == "-":
|
|
69
69
|
result[key] = value[1:]
|
|
70
70
|
else:
|
|
71
|
-
|
|
71
|
+
seen: set[Any] = set()
|
|
72
|
+
merged: list[Any] = []
|
|
73
|
+
for item in result[key] + value:
|
|
74
|
+
if item not in seen:
|
|
75
|
+
seen.add(item)
|
|
76
|
+
merged.append(item)
|
|
77
|
+
result[key] = merged
|
|
72
78
|
else:
|
|
73
79
|
result[key] = value
|
|
74
80
|
return result
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Descriptor pattern for job attributes and scheduler arguments."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import Any, Generic, TypeVar
|
|
4
|
+
from typing import Any, Generic, TypeVar, overload
|
|
5
5
|
|
|
6
6
|
T = TypeVar("T")
|
|
7
7
|
|
|
@@ -34,12 +34,18 @@ class JobAttribute(Generic[T]):
|
|
|
34
34
|
def __init__(self, name: str, *, default: T | None = None):
|
|
35
35
|
self.public_name = name
|
|
36
36
|
self.default = default
|
|
37
|
-
self._private_name: str
|
|
37
|
+
self._private_name: str = f"_{name}"
|
|
38
38
|
|
|
39
39
|
def __set_name__(self, owner: type, name: str) -> None:
|
|
40
40
|
self._private_name = f"_{name}"
|
|
41
41
|
|
|
42
|
-
|
|
42
|
+
@overload
|
|
43
|
+
def __get__(self, obj: None, objtype: type) -> "JobAttribute[T]": ...
|
|
44
|
+
|
|
45
|
+
@overload
|
|
46
|
+
def __get__(self, obj: Any, objtype: type | None = None) -> T | None: ...
|
|
47
|
+
|
|
48
|
+
def __get__(self, obj: Any, objtype: type | None = None) -> "T | None | JobAttribute[T]":
|
|
43
49
|
if obj is None:
|
|
44
50
|
return self
|
|
45
51
|
return getattr(obj, self._private_name, self.default)
|
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
-
from
|
|
6
|
+
from collections.abc import Iterator
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
7
8
|
|
|
8
9
|
from hpc_runner.core.descriptors import JobAttribute
|
|
9
10
|
from hpc_runner.core.resources import ResourceSet
|
|
@@ -182,7 +183,7 @@ class Job:
|
|
|
182
183
|
# Submission API
|
|
183
184
|
# =========================================================================
|
|
184
185
|
|
|
185
|
-
def submit(self, scheduler:
|
|
186
|
+
def submit(self, scheduler: BaseScheduler | None = None) -> JobResult:
|
|
186
187
|
"""Submit the job to a scheduler.
|
|
187
188
|
|
|
188
189
|
This is the primary programmatic API for job submission.
|
|
@@ -216,7 +217,7 @@ class Job:
|
|
|
216
217
|
tool_or_type: str,
|
|
217
218
|
command: str | None = None,
|
|
218
219
|
**overrides: Any,
|
|
219
|
-
) ->
|
|
220
|
+
) -> Job:
|
|
220
221
|
"""Create a job from configuration.
|
|
221
222
|
|
|
222
223
|
Looks up job settings from the config file by tool name or job type,
|
|
@@ -309,9 +310,9 @@ class Job:
|
|
|
309
310
|
|
|
310
311
|
def after(
|
|
311
312
|
self,
|
|
312
|
-
*jobs:
|
|
313
|
+
*jobs: JobResult,
|
|
313
314
|
type: str = "afterok",
|
|
314
|
-
) ->
|
|
315
|
+
) -> Job:
|
|
315
316
|
"""Add job dependencies.
|
|
316
317
|
|
|
317
318
|
Args:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Resource abstraction for job resource requests."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Iterator
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
|
|
5
6
|
|
|
@@ -39,7 +40,7 @@ class ResourceSet:
|
|
|
39
40
|
return r
|
|
40
41
|
return None
|
|
41
42
|
|
|
42
|
-
def __iter__(self):
|
|
43
|
+
def __iter__(self) -> Iterator[Resource]:
|
|
43
44
|
return iter(self.resources)
|
|
44
45
|
|
|
45
46
|
def __len__(self) -> int:
|
|
@@ -18,7 +18,7 @@ _SCHEDULERS: dict[str, str] = {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
def get_scheduler(name: str | None = None) ->
|
|
21
|
+
def get_scheduler(name: str | None = None) -> BaseScheduler:
|
|
22
22
|
"""Get scheduler instance.
|
|
23
23
|
|
|
24
24
|
Args:
|
|
@@ -39,7 +39,7 @@ def get_scheduler(name: str | None = None) -> "BaseScheduler":
|
|
|
39
39
|
module = importlib.import_module(module_path)
|
|
40
40
|
scheduler_class = getattr(module, class_name)
|
|
41
41
|
|
|
42
|
-
return scheduler_class()
|
|
42
|
+
return scheduler_class() # type: ignore[no-any-return]
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
def register_scheduler(name: str, import_path: str) -> None:
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from abc import ABC, abstractmethod
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import TYPE_CHECKING
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
from hpc_runner.core.descriptors import SchedulerArg
|
|
11
11
|
|
|
@@ -35,13 +35,13 @@ class BaseScheduler(ABC):
|
|
|
35
35
|
name: str = ""
|
|
36
36
|
|
|
37
37
|
# Subclasses populate this in __init__ with config-driven values
|
|
38
|
-
ARG_RENDERERS: dict[str, SchedulerArg] = {}
|
|
38
|
+
ARG_RENDERERS: dict[str, SchedulerArg[Any]] = {}
|
|
39
39
|
|
|
40
40
|
# =========================================================================
|
|
41
41
|
# Rendering Protocol
|
|
42
42
|
# =========================================================================
|
|
43
43
|
|
|
44
|
-
def render_directives(self, job:
|
|
44
|
+
def render_directives(self, job: Job) -> list[str]:
|
|
45
45
|
"""Render job attributes as script directives.
|
|
46
46
|
|
|
47
47
|
Iterates over job's renderable attributes and uses ARG_RENDERERS
|
|
@@ -66,7 +66,7 @@ class BaseScheduler(ABC):
|
|
|
66
66
|
|
|
67
67
|
return directives
|
|
68
68
|
|
|
69
|
-
def render_args(self, job:
|
|
69
|
+
def render_args(self, job: Job) -> list[str]:
|
|
70
70
|
"""Render job attributes as command-line arguments.
|
|
71
71
|
|
|
72
72
|
Iterates over job's renderable attributes and uses ARG_RENDERERS
|
|
@@ -94,9 +94,7 @@ class BaseScheduler(ABC):
|
|
|
94
94
|
# =========================================================================
|
|
95
95
|
|
|
96
96
|
@abstractmethod
|
|
97
|
-
def submit(
|
|
98
|
-
self, job: "Job", interactive: bool = False, keep_script: bool = False
|
|
99
|
-
) -> "JobResult":
|
|
97
|
+
def submit(self, job: Job, interactive: bool = False, keep_script: bool = False) -> JobResult:
|
|
100
98
|
"""Submit a job to the scheduler.
|
|
101
99
|
|
|
102
100
|
Args:
|
|
@@ -106,7 +104,7 @@ class BaseScheduler(ABC):
|
|
|
106
104
|
"""
|
|
107
105
|
|
|
108
106
|
@abstractmethod
|
|
109
|
-
def submit_array(self, array:
|
|
107
|
+
def submit_array(self, array: JobArray) -> ArrayJobResult:
|
|
110
108
|
"""Submit an array job."""
|
|
111
109
|
|
|
112
110
|
@abstractmethod
|
|
@@ -114,7 +112,7 @@ class BaseScheduler(ABC):
|
|
|
114
112
|
"""Cancel a job."""
|
|
115
113
|
|
|
116
114
|
@abstractmethod
|
|
117
|
-
def get_status(self, job_id: str) ->
|
|
115
|
+
def get_status(self, job_id: str) -> JobStatus:
|
|
118
116
|
"""Get job status."""
|
|
119
117
|
|
|
120
118
|
@abstractmethod
|
|
@@ -122,21 +120,37 @@ class BaseScheduler(ABC):
|
|
|
122
120
|
"""Get job exit code."""
|
|
123
121
|
|
|
124
122
|
@abstractmethod
|
|
125
|
-
def generate_script(self, job:
|
|
123
|
+
def generate_script(self, job: Job, array_range: str | None = None) -> str:
|
|
126
124
|
"""Generate submission script."""
|
|
127
125
|
|
|
128
126
|
@abstractmethod
|
|
129
|
-
def build_submit_command(self, job:
|
|
127
|
+
def build_submit_command(self, job: Job) -> list[str]:
|
|
130
128
|
"""Build submission command line."""
|
|
131
129
|
|
|
132
130
|
@abstractmethod
|
|
133
|
-
def build_interactive_command(self, job:
|
|
131
|
+
def build_interactive_command(self, job: Job) -> list[str]:
|
|
134
132
|
"""Build interactive execution command."""
|
|
135
133
|
|
|
136
134
|
# =========================================================================
|
|
137
135
|
# Optional Methods - Override if scheduler supports these
|
|
138
136
|
# =========================================================================
|
|
139
137
|
|
|
138
|
+
def generate_interactive_script(self, job: Job, script_path: str) -> str:
|
|
139
|
+
"""Generate wrapper script for interactive jobs.
|
|
140
|
+
|
|
141
|
+
By default, falls back to the standard batch script.
|
|
142
|
+
Override in subclasses that need a different template for
|
|
143
|
+
interactive sessions (e.g. SGE uses qrsh with no #$ directives).
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
job: Job to generate script for.
|
|
147
|
+
script_path: Path where the script will be written.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
Script content as a string.
|
|
151
|
+
"""
|
|
152
|
+
return self.generate_script(job)
|
|
153
|
+
|
|
140
154
|
def get_output_path(self, job_id: str, stream: str) -> Path | None:
|
|
141
155
|
"""Get path to output file.
|
|
142
156
|
|
|
@@ -149,16 +163,16 @@ class BaseScheduler(ABC):
|
|
|
149
163
|
"""
|
|
150
164
|
return None
|
|
151
165
|
|
|
152
|
-
def get_scheduler_args(self, job:
|
|
166
|
+
def get_scheduler_args(self, job: Job) -> list[str]:
|
|
153
167
|
"""Get scheduler-specific raw args from job."""
|
|
154
168
|
return getattr(job, f"{self.name}_args", [])
|
|
155
169
|
|
|
156
170
|
def list_active_jobs(
|
|
157
171
|
self,
|
|
158
172
|
user: str | None = None,
|
|
159
|
-
status: set[
|
|
173
|
+
status: set[JobStatus] | None = None,
|
|
160
174
|
queue: str | None = None,
|
|
161
|
-
) -> list[
|
|
175
|
+
) -> list[JobInfo]:
|
|
162
176
|
"""List active jobs. Override in subclass."""
|
|
163
177
|
return []
|
|
164
178
|
|
|
@@ -170,7 +184,7 @@ class BaseScheduler(ABC):
|
|
|
170
184
|
exit_code: int | None = None,
|
|
171
185
|
queue: str | None = None,
|
|
172
186
|
limit: int = 100,
|
|
173
|
-
) -> list[
|
|
187
|
+
) -> list[JobInfo]:
|
|
174
188
|
"""List completed jobs from accounting. Override in subclass."""
|
|
175
189
|
return []
|
|
176
190
|
|
|
@@ -178,7 +192,7 @@ class BaseScheduler(ABC):
|
|
|
178
192
|
"""Check if job accounting/history is available."""
|
|
179
193
|
return False
|
|
180
194
|
|
|
181
|
-
def get_job_details(self, job_id: str) -> tuple[
|
|
195
|
+
def get_job_details(self, job_id: str) -> tuple[JobInfo, dict[str, object]]:
|
|
182
196
|
"""Get detailed information for a single job.
|
|
183
197
|
|
|
184
198
|
Args:
|