hpc-runner 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpc_runner/__init__.py +57 -0
- hpc_runner/_version.py +34 -0
- hpc_runner/cli/__init__.py +1 -0
- hpc_runner/cli/cancel.py +38 -0
- hpc_runner/cli/config.py +109 -0
- hpc_runner/cli/main.py +72 -0
- hpc_runner/cli/run.py +136 -0
- hpc_runner/cli/status.py +65 -0
- hpc_runner/core/__init__.py +1 -0
- hpc_runner/core/config.py +177 -0
- hpc_runner/core/descriptors.py +56 -0
- hpc_runner/core/exceptions.py +29 -0
- hpc_runner/core/job.py +149 -0
- hpc_runner/core/job_array.py +58 -0
- hpc_runner/core/resources.py +49 -0
- hpc_runner/core/result.py +157 -0
- hpc_runner/core/types.py +13 -0
- hpc_runner/py.typed +0 -0
- hpc_runner/schedulers/__init__.py +60 -0
- hpc_runner/schedulers/base.py +76 -0
- hpc_runner/schedulers/detection.py +34 -0
- hpc_runner/schedulers/local/__init__.py +5 -0
- hpc_runner/schedulers/local/scheduler.py +237 -0
- hpc_runner/schedulers/local/templates/job.sh.j2 +28 -0
- hpc_runner/schedulers/sge/__init__.py +5 -0
- hpc_runner/schedulers/sge/args.py +165 -0
- hpc_runner/schedulers/sge/parser.py +194 -0
- hpc_runner/schedulers/sge/scheduler.py +325 -0
- hpc_runner/schedulers/sge/templates/job.sh.j2 +39 -0
- hpc_runner/templates/__init__.py +5 -0
- hpc_runner/templates/engine.py +55 -0
- hpc_runner/workflow/__init__.py +6 -0
- hpc_runner/workflow/dependency.py +20 -0
- hpc_runner/workflow/pipeline.py +180 -0
- hpc_runner-0.1.0.dist-info/METADATA +46 -0
- hpc_runner-0.1.0.dist-info/RECORD +38 -0
- hpc_runner-0.1.0.dist-info/WHEEL +4 -0
- hpc_runner-0.1.0.dist-info/entry_points.txt +2 -0
hpc_runner/__init__.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""hpc-runner: HPC job submission across multiple schedulers."""
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from hpc_runner._version import __version__
|
|
5
|
+
except ImportError:
|
|
6
|
+
__version__ = "0.0.0.dev0"
|
|
7
|
+
|
|
8
|
+
from hpc_runner.core.config import HPCConfig, get_config, load_config, reload_config
|
|
9
|
+
from hpc_runner.core.exceptions import (
|
|
10
|
+
ConfigError,
|
|
11
|
+
ConfigNotFoundError,
|
|
12
|
+
HPCToolsError,
|
|
13
|
+
JobNotFoundError,
|
|
14
|
+
SchedulerError,
|
|
15
|
+
SubmissionError,
|
|
16
|
+
ValidationError,
|
|
17
|
+
)
|
|
18
|
+
from hpc_runner.core.job import Job
|
|
19
|
+
from hpc_runner.core.job_array import JobArray
|
|
20
|
+
from hpc_runner.core.resources import Resource, ResourceSet
|
|
21
|
+
from hpc_runner.core.result import ArrayJobResult, JobResult, JobStatus
|
|
22
|
+
from hpc_runner.schedulers import get_scheduler, list_schedulers, register_scheduler
|
|
23
|
+
from hpc_runner.workflow import DependencyType, Pipeline, PipelineJob
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
# Version
|
|
27
|
+
"__version__",
|
|
28
|
+
# Core
|
|
29
|
+
"Job",
|
|
30
|
+
"JobArray",
|
|
31
|
+
"JobResult",
|
|
32
|
+
"ArrayJobResult",
|
|
33
|
+
"JobStatus",
|
|
34
|
+
"Resource",
|
|
35
|
+
"ResourceSet",
|
|
36
|
+
# Config
|
|
37
|
+
"load_config",
|
|
38
|
+
"get_config",
|
|
39
|
+
"reload_config",
|
|
40
|
+
"HPCConfig",
|
|
41
|
+
# Schedulers
|
|
42
|
+
"get_scheduler",
|
|
43
|
+
"register_scheduler",
|
|
44
|
+
"list_schedulers",
|
|
45
|
+
# Workflow
|
|
46
|
+
"Pipeline",
|
|
47
|
+
"PipelineJob",
|
|
48
|
+
"DependencyType",
|
|
49
|
+
# Exceptions
|
|
50
|
+
"HPCToolsError",
|
|
51
|
+
"SchedulerError",
|
|
52
|
+
"SubmissionError",
|
|
53
|
+
"JobNotFoundError",
|
|
54
|
+
"ConfigError",
|
|
55
|
+
"ConfigNotFoundError",
|
|
56
|
+
"ValidationError",
|
|
57
|
+
]
|
hpc_runner/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.1.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 0)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI for hpc-tools."""
|
hpc_runner/cli/cancel.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Cancel command - cancel running jobs."""
|
|
2
|
+
|
|
3
|
+
import rich_click as click
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.command()
@click.argument("job_id")
@click.option("--force", "-f", is_flag=True, help="Force cancel without confirmation")
@pass_context
def cancel(
    ctx: Context,
    job_id: str,
    force: bool,
) -> None:
    """Cancel a job.

    JOB_ID is the job ID to cancel.
    """
    # The docstring above is rendered by click as the command's help text, so
    # developer-facing notes are kept in comments.
    #
    # Flow: resolve the scheduler selected on the command line, ask for
    # confirmation unless --force was given, then request the cancellation.
    # Exits with status 1 when the scheduler reports that the cancel failed,
    # so shell scripts can detect the failure.
    from hpc_runner.schedulers import get_scheduler

    scheduler = get_scheduler(ctx.scheduler)

    # The user declining the prompt is not an error: report and return.
    if not force and not click.confirm(f"Cancel job {job_id}?"):
        console.print("[yellow]Cancelled[/yellow]")
        return

    if scheduler.cancel(job_id):
        console.print(f"[green]Job {job_id} cancelled[/green]")
        return

    console.print(f"[red]Failed to cancel job {job_id}[/red]")
    # Previously the command returned normally here, giving exit status 0
    # even though nothing was cancelled.
    raise SystemExit(1)
|
hpc_runner/cli/config.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Config command - manage configuration."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.syntax import Syntax
|
|
8
|
+
|
|
9
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.group()
def config_cmd() -> None:
    """Manage configuration."""
    # Pure command group: the subcommands registered on it do all the work.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@config_cmd.command("show")
@pass_context
def show(ctx: Context) -> None:
    """Show current configuration."""
    # The docstring above doubles as click help text; notes go in comments.
    from hpc_runner.core.config import find_config_file

    # An explicit --config path wins over auto-discovery.
    config_path = ctx.config_path or find_config_file()

    if config_path is None:
        for line in (
            "[yellow]No configuration file found[/yellow]",
            "Using default settings",
            "\nSearch locations:",
            " 1. ./hpc-tools.toml",
            " 2. ./pyproject.toml [tool.hpc-tools]",
            " 3. <git root>/hpc-tools.toml",
            " 4. ~/.config/hpc-tools/config.toml",
        ):
            console.print(line)
        return

    console.print(f"[bold]Config file:[/bold] {config_path}")
    console.print()

    # TOML documents are UTF-8 by specification, so decode explicitly instead
    # of relying on the locale's preferred encoding.
    content = config_path.read_text(encoding="utf-8")
    console.print(Syntax(content, "toml", theme="monokai", line_numbers=True))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@config_cmd.command("init")
@click.option("--global", "-g", "global_config", is_flag=True, help="Create global config")
@pass_context
def init(ctx: Context, global_config: bool) -> None:
    """Create a new configuration file."""
    # The docstring above doubles as click help text; notes go in comments.
    #
    # Target is ~/.config/hpc-tools/config.toml with --global (the directory
    # is created on demand), otherwise ./hpc-tools.toml.
    if not global_config:
        config_path = Path.cwd() / "hpc-tools.toml"
    else:
        global_dir = Path.home() / ".config" / "hpc-tools"
        global_dir.mkdir(parents=True, exist_ok=True)
        config_path = global_dir / "config.toml"

    # Never clobber an existing file without asking first.
    if config_path.exists() and not click.confirm(f"{config_path} already exists. Overwrite?"):
        console.print("[yellow]Cancelled[/yellow]")
        return

    # Starter template written verbatim to the target file.
    default_config = '''# hpc-tools configuration

[defaults]
# Default job settings
cpu = 1
mem = "4G"
time = "1:00:00"
# queue = "batch"

# Modules to always load
modules = []

[schedulers.sge]
# SGE-specific settings
parallel_environment = "smp"
memory_resource = "mem_free"
time_resource = "h_rt"
merge_output = true

# Tool-specific configurations
# [tools.python]
# modules = ["python/3.11"]

# Job type configurations
# [types.gpu]
# queue = "gpu"
# resources = [{name = "gpu", value = 1}]
'''

    config_path.write_text(default_config)
    console.print(f"[green]Created {config_path}[/green]")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@config_cmd.command("path")
@pass_context
def path(ctx: Context) -> None:
    """Show path to active configuration file."""
    # The docstring above doubles as click help text; notes go in comments.
    from hpc_runner.core.config import find_config_file

    # An explicit --config path wins over auto-discovery.
    located = ctx.config_path or find_config_file()

    if not located:
        console.print("[yellow]No configuration file found[/yellow]")
        return

    console.print(str(located))
|
hpc_runner/cli/main.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Main CLI entry point using rich-click."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
|
|
9
|
+
# Configure rich-click
|
|
10
|
+
click.rich_click.SHOW_ARGUMENTS = True
|
|
11
|
+
|
|
12
|
+
# Global console for Rich output
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
# Context object to pass state between commands
|
|
16
|
+
class Context:
    """Mutable bag of global CLI options shared with every subcommand."""

    def __init__(self) -> None:
        # All three start unset; the root `cli` callback fills them in from
        # the parsed command-line options.
        self.verbose: bool = False
        self.scheduler: Optional[str] = None
        self.config_path: Optional[Path] = None
|
|
21
|
+
|
|
22
|
+
pass_context = click.make_pass_decorator(Context, ensure=True)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@click.group()
@click.option("--config", "-c", type=click.Path(exists=True, path_type=Path), help="Path to configuration file")
@click.option("--scheduler", "-s", type=str, help="Force scheduler (sge, slurm, pbs, local)")
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
@click.version_option(package_name="hpc-runner")
@pass_context
def cli(ctx: Context, config: Optional[Path], scheduler: Optional[str], verbose: bool) -> None:
    """HPC job submission tool.

    Submit and manage jobs across different HPC schedulers (SGE, Slurm, PBS)
    with a unified interface.
    """
    # The docstring above is rendered by click as the top-level help text.
    # Record the global options on the shared Context so that subcommands
    # can read them.
    ctx.config_path, ctx.scheduler, ctx.verbose = config, scheduler, verbose
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Import and register subcommands
|
|
55
|
+
from hpc_runner.cli.run import run
|
|
56
|
+
from hpc_runner.cli.status import status
|
|
57
|
+
from hpc_runner.cli.cancel import cancel
|
|
58
|
+
from hpc_runner.cli.config import config_cmd
|
|
59
|
+
|
|
60
|
+
cli.add_command(run)
|
|
61
|
+
cli.add_command(status)
|
|
62
|
+
cli.add_command(cancel)
|
|
63
|
+
cli.add_command(config_cmd, name="config")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def main() -> None:
    """Run the `cli` command group; used as the console-script entry point."""
    # click parses sys.argv and dispatches to the selected subcommand.
    cli()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
if __name__ == "__main__":
|
|
72
|
+
main()
|
hpc_runner/cli/run.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Run command - submit jobs to the scheduler."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.panel import Panel
|
|
8
|
+
from rich.syntax import Syntax
|
|
9
|
+
|
|
10
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
11
|
+
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command()
@click.argument("command", nargs=-1, required=True)
@click.option("--name", "-N", help="Job name")
@click.option("--cpu", "-c", type=int, help="Number of CPUs")
@click.option("--mem", "-m", help="Memory requirement (e.g., 16G)")
@click.option("--time", "-t", help="Time limit (e.g., 4:00:00)")
@click.option("--queue", "-q", help="Queue/partition name")
@click.option("--interactive", "-I", is_flag=True, help="Run interactively (blocking)")
@click.option("--local", "-L", is_flag=True, help="Run locally (no scheduler)")
@click.option("--type", "-T", "job_type", help="Job type from config")
@click.option("--module", "-M", multiple=True, help="Modules to load (can be repeated)")
@click.option("--raw", "-R", multiple=True, help="Raw scheduler args (can be repeated)")
@click.option("--dry-run", "-n", is_flag=True, help="Show what would be submitted")
@click.option("--stderr", "-e", help="Separate stderr file (default: merged with stdout)")
@pass_context
def run(
    ctx: Context,
    command: Tuple[str, ...],
    name: Optional[str],
    cpu: Optional[int],
    mem: Optional[str],
    time: Optional[str],
    queue: Optional[str],
    interactive: bool,
    local: bool,
    job_type: Optional[str],
    module: Tuple[str, ...],
    raw: Tuple[str, ...],
    dry_run: bool,
    stderr: Optional[str],
) -> None:
    """Submit a job to the scheduler.

    COMMAND is the command to execute. Use quotes for complex commands:

        hpc run "make -j8 all"

        hpc run python script.py --arg value
    """
    # The docstring above is rendered by click as the command's help text, so
    # developer-facing notes are kept in comments.
    #
    # Exit status: an interactive job that finishes with a non-zero return
    # code makes this command exit with that code (1 if the code is not an
    # integer), so callers can detect the failure.
    from hpc_runner.core.job import Job
    from hpc_runner.schedulers import get_scheduler

    # --local overrides whatever scheduler was selected globally.
    scheduler_name = "local" if local else ctx.scheduler
    scheduler = get_scheduler(scheduler_name)

    # click hands the command over as separate words; rejoin them verbatim.
    cmd_str = " ".join(command)

    # Start from the named job type in the config, or from a bare job.
    if job_type:
        job = Job.from_config(job_type, command=cmd_str)
    else:
        job = Job(command=cmd_str)

    # Only options that were actually supplied override the job's settings.
    cli_overrides = (
        ("name", name),
        ("cpu", cpu),
        ("mem", mem),
        ("time", time),
        ("queue", queue),
        ("modules", list(module)),
        ("raw_args", list(raw)),
        ("stderr", stderr),
    )
    for attribute, value in cli_overrides:
        if value:
            setattr(job, attribute, value)

    if dry_run:
        _show_dry_run(job, scheduler)
        return

    result = scheduler.submit(job, interactive=interactive)

    if not interactive:
        console.print(f"Submitted job [bold cyan]{result.job_id}[/bold cyan]")
        if ctx.verbose:
            console.print(f" Scheduler: {scheduler.name}")
            console.print(f" Job name: {job.name}")
            console.print(f" Command: {job.command}")
        return

    if result.returncode == 0:
        console.print("[green]Job completed successfully[/green]")
        return

    console.print(f"[red]Job failed with exit code: {result.returncode}[/red]")
    # Previously the command returned normally here, so the process exited 0
    # even though the job had failed.
    raise SystemExit(result.returncode if isinstance(result.returncode, int) else 1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _show_dry_run(job: "Job", scheduler: "BaseScheduler") -> None:
    """Print a summary of *job* and the script *scheduler* would generate.

    Nothing is submitted; the output goes to the module-level console.
    """
    # Mirrors the string annotation in the signature; not used at runtime.
    from hpc_runner.schedulers.base import BaseScheduler

    console.print(Panel.fit("[bold]Dry Run[/bold]", border_style="yellow"))

    # Always-present header fields.
    header = (
        ("Scheduler", scheduler.name),
        ("Job Name", job.name),
        ("Command", job.command),
    )
    for label, value in header:
        console.print(f"[bold]{label}:[/bold] {value}")

    # Optional resource fields are listed only when they are set.
    optional = (
        ("CPU", job.cpu),
        ("Memory", job.mem),
        ("Time", job.time),
        ("Queue", job.queue),
    )
    for label, value in optional:
        if value:
            console.print(f"[bold]{label}:[/bold] {value}")

    if job.modules:
        console.print(f"[bold]Modules:[/bold] {', '.join(job.modules)}")

    if not job.merge_output:
        console.print(f"[bold]Stderr:[/bold] {job.stderr}")
    else:
        console.print("[bold]Output:[/bold] merged (stdout only)")

    console.print()
    console.print("[bold]Generated Script:[/bold]")
    rendered = scheduler.generate_script(job)
    console.print(Syntax(rendered, "bash", theme="monokai", line_numbers=True))
|
hpc_runner/cli/status.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Status command - check job status."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.command()
@click.argument("job_id", required=False)
@click.option("--all", "-a", "all_users", is_flag=True, help="Show all users' jobs")
@click.option("--watch", "-w", is_flag=True, help="Watch mode (refresh periodically)")
@pass_context
def status(
    ctx: Context,
    job_id: Optional[str],
    all_users: bool,
    watch: bool,
) -> None:
    """Check job status.

    If JOB_ID is provided, show status of that specific job.
    Otherwise, list all your jobs.
    """
    # The docstring above is rendered by click as the command's help text, so
    # developer-facing notes are kept in comments.
    #
    # `all_users` and `watch` are accepted but not read anywhere in this body.
    from hpc_runner.schedulers import get_scheduler

    scheduler = get_scheduler(ctx.scheduler)

    if not job_id:
        # Listing is not available here; point the user at the per-job form.
        console.print("[yellow]Listing all jobs requires scheduler-specific implementation[/yellow]")
        console.print("Use 'hpc status <job_id>' to check a specific job")
        return

    job_state = scheduler.get_status(job_id)
    exit_code = scheduler.get_exit_code(job_id)

    # Two-column property/value table for the single job.
    report = Table(title=f"Job {job_id}")
    report.add_column("Property", style="cyan")
    report.add_column("Value")
    report.add_row("Status", _status_style(job_state.name))
    if exit_code is not None:
        report.add_row("Exit Code", str(exit_code))

    console.print(report)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _status_style(status: str) -> str:
|
|
55
|
+
"""Apply color to status string."""
|
|
56
|
+
colors = {
|
|
57
|
+
"PENDING": "[yellow]PENDING[/yellow]",
|
|
58
|
+
"RUNNING": "[blue]RUNNING[/blue]",
|
|
59
|
+
"COMPLETED": "[green]COMPLETED[/green]",
|
|
60
|
+
"FAILED": "[red]FAILED[/red]",
|
|
61
|
+
"CANCELLED": "[magenta]CANCELLED[/magenta]",
|
|
62
|
+
"TIMEOUT": "[red]TIMEOUT[/red]",
|
|
63
|
+
"UNKNOWN": "[dim]UNKNOWN[/dim]",
|
|
64
|
+
}
|
|
65
|
+
return colors.get(status, status)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core models and abstractions for hpc-tools."""
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Configuration loading and management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
if sys.version_info >= (3, 11):
|
|
11
|
+
import tomllib
|
|
12
|
+
else:
|
|
13
|
+
import tomli as tomllib
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class HPCConfig:
    """Loaded configuration.

    Holds the four top-level tables of a configuration file together with
    the path the data was read from.
    """

    # Settings used as the base for every job configuration lookup.
    defaults: dict[str, Any] = field(default_factory=dict)
    # Per-tool overrides, keyed by tool name.
    tools: dict[str, dict[str, Any]] = field(default_factory=dict)
    # Per-job-type overrides, keyed by type name.
    types: dict[str, dict[str, Any]] = field(default_factory=dict)
    # Scheduler-specific settings, keyed by scheduler name.
    schedulers: dict[str, dict[str, Any]] = field(default_factory=dict)

    # File the configuration was loaded from; None when built in memory.
    _source_path: Path | None = field(default=None, repr=False)

    def get_job_config(self, tool_or_type: str) -> dict[str, Any]:
        """Get merged configuration for a tool or type.

        Lookup order:
        1. Check types[tool_or_type]
        2. Check tools[tool_or_type]
        3. Fall back to defaults

        Args:
            tool_or_type: Name to look up; a type entry shadows a tool entry
                of the same name.

        Returns:
            A new dict: the defaults, overlaid with the matching entry if any.
        """
        config = self.defaults.copy()

        if tool_or_type in self.types:
            config = _merge(config, self.types[tool_or_type])
        elif tool_or_type in self.tools:
            config = _merge(config, self.tools[tool_or_type])

        return config

    def get_tool_config(self, command: str) -> dict[str, Any]:
        """Get configuration matching a command.

        The tool name is the first whitespace-separated word of *command*
        with any directory part stripped.

        Args:
            command: Full command line.

        Returns:
            The merged configuration for the tool; a copy of the defaults
            when *command* is empty or contains only whitespace.
        """
        words = command.split()
        if not words:
            # No tool name to extract; indexing words[0] would raise.
            return self.defaults.copy()

        return self.get_job_config(Path(words[0]).name)

    def get_scheduler_config(self, scheduler: str) -> dict[str, Any]:
        """Get scheduler-specific configuration ({} when none is defined)."""
        return self.schedulers.get(scheduler, {})
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
|
|
61
|
+
"""Deep merge with override taking precedence."""
|
|
62
|
+
result = base.copy()
|
|
63
|
+
for key, value in override.items():
|
|
64
|
+
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
|
|
65
|
+
result[key] = _merge(result[key], value)
|
|
66
|
+
elif key in result and isinstance(result[key], list) and isinstance(value, list):
|
|
67
|
+
# Check for list reset marker
|
|
68
|
+
if value and value[0] == "-":
|
|
69
|
+
result[key] = value[1:]
|
|
70
|
+
else:
|
|
71
|
+
result[key] = list(set(result[key] + value))
|
|
72
|
+
else:
|
|
73
|
+
result[key] = value
|
|
74
|
+
return result
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def find_config_file() -> Path | None:
|
|
78
|
+
"""Find configuration file in priority order.
|
|
79
|
+
|
|
80
|
+
Search order:
|
|
81
|
+
1. ./hpc-tools.toml (current directory)
|
|
82
|
+
2. ./pyproject.toml [tool.hpc-tools] section
|
|
83
|
+
3. Git repository root hpc-tools.toml
|
|
84
|
+
4. ~/.config/hpc-tools/config.toml
|
|
85
|
+
5. Package defaults
|
|
86
|
+
"""
|
|
87
|
+
# Current directory
|
|
88
|
+
cwd = Path.cwd()
|
|
89
|
+
if (cwd / "hpc-tools.toml").exists():
|
|
90
|
+
return cwd / "hpc-tools.toml"
|
|
91
|
+
|
|
92
|
+
if (cwd / "pyproject.toml").exists():
|
|
93
|
+
try:
|
|
94
|
+
with open(cwd / "pyproject.toml", "rb") as f:
|
|
95
|
+
pyproject = tomllib.load(f)
|
|
96
|
+
if "tool" in pyproject and "hpc-tools" in pyproject["tool"]:
|
|
97
|
+
return cwd / "pyproject.toml"
|
|
98
|
+
except Exception:
|
|
99
|
+
pass
|
|
100
|
+
|
|
101
|
+
# Git root
|
|
102
|
+
git_root = _find_git_root(cwd)
|
|
103
|
+
if git_root and (git_root / "hpc-tools.toml").exists():
|
|
104
|
+
return git_root / "hpc-tools.toml"
|
|
105
|
+
|
|
106
|
+
# User config
|
|
107
|
+
user_config = Path.home() / ".config" / "hpc-tools" / "config.toml"
|
|
108
|
+
if user_config.exists():
|
|
109
|
+
return user_config
|
|
110
|
+
|
|
111
|
+
# Package defaults
|
|
112
|
+
package_defaults = Path(__file__).parent.parent.parent.parent / "defaults" / "config.toml"
|
|
113
|
+
if package_defaults.exists():
|
|
114
|
+
return package_defaults
|
|
115
|
+
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _find_git_root(start: Path) -> Path | None:
|
|
120
|
+
"""Find git repository root."""
|
|
121
|
+
current = start.resolve()
|
|
122
|
+
while current != current.parent:
|
|
123
|
+
if (current / ".git").exists():
|
|
124
|
+
return current
|
|
125
|
+
current = current.parent
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def load_config(path: Path | str | None = None) -> HPCConfig:
    """Read a configuration file into an HPCConfig.

    Args:
        path: File to read. When None, the file is located with
            find_config_file().

    Returns:
        The parsed configuration, or an empty HPCConfig when no path was
        given and none could be discovered.
    """
    chosen = find_config_file() if path is None else path
    if chosen is None:
        # Nothing to read: every table stays empty.
        return HPCConfig()

    config_file = Path(chosen)
    with open(config_file, "rb") as handle:
        document = tomllib.load(handle)

    # In pyproject.toml the settings live under [tool.hpc-tools].
    if config_file.name == "pyproject.toml":
        document = document.get("tool", {}).get("hpc-tools", {})

    tables = {
        section: document.get(section, {})
        for section in ("defaults", "tools", "types", "schedulers")
    }
    loaded = HPCConfig(**tables)
    loaded._source_path = config_file
    return loaded
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# Global config cache
|
|
162
|
+
_cached_config: HPCConfig | None = None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_config() -> HPCConfig:
    """Return the shared configuration, loading it on first use."""
    global _cached_config
    if _cached_config is not None:
        return _cached_config

    # First call: auto-discover, load, and keep the result for later calls.
    _cached_config = load_config()
    return _cached_config
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def reload_config(path: Path | str | None = None) -> HPCConfig:
    """Load the configuration again and replace the cached instance.

    Args:
        path: File to read, passed through to load_config().

    Returns:
        The freshly loaded configuration, which is also the new cached value.
    """
    global _cached_config
    refreshed = load_config(path)
    _cached_config = refreshed
    return refreshed
|