hpc-runner 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpc_runner/__init__.py +57 -0
- hpc_runner/_version.py +34 -0
- hpc_runner/cli/__init__.py +1 -0
- hpc_runner/cli/cancel.py +38 -0
- hpc_runner/cli/config.py +109 -0
- hpc_runner/cli/main.py +76 -0
- hpc_runner/cli/monitor.py +30 -0
- hpc_runner/cli/run.py +292 -0
- hpc_runner/cli/status.py +66 -0
- hpc_runner/core/__init__.py +31 -0
- hpc_runner/core/config.py +177 -0
- hpc_runner/core/descriptors.py +110 -0
- hpc_runner/core/exceptions.py +38 -0
- hpc_runner/core/job.py +328 -0
- hpc_runner/core/job_array.py +58 -0
- hpc_runner/core/job_info.py +104 -0
- hpc_runner/core/resources.py +49 -0
- hpc_runner/core/result.py +161 -0
- hpc_runner/core/types.py +13 -0
- hpc_runner/py.typed +0 -0
- hpc_runner/schedulers/__init__.py +60 -0
- hpc_runner/schedulers/base.py +194 -0
- hpc_runner/schedulers/detection.py +52 -0
- hpc_runner/schedulers/local/__init__.py +5 -0
- hpc_runner/schedulers/local/scheduler.py +354 -0
- hpc_runner/schedulers/local/templates/job.sh.j2 +28 -0
- hpc_runner/schedulers/sge/__init__.py +5 -0
- hpc_runner/schedulers/sge/args.py +232 -0
- hpc_runner/schedulers/sge/parser.py +287 -0
- hpc_runner/schedulers/sge/scheduler.py +881 -0
- hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
- hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
- hpc_runner/templates/__init__.py +5 -0
- hpc_runner/templates/engine.py +55 -0
- hpc_runner/tui/__init__.py +5 -0
- hpc_runner/tui/app.py +436 -0
- hpc_runner/tui/components/__init__.py +17 -0
- hpc_runner/tui/components/detail_panel.py +187 -0
- hpc_runner/tui/components/filter_bar.py +174 -0
- hpc_runner/tui/components/filter_popup.py +345 -0
- hpc_runner/tui/components/job_table.py +260 -0
- hpc_runner/tui/providers/__init__.py +5 -0
- hpc_runner/tui/providers/jobs.py +197 -0
- hpc_runner/tui/screens/__init__.py +7 -0
- hpc_runner/tui/screens/confirm.py +67 -0
- hpc_runner/tui/screens/job_details.py +210 -0
- hpc_runner/tui/screens/log_viewer.py +170 -0
- hpc_runner/tui/snapshot.py +153 -0
- hpc_runner/tui/styles/monitor.tcss +567 -0
- hpc_runner/workflow/__init__.py +6 -0
- hpc_runner/workflow/dependency.py +20 -0
- hpc_runner/workflow/pipeline.py +180 -0
- hpc_runner-0.2.0.dist-info/METADATA +285 -0
- hpc_runner-0.2.0.dist-info/RECORD +56 -0
- hpc_runner-0.2.0.dist-info/WHEEL +4 -0
- hpc_runner-0.2.0.dist-info/entry_points.txt +2 -0
hpc_runner/__init__.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""hpc-runner: HPC job submission across multiple schedulers."""
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from hpc_runner._version import __version__
|
|
5
|
+
except ImportError:
|
|
6
|
+
__version__ = "0.0.0.dev0"
|
|
7
|
+
|
|
8
|
+
from hpc_runner.core.config import HPCConfig, get_config, load_config, reload_config
|
|
9
|
+
from hpc_runner.core.exceptions import (
|
|
10
|
+
ConfigError,
|
|
11
|
+
ConfigNotFoundError,
|
|
12
|
+
HPCToolsError,
|
|
13
|
+
JobNotFoundError,
|
|
14
|
+
SchedulerError,
|
|
15
|
+
SubmissionError,
|
|
16
|
+
ValidationError,
|
|
17
|
+
)
|
|
18
|
+
from hpc_runner.core.job import Job
|
|
19
|
+
from hpc_runner.core.job_array import JobArray
|
|
20
|
+
from hpc_runner.core.resources import Resource, ResourceSet
|
|
21
|
+
from hpc_runner.core.result import ArrayJobResult, JobResult, JobStatus
|
|
22
|
+
from hpc_runner.schedulers import get_scheduler, list_schedulers, register_scheduler
|
|
23
|
+
from hpc_runner.workflow import DependencyType, Pipeline, PipelineJob
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
# Version
|
|
27
|
+
"__version__",
|
|
28
|
+
# Core
|
|
29
|
+
"Job",
|
|
30
|
+
"JobArray",
|
|
31
|
+
"JobResult",
|
|
32
|
+
"ArrayJobResult",
|
|
33
|
+
"JobStatus",
|
|
34
|
+
"Resource",
|
|
35
|
+
"ResourceSet",
|
|
36
|
+
# Config
|
|
37
|
+
"load_config",
|
|
38
|
+
"get_config",
|
|
39
|
+
"reload_config",
|
|
40
|
+
"HPCConfig",
|
|
41
|
+
# Schedulers
|
|
42
|
+
"get_scheduler",
|
|
43
|
+
"register_scheduler",
|
|
44
|
+
"list_schedulers",
|
|
45
|
+
# Workflow
|
|
46
|
+
"Pipeline",
|
|
47
|
+
"PipelineJob",
|
|
48
|
+
"DependencyType",
|
|
49
|
+
# Exceptions
|
|
50
|
+
"HPCToolsError",
|
|
51
|
+
"SchedulerError",
|
|
52
|
+
"SubmissionError",
|
|
53
|
+
"JobNotFoundError",
|
|
54
|
+
"ConfigError",
|
|
55
|
+
"ConfigNotFoundError",
|
|
56
|
+
"ValidationError",
|
|
57
|
+
]
|
hpc_runner/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.2.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 0)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI for hpc-tools."""
|
hpc_runner/cli/cancel.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Cancel command - cancel running jobs."""
|
|
2
|
+
|
|
3
|
+
import rich_click as click
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.command()
@click.argument("job_id")
@click.option("--force", is_flag=True, help="Cancel without confirmation")
@pass_context
def cancel(
    ctx: Context,
    job_id: str,
    force: bool,
) -> None:
    """Cancel a job.

    JOB_ID is the job ID to cancel.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is kept short and user-facing.
    from hpc_runner.schedulers import get_scheduler

    # Resolve the scheduler first (honouring the global --scheduler option).
    backend = get_scheduler(ctx.scheduler)

    # --force skips the interactive prompt entirely; otherwise the user must
    # explicitly confirm before anything is sent to the scheduler.
    confirmed = force or click.confirm(f"Cancel job {job_id}?")
    if not confirmed:
        console.print("[yellow]Cancelled[/yellow]")
        return

    # The scheduler reports success/failure as a truthy/falsy value.
    if backend.cancel(job_id):
        console.print(f"[green]Job {job_id} cancelled[/green]")
    else:
        console.print(f"[red]Failed to cancel job {job_id}[/red]")
|
hpc_runner/cli/config.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Config command - manage configuration."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.syntax import Syntax
|
|
8
|
+
|
|
9
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.group()
def config_cmd() -> None:
    """Manage configuration."""
    # Pure command group: it only exists to host the show/init/path
    # subcommands registered on it below. The docstring is the help text.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@config_cmd.command("show")
@pass_context
def show(ctx: Context) -> None:
    """Show current configuration."""
    # NOTE: the docstring above is rendered by click as the help text.
    #
    # Prints the path of the active configuration file followed by its
    # syntax-highlighted contents. When no file can be located, prints the
    # list of locations that are searched instead.
    from hpc_runner.core.config import find_config_file

    # An explicit --config on the command line wins over auto-discovery.
    config_path = ctx.config_path or find_config_file()

    if config_path is None:
        console.print("[yellow]No configuration file found[/yellow]")
        console.print("Using default settings")
        console.print("\nSearch locations:")
        console.print(" 1. ./hpc-tools.toml")
        console.print(" 2. ./pyproject.toml [tool.hpc-tools]")
        console.print(" 3. <git root>/hpc-tools.toml")
        console.print(" 4. ~/.config/hpc-tools/config.toml")
        return

    console.print(f"[bold]Config file:[/bold] {config_path}")
    console.print()

    # TOML documents are UTF-8 by specification, so decode explicitly rather
    # than relying on the platform's locale encoding (which would fail or
    # garble non-ASCII content on e.g. cp1252 or C/POSIX locales).
    content = config_path.read_text(encoding="utf-8")
    syntax = Syntax(content, "toml", theme="monokai", line_numbers=True)
    console.print(syntax)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@config_cmd.command("init")
@click.option("--global", "global_config", is_flag=True, help="Create in ~/.config/hpc-tools/")
@pass_context
def init(ctx: Context, global_config: bool) -> None:
    """Create a new configuration file."""
    # NOTE: the docstring above is rendered by click as the help text.

    # Template that is written verbatim to the new configuration file.
    default_config = '''# hpc-tools configuration

[defaults]
# Default job settings
cpu = 1
mem = "4G"
time = "1:00:00"
# queue = "batch"

# Modules to always load
modules = []

[schedulers.sge]
# SGE-specific settings
parallel_environment = "smp"
memory_resource = "mem_free"
time_resource = "h_rt"
merge_output = true

# Tool-specific configurations
# [tools.python]
# modules = ["python/3.11"]

# Job type configurations
# [types.gpu]
# queue = "gpu"
# resources = [{name = "gpu", value = 1}]
'''

    # Pick the destination: project-local by default, per-user with --global.
    if not global_config:
        config_path = Path.cwd() / "hpc-tools.toml"
    else:
        config_dir = Path.home() / ".config" / "hpc-tools"
        # The per-user directory may not exist yet on a fresh account.
        config_dir.mkdir(parents=True, exist_ok=True)
        config_path = config_dir / "config.toml"

    # Never clobber an existing file without asking; the prompt is only
    # shown when the file is already there.
    overwrite_declined = config_path.exists() and not click.confirm(
        f"{config_path} already exists. Overwrite?"
    )
    if overwrite_declined:
        console.print("[yellow]Cancelled[/yellow]")
        return

    config_path.write_text(default_config)
    console.print(f"[green]Created {config_path}[/green]")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@config_cmd.command("path")
@pass_context
def path(ctx: Context) -> None:
    """Show path to active configuration file."""
    # NOTE: the docstring above is rendered by click as the help text.
    from hpc_runner.core.config import find_config_file

    # An explicit --config on the command line wins over auto-discovery.
    active = ctx.config_path or find_config_file()

    # Print either the resolved path or a warning when nothing was found.
    message = str(active) if active else "[yellow]No configuration file found[/yellow]"
    console.print(message)
|
hpc_runner/cli/main.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Main CLI entry point using rich-click."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
# Configure rich-click
|
|
9
|
+
click.rich_click.SHOW_ARGUMENTS = True
|
|
10
|
+
|
|
11
|
+
# Global console for Rich output
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
# Context object to pass state between commands
|
|
15
|
+
class Context:
    """Mutable bag of global CLI options shared by all subcommands.

    An instance is created with every field at its "unset" default and is
    then filled in by the top-level command group.
    """

    def __init__(self) -> None:
        # Verbose output is off unless requested.
        self.verbose: bool = False
        # Scheduler name forced by the user; None means "not forced".
        self.scheduler: str | None = None
        # Explicit configuration file; None means "not given".
        self.config_path: Path | None = None
|
|
20
|
+
|
|
21
|
+
pass_context = click.make_pass_decorator(Context, ensure=True)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.option("--config", type=click.Path(exists=True, path_type=Path), help="Path to configuration file")
@click.option("--scheduler", type=str, help="Force scheduler (sge, slurm, pbs, local)")
@click.option("--verbose", is_flag=True, help="Enable verbose output")
@click.version_option(package_name="hpc-runner")
@pass_context
def cli(ctx: Context, config: Path | None, scheduler: str | None, verbose: bool) -> None:
    """HPC job submission tool.

    Submit and manage jobs across different HPC schedulers (SGE, Slurm, PBS)
    with a unified interface.

    Any unrecognized short options are passed directly to the underlying
    scheduler, allowing use of native flags like -N, -n, -q, etc.
    """
    # NOTE: the docstring above is rendered by click as the top-level help.
    #
    # Record the global options on the shared Context object so that every
    # subcommand decorated with @pass_context can read them.
    ctx.verbose = verbose
    ctx.scheduler = scheduler
    ctx.config_path = config
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Import and register subcommands (must be after cli is defined to avoid circular imports)
|
|
57
|
+
from hpc_runner.cli.cancel import cancel # noqa: E402
|
|
58
|
+
from hpc_runner.cli.config import config_cmd # noqa: E402
|
|
59
|
+
from hpc_runner.cli.monitor import monitor # noqa: E402
|
|
60
|
+
from hpc_runner.cli.run import run # noqa: E402
|
|
61
|
+
from hpc_runner.cli.status import status # noqa: E402
|
|
62
|
+
|
|
63
|
+
cli.add_command(run)
|
|
64
|
+
cli.add_command(status)
|
|
65
|
+
cli.add_command(cancel)
|
|
66
|
+
cli.add_command(config_cmd, name="config")
|
|
67
|
+
cli.add_command(monitor)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def main() -> None:
    """Run the command-line interface (console-script entry point)."""
    # All argument parsing and dispatch is delegated to the click group.
    cli()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
if __name__ == "__main__":
|
|
76
|
+
main()
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""CLI command for launching the interactive job monitor."""
|
|
2
|
+
|
|
3
|
+
import rich_click as click
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@click.command()
@click.option("--refresh", "-r", default=10, type=int, help="Auto-refresh interval in seconds")
def monitor(refresh: int) -> None:
    """Launch interactive job monitor TUI.

    Opens a terminal UI for monitoring HPC jobs across schedulers.
    Shows active and completed jobs with filtering and search.

    \b
    Keyboard shortcuts:
    q Quit
    r Manual refresh
    u Toggle user filter (me/all)
    Tab Switch tabs
    """
    # NOTE: the docstring above is rendered by click as the help text; the
    # "\b" marker tells click not to re-wrap the shortcut list.
    #
    # The TUI package is imported only when the command actually runs.
    from hpc_runner.tui import HpcMonitorApp

    # Build the app with the requested refresh interval and start it.
    HpcMonitorApp(refresh_interval=refresh).run()
|
hpc_runner/cli/run.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""Run command - submit jobs to the scheduler."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.panel import Panel
|
|
8
|
+
from rich.syntax import Syntax
|
|
9
|
+
|
|
10
|
+
from hpc_runner.cli.main import Context, pass_context
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from hpc_runner.core.job import Job
|
|
14
|
+
|
|
15
|
+
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@click.command(
    context_settings={
        "ignore_unknown_options": True,
        "allow_interspersed_args": True,
    }
)
@click.argument("args", nargs=-1, type=click.UNPROCESSED)
# All hpc-runner options are long-form only
@click.option("--job-name", "job_name", help="Job name")
@click.option("--cpu", type=int, help="Number of CPUs")
@click.option("--mem", help="Memory requirement (e.g., 16G)")
@click.option("--time", "time_limit", help="Time limit (e.g., 4:00:00)")
@click.option("--queue", help="Queue/partition name")
@click.option("--nodes", type=int, help="Number of nodes (MPI jobs)")
@click.option("--ntasks", type=int, help="Number of tasks (MPI jobs)")
@click.option("--directory", type=click.Path(exists=True), help="Working directory")
@click.option("--job-type", "job_type", help="Job type from config")
@click.option("--module", "modules", multiple=True, help="Modules to load (repeatable)")
@click.option("--stderr", help="Separate stderr file (default: merged)")
@click.option("--output", help="Stdout file path pattern")
@click.option("--array", help="Array job specification (e.g., 1-100)")
@click.option("--depend", help="Job dependency specification")
@click.option("--inherit-env/--no-inherit-env", "inherit_env", default=True, help="Inherit environment variables")
@click.option("--interactive", is_flag=True, help="Run interactively (srun/qrsh)")
@click.option("--local", is_flag=True, help="Run locally (no scheduler)")
@click.option("--dry-run", "dry_run", is_flag=True, help="Show what would be submitted")
@click.option("--wait", is_flag=True, help="Wait for job completion")
@click.option("--keep-script", "keep_script", is_flag=True, help="Keep job script for debugging")
@pass_context
def run(
    ctx: Context,
    args: tuple[str, ...],
    job_name: str | None,
    cpu: int | None,
    mem: str | None,
    time_limit: str | None,
    queue: str | None,
    nodes: int | None,
    ntasks: int | None,
    directory: str | None,
    job_type: str | None,
    modules: tuple[str, ...],
    stderr: str | None,
    output: str | None,
    array: str | None,
    depend: str | None,
    inherit_env: bool,
    interactive: bool,
    local: bool,
    dry_run: bool,
    wait: bool,
    keep_script: bool,
) -> None:
    """Submit a job to the scheduler.

    COMMAND is the command to execute. Use quotes for complex commands:

    \b
    hpc run "make -j8 all"
    hpc run python script.py --arg value

    Any unrecognized options starting with '-' are passed directly to the
    underlying scheduler. This allows using native flags:

    \b
    hpc run -N 4 -n 16 "mpirun ./sim" # Slurm nodes/tasks
    hpc run -q batch.q -l gpu=2 "train" # SGE queue/resources
    """
    # NOTE: the docstring above is rendered by click as the help text.
    import shlex

    from hpc_runner.core.job import Job
    from hpc_runner.schedulers import get_scheduler

    # Split the raw argument tuple into the user command and the options
    # that are forwarded untouched to the scheduler.
    command_parts, scheduler_args = _parse_args(args)
    if not command_parts:
        raise click.UsageError("Command is required")

    # shlex.join keeps arguments containing spaces/special characters quoted.
    cmd_str = shlex.join(command_parts)

    # --local overrides whatever scheduler was selected globally.
    scheduler = get_scheduler("local" if local else ctx.scheduler)

    # Start from a configured job type when one is named, else a bare job.
    job = Job.from_config(job_type, command=cmd_str) if job_type else Job(command=cmd_str)

    # CLI overrides: (job attribute, value). A value is applied only when it
    # is truthy, i.e. the option was actually supplied.
    overrides = (
        ("name", job_name),
        ("cpu", cpu),
        ("mem", mem),
        ("time", time_limit),
        ("queue", queue),
        ("nodes", nodes),
        ("tasks", ntasks),
        ("workdir", directory),
        ("modules", list(modules)),
        ("stderr", stderr),
        ("stdout", output),
        ("dependency", depend),
    )
    for attr_name, value in overrides:
        if value:
            setattr(job, attr_name, value)

    # inherit_env always has a value (the option has a default), so it is
    # applied unconditionally.
    job.inherit_env = inherit_env

    # Attach scheduler passthrough arguments, echoing them in verbose mode.
    if scheduler_args:
        job.raw_args = scheduler_args
        if ctx.verbose:
            console.print(f"[dim]Scheduler passthrough: {' '.join(scheduler_args)}[/dim]")

    # Array jobs have their own submission/dry-run path.
    if array:
        _handle_array_job(job, array, scheduler, dry_run, ctx.verbose)
        return

    if dry_run:
        _show_dry_run(job, scheduler, scheduler_args, interactive=interactive)
        return

    # Submit the job.
    result = scheduler.submit(job, interactive=interactive, keep_script=keep_script)

    # Interactive jobs have already finished once submit() returns, so only
    # the exit status is reported.
    if interactive:
        if result.returncode == 0:
            console.print("[green]Job completed successfully[/green]")
        else:
            console.print(f"[red]Job failed with exit code: {result.returncode}[/red]")
        return

    console.print(f"Submitted job [bold cyan]{result.job_id}[/bold cyan]")

    if ctx.verbose:
        console.print(f" Scheduler: {scheduler.name}")
        console.print(f" Job name: {job.name}")
        console.print(f" Command: {job.command}")

    if wait:
        console.print("[dim]Waiting for job completion...[/dim]")
        final_status = result.wait()
        console.print(f"Job completed with status: [bold]{final_status.name}[/bold]")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _parse_args(args: tuple[str, ...]) -> tuple[list[str], list[str]]:
    """Parse args into command parts and scheduler passthrough args.

    Scheduler args are any args that:
    - Start with '-' and are not recognized hpc-runner options
    - Include their values (e.g., "-N 4" becomes ["-N", "4"])

    The command is everything after the first non-option arg or after '--'.
    Once the command has started, every remaining token belongs to it
    verbatim, including option-like tokens and a literal '--'
    (e.g. ``git checkout -- file``).

    Args:
        args: Raw arguments from click

    Returns:
        Tuple of (command_parts, scheduler_args)
    """
    command_parts: list[str] = []
    scheduler_args: list[str] = []

    args_list = list(args)
    i = 0

    while i < len(args_list):
        arg = args_list[i]

        # A leading '--' ends option parsing; it is a separator only and is
        # not part of the command itself.
        if arg == "--":
            command_parts.extend(args_list[i + 1:])
            break

        # The first non-option token starts the command. Everything from
        # here on is kept untouched so the command's own flags (and any
        # '--' it contains) survive.
        if not arg.startswith("-"):
            command_parts.extend(args_list[i:])
            break

        # Anything else is a scheduler passthrough option.
        scheduler_args.append(arg)

        # Heuristic: a following token that does not look like an option is
        # treated as this option's value, unless the option already carries
        # its value via '=' syntax.
        takes_next = (
            "=" not in arg
            and i + 1 < len(args_list)
            and not args_list[i + 1].startswith("-")
        )
        if takes_next:
            i += 1
            scheduler_args.append(args_list[i])
        i += 1

    return command_parts, scheduler_args
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _show_dry_run(
    job: "Job", scheduler, scheduler_args: list[str], interactive: bool = False
) -> None:
    """Print a summary panel and the script that would be submitted.

    Nothing is sent to the scheduler. When ``interactive`` is set and the
    scheduler offers ``_generate_interactive_script``, that generator is used
    with a placeholder script path; otherwise the regular batch script is
    rendered.
    """
    mode = "batch"
    if interactive:
        mode = "interactive"

    # Summary of what would be submitted.
    summary_lines = [
        f"[bold]Scheduler:[/bold] {scheduler.name}",
        f"[bold]Mode:[/bold] {mode}",
        f"[bold]Job name:[/bold] {job.name}",
        f"[bold]Command:[/bold] {job.command}",
    ]
    panel = Panel.fit("\n".join(summary_lines), title="Dry Run", border_style="blue")
    console.print(panel)

    if scheduler_args:
        console.print(f"\n[bold]Scheduler passthrough args:[/bold] {' '.join(scheduler_args)}")

    console.print("\n[bold]Generated script:[/bold]")

    # Not every scheduler has a dedicated interactive template, so fall back
    # to the batch script when the generator is missing.
    use_interactive_template = interactive and hasattr(scheduler, "_generate_interactive_script")
    if use_interactive_template:
        script = scheduler._generate_interactive_script(job, "/tmp/example_script.sh")
    else:
        script = scheduler.generate_script(job)

    console.print(Syntax(script, "bash", theme="monokai", line_numbers=True))
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _parse_array_spec(array_spec: str) -> tuple[int, int, int, int | None]:
    """Split an array specification into (start, end, step, max_concurrent).

    Accepted forms: "N", "A-B", "A-B:STEP", "A-B%LIMIT", "A-B:STEP%LIMIT",
    plus the legacy all-colon form "A-B:STEP:LIMIT".

    Raises:
        ValueError: if any numeric field is not a valid integer.
    """
    # '%' introduces the concurrency limit and must be separated before the
    # step is read; otherwise "1-100%5" would be parsed as a step of 5.
    range_and_step, has_limit, limit_text = array_spec.partition("%")
    fields = range_and_step.split(":")

    # Backward compatibility: a third colon-separated field is still
    # accepted as the concurrency limit when no '%' is present.
    if not has_limit and len(fields) > 2:
        limit_text = fields[2]

    range_parts = fields[0].split("-")
    start = int(range_parts[0])
    # A single index ("7") describes a one-task array.
    end = int(range_parts[1]) if len(range_parts) > 1 else start
    step = int(fields[1]) if len(fields) > 1 else 1
    max_concurrent = int(limit_text) if limit_text else None
    return start, end, step, max_concurrent


def _handle_array_job(job, array_spec: str, scheduler, dry_run: bool, verbose: bool) -> None:
    """Handle array job submission.

    Args:
        job: The job to run once per array index.
        array_spec: Array specification such as "1-100", "1-100:10",
            "1-100%5" or "1-100:10%5" (range, optional step after ':',
            optional concurrency limit after '%').
        scheduler: Scheduler the array is submitted to.
        dry_run: When true, only print what would be submitted.
        verbose: Accepted for signature compatibility; not used here.

    Raises:
        ValueError: if ``array_spec`` contains a non-integer field.
    """
    from hpc_runner.core.job_array import JobArray

    start, end, step, max_concurrent = _parse_array_spec(array_spec)

    array_job = JobArray(
        job=job,
        start=start,
        end=end,
        step=step,
        max_concurrent=max_concurrent,
    )

    if dry_run:
        console.print(f"[bold]Array job:[/bold] {array_job.range_str} ({array_job.count} tasks)")
        _show_dry_run(job, scheduler, [])
        return

    result = array_job.submit(scheduler)
    console.print(f"Submitted array job [bold cyan]{result.base_job_id}[/bold cyan]")
    console.print(f" Tasks: {array_job.count} ({array_job.range_str})")
|