hpc-runner 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. hpc_runner/__init__.py +57 -0
  2. hpc_runner/_version.py +34 -0
  3. hpc_runner/cli/__init__.py +1 -0
  4. hpc_runner/cli/cancel.py +38 -0
  5. hpc_runner/cli/config.py +109 -0
  6. hpc_runner/cli/main.py +76 -0
  7. hpc_runner/cli/monitor.py +30 -0
  8. hpc_runner/cli/run.py +292 -0
  9. hpc_runner/cli/status.py +66 -0
  10. hpc_runner/core/__init__.py +31 -0
  11. hpc_runner/core/config.py +177 -0
  12. hpc_runner/core/descriptors.py +110 -0
  13. hpc_runner/core/exceptions.py +38 -0
  14. hpc_runner/core/job.py +328 -0
  15. hpc_runner/core/job_array.py +58 -0
  16. hpc_runner/core/job_info.py +104 -0
  17. hpc_runner/core/resources.py +49 -0
  18. hpc_runner/core/result.py +161 -0
  19. hpc_runner/core/types.py +13 -0
  20. hpc_runner/py.typed +0 -0
  21. hpc_runner/schedulers/__init__.py +60 -0
  22. hpc_runner/schedulers/base.py +194 -0
  23. hpc_runner/schedulers/detection.py +52 -0
  24. hpc_runner/schedulers/local/__init__.py +5 -0
  25. hpc_runner/schedulers/local/scheduler.py +354 -0
  26. hpc_runner/schedulers/local/templates/job.sh.j2 +28 -0
  27. hpc_runner/schedulers/sge/__init__.py +5 -0
  28. hpc_runner/schedulers/sge/args.py +232 -0
  29. hpc_runner/schedulers/sge/parser.py +287 -0
  30. hpc_runner/schedulers/sge/scheduler.py +881 -0
  31. hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
  32. hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
  33. hpc_runner/templates/__init__.py +5 -0
  34. hpc_runner/templates/engine.py +55 -0
  35. hpc_runner/tui/__init__.py +5 -0
  36. hpc_runner/tui/app.py +436 -0
  37. hpc_runner/tui/components/__init__.py +17 -0
  38. hpc_runner/tui/components/detail_panel.py +187 -0
  39. hpc_runner/tui/components/filter_bar.py +174 -0
  40. hpc_runner/tui/components/filter_popup.py +345 -0
  41. hpc_runner/tui/components/job_table.py +260 -0
  42. hpc_runner/tui/providers/__init__.py +5 -0
  43. hpc_runner/tui/providers/jobs.py +197 -0
  44. hpc_runner/tui/screens/__init__.py +7 -0
  45. hpc_runner/tui/screens/confirm.py +67 -0
  46. hpc_runner/tui/screens/job_details.py +210 -0
  47. hpc_runner/tui/screens/log_viewer.py +170 -0
  48. hpc_runner/tui/snapshot.py +153 -0
  49. hpc_runner/tui/styles/monitor.tcss +567 -0
  50. hpc_runner/workflow/__init__.py +6 -0
  51. hpc_runner/workflow/dependency.py +20 -0
  52. hpc_runner/workflow/pipeline.py +180 -0
  53. hpc_runner-0.2.0.dist-info/METADATA +285 -0
  54. hpc_runner-0.2.0.dist-info/RECORD +56 -0
  55. hpc_runner-0.2.0.dist-info/WHEEL +4 -0
  56. hpc_runner-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,66 @@
1
+ """Status command - check job status."""
2
+
3
+
4
+ import rich_click as click
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+
8
+ from hpc_runner.cli.main import Context, pass_context
9
+
10
+ console = Console()
11
+
12
+
13
@click.command()
@click.argument("job_id", required=False)
@click.option("--all", "all_users", is_flag=True, help="Show all users' jobs")
@click.option("--watch", is_flag=True, help="Watch mode (refresh periodically)")
@pass_context
def status(
    ctx: Context,
    job_id: str | None,
    all_users: bool,
    watch: bool,
) -> None:
    """Check job status.

    If JOB_ID is provided, show status of that specific job.
    Otherwise, list all your jobs.
    """
    # NOTE: `all_users` and `watch` are accepted from the command line but
    # are not read anywhere in this function body.
    # Imported lazily, inside the command, exactly as the original does.
    from hpc_runner.schedulers import get_scheduler

    # The scheduler is resolved first, even when no job id was given.
    backend = get_scheduler(ctx.scheduler)

    if not job_id:
        # Without a job id there is no generic listing; print a hint instead.
        console.print(
            "[yellow]Listing all jobs requires scheduler-specific implementation[/yellow]"
        )
        console.print("Use 'hpc status <job_id>' to check a specific job")
        return

    # Query the scheduler before building any output.
    job_status = backend.get_status(job_id)
    job_exit_code = backend.get_exit_code(job_id)

    report = Table(title=f"Job {job_id}")
    report.add_column("Property", style="cyan")
    report.add_column("Value")
    report.add_row("Status", _status_style(job_status.name))

    # The exit-code row is only shown when the scheduler reported one.
    if job_exit_code is not None:
        report.add_row("Exit Code", str(job_exit_code))

    console.print(report)
53
+
54
+
55
+ def _status_style(status: str) -> str:
56
+ """Apply color to status string."""
57
+ colors = {
58
+ "PENDING": "[yellow]PENDING[/yellow]",
59
+ "RUNNING": "[blue]RUNNING[/blue]",
60
+ "COMPLETED": "[green]COMPLETED[/green]",
61
+ "FAILED": "[red]FAILED[/red]",
62
+ "CANCELLED": "[magenta]CANCELLED[/magenta]",
63
+ "TIMEOUT": "[red]TIMEOUT[/red]",
64
+ "UNKNOWN": "[dim]UNKNOWN[/dim]",
65
+ }
66
+ return colors.get(status, status)
@@ -0,0 +1,31 @@
1
+ """Core models and abstractions for hpc-tools."""
2
+
3
+ from .exceptions import (
4
+ AccountingNotAvailable,
5
+ ConfigError,
6
+ ConfigNotFoundError,
7
+ HPCToolsError,
8
+ JobNotFoundError,
9
+ SchedulerError,
10
+ SubmissionError,
11
+ ValidationError,
12
+ )
13
+ from .job_info import JobInfo
14
+ from .result import ArrayJobResult, JobResult, JobStatus
15
+
16
+ __all__ = [
17
+ # Exceptions
18
+ "AccountingNotAvailable",
19
+ "ConfigError",
20
+ "ConfigNotFoundError",
21
+ "HPCToolsError",
22
+ "JobNotFoundError",
23
+ "SchedulerError",
24
+ "SubmissionError",
25
+ "ValidationError",
26
+ # Types
27
+ "JobInfo",
28
+ "JobResult",
29
+ "ArrayJobResult",
30
+ "JobStatus",
31
+ ]
@@ -0,0 +1,177 @@
1
+ """Configuration loading and management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ if sys.version_info >= (3, 11):
11
+ import tomllib
12
+ else:
13
+ import tomli as tomllib
14
+
15
+
16
@dataclass
class HPCConfig:
    """Loaded configuration.

    Holds the four top-level tables of a configuration document plus the
    path the document was read from (if any).
    """

    # Baseline settings; every job config starts from a copy of these.
    defaults: dict[str, Any] = field(default_factory=dict)
    # Per-tool overrides, keyed by tool (executable) name.
    tools: dict[str, dict[str, Any]] = field(default_factory=dict)
    # Per-type overrides, keyed by type name.
    types: dict[str, dict[str, Any]] = field(default_factory=dict)
    # Scheduler-specific settings, keyed by scheduler name.
    schedulers: dict[str, dict[str, Any]] = field(default_factory=dict)

    # File this configuration was loaded from; excluded from repr().
    _source_path: Path | None = field(default=None, repr=False)

    def get_job_config(self, tool_or_type: str) -> dict[str, Any]:
        """Get merged configuration for a tool or type.

        The result always starts from a copy of ``defaults``. On top of that,
        exactly one override table is merged:

        1. ``types[tool_or_type]`` if it exists, otherwise
        2. ``tools[tool_or_type]`` if it exists, otherwise
        3. nothing (the defaults are returned as-is).

        Args:
            tool_or_type: Name to look up in ``types`` and then ``tools``.

        Returns:
            A new dict; ``defaults`` itself is not modified.
        """
        config = self.defaults.copy()

        # A type entry shadows a tool entry of the same name.
        if tool_or_type in self.types:
            config = _merge(config, self.types[tool_or_type])
        elif tool_or_type in self.tools:
            config = _merge(config, self.tools[tool_or_type])

        return config

    def get_tool_config(self, command: str) -> dict[str, Any]:
        """Get configuration matching a command.

        The tool name is the first whitespace-separated word of ``command``
        with any leading directory components removed.

        Args:
            command: Command line, e.g. ``"/usr/bin/make -j4"``.

        Returns:
            The merged job configuration for the extracted tool name. For an
            empty or whitespace-only command there is no tool name, so a copy
            of the defaults is returned.
        """
        words = command.split()
        if not words:
            # Previously this raised IndexError on ``command.split()[0]``.
            return self.defaults.copy()

        # Strip the directory part so "/usr/bin/make" is looked up as "make".
        tool = Path(words[0]).name
        return self.get_job_config(tool)

    def get_scheduler_config(self, scheduler: str) -> dict[str, Any]:
        """Get scheduler-specific configuration.

        Args:
            scheduler: Scheduler name used as key in ``schedulers``.

        Returns:
            The stored table for that scheduler, or an empty dict if none.
        """
        return self.schedulers.get(scheduler, {})
58
+
59
+
60
+ def _merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
61
+ """Deep merge with override taking precedence."""
62
+ result = base.copy()
63
+ for key, value in override.items():
64
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
65
+ result[key] = _merge(result[key], value)
66
+ elif key in result and isinstance(result[key], list) and isinstance(value, list):
67
+ # Check for list reset marker
68
+ if value and value[0] == "-":
69
+ result[key] = value[1:]
70
+ else:
71
+ result[key] = list(set(result[key] + value))
72
+ else:
73
+ result[key] = value
74
+ return result
75
+
76
+
77
def find_config_file() -> Path | None:
    """Locate the configuration file to use, or return None if there is none.

    Candidates are probed in this order and the first hit wins:

    1. ``hpc-tools.toml`` in the current working directory.
    2. ``pyproject.toml`` in the current working directory, but only if it
       parses and contains a ``[tool.hpc-tools]`` table.
    3. ``hpc-tools.toml`` at the enclosing git repository root.
    4. ``~/.config/hpc-tools/config.toml``.
    5. ``defaults/config.toml`` four directory levels above this module.
    """
    here = Path.cwd()

    local_toml = here / "hpc-tools.toml"
    if local_toml.exists():
        return local_toml

    pyproject_path = here / "pyproject.toml"
    if pyproject_path.exists():
        # Best effort: any failure while reading or inspecting pyproject.toml
        # simply means this candidate is skipped.
        try:
            with open(pyproject_path, "rb") as handle:
                parsed = tomllib.load(handle)
            if "tool" in parsed and "hpc-tools" in parsed["tool"]:
                return pyproject_path
        except Exception:
            pass

    repo_root = _find_git_root(here)
    if repo_root:
        repo_toml = repo_root / "hpc-tools.toml"
        if repo_toml.exists():
            return repo_toml

    per_user = Path.home() / ".config" / "hpc-tools" / "config.toml"
    if per_user.exists():
        return per_user

    bundled = Path(__file__).parent.parent.parent.parent / "defaults" / "config.toml"
    if bundled.exists():
        return bundled

    return None
117
+
118
+
119
+ def _find_git_root(start: Path) -> Path | None:
120
+ """Find git repository root."""
121
+ current = start.resolve()
122
+ while current != current.parent:
123
+ if (current / ".git").exists():
124
+ return current
125
+ current = current.parent
126
+ return None
127
+
128
+
129
def load_config(path: Path | str | None = None) -> HPCConfig:
    """Read a TOML configuration file into an HPCConfig.

    Args:
        path: Explicit config path, or None to auto-discover one via
            ``find_config_file()``.

    Returns:
        The loaded configuration. When no path is given and none can be
        discovered, an empty ``HPCConfig`` is returned.
    """
    located = find_config_file() if path is None else path
    if located is None:
        # Nothing to read: fall back to an empty configuration.
        return HPCConfig()

    config_path = Path(located)
    with open(config_path, "rb") as stream:
        document = tomllib.load(stream)

    # In pyproject.toml the settings live under the [tool.hpc-tools] table.
    if config_path.name == "pyproject.toml":
        document = document.get("tool", {}).get("hpc-tools", {})

    # Each section defaults to an empty table when absent from the file.
    section_names = ("defaults", "tools", "types", "schedulers")
    sections = {name: document.get(name, {}) for name in section_names}
    return HPCConfig(_source_path=config_path, **sections)
159
+
160
+
161
+ # Global config cache
162
+ _cached_config: HPCConfig | None = None
163
+
164
+
165
def get_config() -> HPCConfig:
    """Return the process-wide configuration, loading it on first use."""
    global _cached_config
    if _cached_config is not None:
        return _cached_config
    # First call: auto-discover and load, then remember the result.
    _cached_config = load_config()
    return _cached_config
171
+
172
+
173
def reload_config(path: Path | str | None = None) -> HPCConfig:
    """Load the configuration again and replace the cached instance.

    Args:
        path: Explicit config path, or None to auto-discover.

    Returns:
        The freshly loaded configuration, which is now also the cached one.
    """
    global _cached_config
    fresh = load_config(path)
    _cached_config = fresh
    return fresh
@@ -0,0 +1,110 @@
1
+ """Descriptor pattern for job attributes and scheduler arguments."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Generic, TypeVar
5
+
6
+ T = TypeVar("T")
7
+
8
+
9
+ # =============================================================================
10
+ # Job Attribute Descriptor
11
+ # =============================================================================
12
+
13
+
14
+ class JobAttribute(Generic[T]):
15
+ """Descriptor for Job attributes that enables iteration and rendering.
16
+
17
+ This descriptor provides:
18
+ - Clean attribute access on Job instances
19
+ - Class-level access returns the descriptor itself
20
+ - Support for default values
21
+ - Registration for iteration by schedulers
22
+
23
+ Example:
24
+ class Job:
25
+ name = JobAttribute('name')
26
+ cpu = JobAttribute('cpu', default=1)
27
+
28
+ job = Job()
29
+ job.name = "test"
30
+ print(job.name) # "test"
31
+ print(Job.name) # <JobAttribute 'name'>
32
+ """
33
+
34
+ def __init__(self, name: str, *, default: T | None = None):
35
+ self.public_name = name
36
+ self.default = default
37
+ self._private_name: str | None = None
38
+
39
+ def __set_name__(self, owner: type, name: str) -> None:
40
+ self._private_name = f"_{name}"
41
+
42
+ def __get__(self, obj: Any, objtype: type | None = None) -> T | "JobAttribute[T]":
43
+ if obj is None:
44
+ return self
45
+ return getattr(obj, self._private_name, self.default)
46
+
47
+ def __set__(self, obj: Any, value: T | None) -> None:
48
+ setattr(obj, self._private_name, value)
49
+
50
+ def __repr__(self) -> str:
51
+ return f"<JobAttribute '{self.public_name}'>"
52
+
53
+
54
+ # =============================================================================
55
+ # Scheduler Argument Base Class
56
+ # =============================================================================
57
+
58
+
59
class SchedulerArg(ABC, Generic[T]):
    """Abstract renderer that turns one job attribute value into scheduler syntax.

    Concrete subclasses are written per scheduler backend (SGE, Slurm, PBS)
    and must provide both rendering forms:

    - to_args(value) -> list of command-line arguments
    - to_directive(value) -> script directive string or None

    Example:
        class SGEJobNameArg(SchedulerArg[str]):
            def to_args(self, value):
                return ["-N", value] if value else []

            def to_directive(self, value):
                return f"#$ -N {value}" if value else None
    """

    def __init__(self, flag: str, *, doc: str = ""):
        """Remember the scheduler flag and an optional description.

        Args:
            flag: Flag text stored on the instance and shown in ``repr``.
            doc: Free-form description; defaults to an empty string.
        """
        self.doc = doc
        self.flag = flag

    @abstractmethod
    def to_args(self, value: T | None) -> list[str]:
        """Render ``value`` as command-line arguments.

        Args:
            value: The job attribute value (may be None).

        Returns:
            List of command-line argument strings; implementations are
            expected to return an empty list when ``value`` is None.
        """

    @abstractmethod
    def to_directive(self, value: T | None) -> str | None:
        """Render ``value`` as a script directive line.

        Args:
            value: The job attribute value (may be None).

        Returns:
            Directive string (e.g., "#$ -N jobname"); implementations are
            expected to return None when ``value`` is None.
        """

    def __repr__(self) -> str:
        return f"<{type(self).__name__} flag='{self.flag}'>"
@@ -0,0 +1,38 @@
1
+ """Custom exceptions for hpc-tools."""
2
+
3
+
4
class HPCToolsError(Exception):
    """Root of the hpc-tools exception hierarchy."""


class SchedulerError(HPCToolsError):
    """A scheduler operation failed."""


class SubmissionError(SchedulerError):
    """A job could not be submitted."""


class JobNotFoundError(SchedulerError):
    """The requested job ID does not exist."""


class ConfigError(HPCToolsError):
    """The configuration is invalid."""


class ConfigNotFoundError(ConfigError):
    """No configuration file could be found."""


class ValidationError(HPCToolsError):
    """Job parameters failed validation."""


class AccountingNotAvailable(SchedulerError):
    """Job accounting/history is not enabled on this cluster.

    Raised when historical job data is requested (e.g., via qacct for SGE
    or sacct for Slurm) but the scheduler's accounting system is not
    configured or accessible.
    """