hpc-runner 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/CLAUDE.md +68 -0
  2. hpc_runner-0.2.0/PKG-INFO +285 -0
  3. hpc_runner-0.2.0/README.md +241 -0
  4. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/defaults/config.toml +9 -0
  5. hpc_runner-0.2.0/docs/HPC_MONITOR_TUI_PLAN.md +796 -0
  6. hpc_runner-0.2.0/docs/TEXTUAL_STYLING_COOKBOOK.md +1346 -0
  7. hpc_runner-0.2.0/docs/cli-redesign-spec.md +786 -0
  8. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/pyproject.toml +3 -2
  9. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/sourceme +3 -9
  10. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/_version.py +2 -2
  11. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/cli/cancel.py +1 -1
  12. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/cli/config.py +2 -2
  13. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/cli/main.py +17 -13
  14. hpc_runner-0.2.0/src/hpc_runner/cli/monitor.py +30 -0
  15. hpc_runner-0.2.0/src/hpc_runner/cli/run.py +292 -0
  16. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/cli/status.py +6 -5
  17. hpc_runner-0.2.0/src/hpc_runner/core/__init__.py +31 -0
  18. hpc_runner-0.2.0/src/hpc_runner/core/descriptors.py +110 -0
  19. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/core/exceptions.py +9 -0
  20. hpc_runner-0.2.0/src/hpc_runner/core/job.py +328 -0
  21. hpc_runner-0.2.0/src/hpc_runner/core/job_info.py +104 -0
  22. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/core/result.py +4 -0
  23. hpc_runner-0.2.0/src/hpc_runner/schedulers/base.py +194 -0
  24. hpc_runner-0.2.0/src/hpc_runner/schedulers/detection.py +52 -0
  25. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/schedulers/local/scheduler.py +119 -2
  26. hpc_runner-0.2.0/src/hpc_runner/schedulers/sge/args.py +232 -0
  27. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/schedulers/sge/parser.py +106 -13
  28. hpc_runner-0.2.0/src/hpc_runner/schedulers/sge/scheduler.py +881 -0
  29. hpc_runner-0.2.0/src/hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
  30. hpc_runner-0.2.0/src/hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
  31. hpc_runner-0.2.0/src/hpc_runner/tui/__init__.py +5 -0
  32. hpc_runner-0.2.0/src/hpc_runner/tui/app.py +436 -0
  33. hpc_runner-0.2.0/src/hpc_runner/tui/components/__init__.py +17 -0
  34. hpc_runner-0.2.0/src/hpc_runner/tui/components/detail_panel.py +187 -0
  35. hpc_runner-0.2.0/src/hpc_runner/tui/components/filter_bar.py +174 -0
  36. hpc_runner-0.2.0/src/hpc_runner/tui/components/filter_popup.py +345 -0
  37. hpc_runner-0.2.0/src/hpc_runner/tui/components/job_table.py +260 -0
  38. hpc_runner-0.2.0/src/hpc_runner/tui/providers/__init__.py +5 -0
  39. hpc_runner-0.2.0/src/hpc_runner/tui/providers/jobs.py +197 -0
  40. hpc_runner-0.2.0/src/hpc_runner/tui/screens/__init__.py +7 -0
  41. hpc_runner-0.2.0/src/hpc_runner/tui/screens/confirm.py +67 -0
  42. hpc_runner-0.2.0/src/hpc_runner/tui/screens/job_details.py +210 -0
  43. hpc_runner-0.2.0/src/hpc_runner/tui/screens/log_viewer.py +170 -0
  44. hpc_runner-0.2.0/src/hpc_runner/tui/snapshot.py +153 -0
  45. hpc_runner-0.2.0/src/hpc_runner/tui/styles/monitor.tcss +567 -0
  46. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/conftest.py +37 -1
  47. hpc_runner-0.2.0/tests/test_cli/test_run.py +238 -0
  48. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_schedulers/test_sge.py +139 -0
  49. hpc_runner-0.2.0/tests/test_schedulers/test_sge_xml_submit_time.py +49 -0
  50. hpc_runner-0.2.0/tests/test_tui/__init__.py +0 -0
  51. hpc_runner-0.2.0/tests/test_tui/test_app_snapshot.py +184 -0
  52. hpc_runner-0.2.0/tests/test_tui/test_detail_panel.py +320 -0
  53. hpc_runner-0.2.0/tests/test_tui/test_job_table.py +270 -0
  54. hpc_runner-0.1.0/PKG-INFO +0 -46
  55. hpc_runner-0.1.0/README.md +0 -3
  56. hpc_runner-0.1.0/src/hpc_runner/cli/run.py +0 -136
  57. hpc_runner-0.1.0/src/hpc_runner/core/__init__.py +0 -1
  58. hpc_runner-0.1.0/src/hpc_runner/core/descriptors.py +0 -56
  59. hpc_runner-0.1.0/src/hpc_runner/core/job.py +0 -149
  60. hpc_runner-0.1.0/src/hpc_runner/schedulers/base.py +0 -76
  61. hpc_runner-0.1.0/src/hpc_runner/schedulers/detection.py +0 -34
  62. hpc_runner-0.1.0/src/hpc_runner/schedulers/sge/args.py +0 -165
  63. hpc_runner-0.1.0/src/hpc_runner/schedulers/sge/scheduler.py +0 -325
  64. hpc_runner-0.1.0/src/hpc_runner/schedulers/sge/templates/job.sh.j2 +0 -39
  65. hpc_runner-0.1.0/tests/test_cli/test_run.py +0 -98
  66. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/.github/workflows/ci.yml +0 -0
  67. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/.github/workflows/publish.yml +0 -0
  68. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/.gitignore +0 -0
  69. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/SPEC.md +0 -0
  70. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/__init__.py +0 -0
  71. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/cli/__init__.py +0 -0
  72. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/core/config.py +0 -0
  73. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/core/job_array.py +0 -0
  74. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/core/resources.py +0 -0
  75. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/core/types.py +0 -0
  76. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/py.typed +0 -0
  77. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/schedulers/__init__.py +0 -0
  78. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/schedulers/local/__init__.py +0 -0
  79. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/schedulers/local/templates/job.sh.j2 +0 -0
  80. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/schedulers/sge/__init__.py +0 -0
  81. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/templates/__init__.py +0 -0
  82. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/templates/engine.py +0 -0
  83. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/workflow/__init__.py +0 -0
  84. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/workflow/dependency.py +0 -0
  85. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/src/hpc_runner/workflow/pipeline.py +0 -0
  86. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/__init__.py +0 -0
  87. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_cli/__init__.py +0 -0
  88. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_core/__init__.py +0 -0
  89. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_core/test_config.py +0 -0
  90. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_core/test_job.py +0 -0
  91. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_core/test_resources.py +0 -0
  92. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_schedulers/__init__.py +0 -0
  93. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_schedulers/test_detection.py +0 -0
  94. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_schedulers/test_local.py +0 -0
  95. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_workflow/__init__.py +0 -0
  96. {hpc_runner-0.1.0 → hpc_runner-0.2.0}/tests/test_workflow/test_pipeline.py +0 -0
@@ -58,8 +58,76 @@ Uses `rich-click` for styled output. Commands: `run`, `status`, `cancel`, `confi
58
58
 
59
59
  Jinja2 templates for job scripts. Each scheduler keeps its templates under `schedulers/{name}/templates/` (`job.sh.j2` for the local scheduler; `batch.sh.j2` and `interactive.sh.j2` for SGE).
60
60
 
61
+ ### TUI (`src/hpc_runner/tui/`)
62
+
63
+ **HpcMonitorApp** - Textual-based terminal UI for monitoring HPC jobs. Entry point: `hpc monitor`.
64
+
65
+ - **app.py** - Main application with custom Nord-inspired theme
66
+ - **styles/monitor.tcss** - CSS styling following Rovr aesthetic (see `docs/TEXTUAL_STYLING_COOKBOOK.md`)
67
+ - **snapshot.py** - Visual review utility for development
68
+
61
69
  ## Key Design Decisions
62
70
 
63
71
  - **Merged output by default**: stderr goes to stdout unless `--stderr` specified
64
72
  - **Configurable SGE settings**: PE name, memory resource name, time resource name all come from config, not hardcoded
65
73
  - **Descriptor pattern**: Scheduler arguments use Python descriptors for type-safe flag/directive generation
74
+
75
+ ## TUI Development Rules
76
+
77
+ ### Styling Requirements (CRITICAL)
78
+
79
+ All TUI components MUST follow these styling patterns. **Do NOT use DEFAULT_CSS in components** - put all styles in `monitor.tcss` for consistency.
80
+
81
+ **Core Principles:**
82
+ - **Transparent backgrounds everywhere** - use `background: transparent` on all widgets
83
+ - **Rounded borders** - use `border: round $border-blurred` (unfocused) or `border: round $border` (focused)
84
+ - **No solid colored backgrounds** except for highlighted/selected items
85
+ - **Border titles in $primary** - use `border-title-color: $primary`
86
+
87
+ **Standard Widget Patterns:**
88
+ ```css
89
+ /* Panels and containers */
90
+ MyWidget {
91
+ background: transparent;
92
+ border: round $border-blurred;
93
+ border-title-color: $primary;
94
+ border-title-background: transparent;
95
+ }
96
+
97
+ MyWidget:focus, MyWidget:focus-within {
98
+ border: round $border;
99
+ }
100
+
101
+ /* Buttons - transparent with border */
102
+ Button {
103
+ background: transparent;
104
+ border: round $border-blurred;
105
+ color: $foreground;
106
+ }
107
+
108
+ Button:hover {
109
+ background: $boost;
110
+ border: round $border;
111
+ }
112
+
113
+ /* Popups/overlays - transparent background */
114
+ Popup {
115
+ layer: overlay;
116
+ background: transparent;
117
+ border: round $primary;
118
+ }
119
+ ```
120
+
121
+ **CSS Variables (defined in monitor.tcss):**
122
+ - `$border-blurred` - muted border for unfocused elements
123
+ - `$border` - bright border for focused elements
124
+ - `$primary` - teal accent color (#88C0D0)
125
+ - `$error` - red for destructive actions
126
+
127
+ **Verification:**
128
+
129
+ After ANY edit to TUI code, verify visually that:
130
+ 1. All backgrounds are transparent (terminal shows through)
131
+ 2. Borders are rounded (╭╮╰╯ characters)
132
+ 3. No solid color blocks except for selected/highlighted items
133
+ 4. Focus states brighten borders appropriately
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: hpc-runner
3
+ Version: 0.2.0
4
+ Summary: Unified HPC job submission across multiple schedulers
5
+ Project-URL: Homepage, https://github.com/sjalloq/hpc-runner
6
+ Project-URL: Repository, https://github.com/sjalloq/hpc-runner
7
+ Author: Shareef Jalloq
8
+ License-Expression: MIT
9
+ Keywords: cluster,hpc,job-submission,pbs,sge,slurm
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: System :: Clustering
21
+ Classifier: Topic :: System :: Distributed Computing
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: jinja2>=3.0
24
+ Requires-Dist: rich-click>=1.7
25
+ Requires-Dist: textual>=6.11
26
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
27
+ Provides-Extra: all
28
+ Requires-Dist: build; extra == 'all'
29
+ Requires-Dist: hatch-vcs; extra == 'all'
30
+ Requires-Dist: mypy; extra == 'all'
31
+ Requires-Dist: pytest-cov; extra == 'all'
32
+ Requires-Dist: pytest>=7.0; extra == 'all'
33
+ Requires-Dist: ruff; extra == 'all'
34
+ Requires-Dist: twine; extra == 'all'
35
+ Provides-Extra: dev
36
+ Requires-Dist: build; extra == 'dev'
37
+ Requires-Dist: hatch-vcs; extra == 'dev'
38
+ Requires-Dist: mypy; extra == 'dev'
39
+ Requires-Dist: pytest-cov; extra == 'dev'
40
+ Requires-Dist: pytest>=7.0; extra == 'dev'
41
+ Requires-Dist: ruff; extra == 'dev'
42
+ Requires-Dist: twine; extra == 'dev'
43
+ Description-Content-Type: text/markdown
44
+
45
+ # hpc-runner
46
+
47
+ **Unified HPC job submission across multiple schedulers**
48
+
49
+ Write your jobs once, run them on any cluster - SGE, Slurm, PBS, or locally for testing.
50
+
51
+ ## Features
52
+
53
+ - **Unified CLI** - Same commands work across every supported scheduler (SGE and local today; Slurm and PBS are planned)
54
+ - **Python API** - Programmatic job submission with dependencies and pipelines
55
+ - **Auto-detection** - Automatically finds your cluster's scheduler
56
+ - **Interactive TUI** - Monitor jobs with a terminal dashboard
57
+ - **Job Dependencies** - Chain jobs with afterok, afterany, afternotok
58
+ - **Array Jobs** - Batch processing with throttling support
59
+ - **Virtual Environment Handling** - Automatic venv activation on compute nodes
60
+ - **Module Integration** - Load environment modules in job scripts
61
+ - **Dry-run Mode** - Preview generated scripts before submission
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install hpc-runner
67
+ ```
68
+
69
+ Or with uv:
70
+
71
+ ```bash
72
+ uv pip install hpc-runner
73
+ ```
74
+
75
+ ## Quick Start
76
+
77
+ ### CLI
78
+
79
+ ```bash
80
+ # Basic job submission
81
+ hpc run python train.py
82
+
83
+ # With resources
84
+ hpc run --cpu 4 --mem 16G --time 4:00:00 "python train.py"
85
+
86
+ # GPU job
87
+ hpc run --queue gpu --cpu 4 --mem 32G "python train.py --epochs 100"
88
+
89
+ # Preview without submitting
90
+ hpc run --dry-run --cpu 8 "make -j8"
91
+
92
+ # Interactive session
93
+ hpc run --interactive bash
94
+
95
+ # Array job
96
+ hpc run --array 1-100 "python process.py --task-id \$SGE_TASK_ID"
97
+
98
+ # Wait for completion
99
+ hpc run --wait python long_job.py
100
+ ```
101
+
102
+ ### Python API
103
+
104
+ ```python
105
+ from hpc_runner import Job
106
+
107
+ # Create and submit a job
108
+ job = Job(
109
+ command="python train.py",
110
+ cpu=4,
111
+ mem="16G",
112
+ time="4:00:00",
113
+ queue="gpu",
114
+ )
115
+ result = job.submit()
116
+
117
+ # Wait for completion
118
+ status = result.wait()
119
+ print(f"Exit code: {result.returncode}")
120
+
121
+ # Read output
122
+ print(result.read_stdout())
123
+ ```
124
+
125
+ ### Job Dependencies
126
+
127
+ ```python
128
+ from hpc_runner import Job
129
+
130
+ # First job
131
+ preprocess = Job(command="python preprocess.py", cpu=8, mem="32G")
132
+ result1 = preprocess.submit()
133
+
134
+ # Second job runs after first succeeds
135
+ train = Job(command="python train.py", cpu=4, mem="48G", queue="gpu")
136
+ train.after(result1, type="afterok")
137
+ result2 = train.submit()
138
+ ```
139
+
140
+ ### Pipelines
141
+
142
+ ```python
143
+ from hpc_runner import Pipeline
144
+
145
+ with Pipeline("ml_workflow") as p:
146
+     p.add("python preprocess.py", name="preprocess", cpu=8)
147
+     p.add("python train.py", name="train", depends_on=["preprocess"], queue="gpu")
148
+     p.add("python evaluate.py", name="evaluate", depends_on=["train"])
149
+
150
+ results = p.submit()
151
+ p.wait()
152
+ ```
153
+
154
+ ## Scheduler Support
155
+
156
+ | Scheduler | Status | Notes |
157
+ |-----------|--------|-------|
158
+ | SGE | Fully implemented | qsub, qstat, qdel, qrsh |
159
+ | Local | Fully implemented | Run as subprocess (for testing) |
160
+ | Slurm | Planned | sbatch, squeue, scancel |
161
+ | PBS | Planned | qsub, qstat, qdel |
162
+
163
+ ### Auto-detection Priority
164
+
165
+ 1. `HPC_SCHEDULER` environment variable
166
+ 2. SGE (`SGE_ROOT` or `qstat` available)
167
+ 3. Slurm (`sbatch` available)
168
+ 4. PBS (`qsub` with PBS)
169
+ 5. Local fallback
170
+
171
+ ## Configuration
172
+
173
+ hpc-runner uses TOML configuration files. Location priority:
174
+
175
+ 1. `--config /path/to/config.toml`
176
+ 2. `./hpc-tools.toml`
177
+ 3. `./pyproject.toml` under `[tool.hpc-tools]`
178
+ 4. Git repository root `hpc-tools.toml`
179
+ 5. `~/.config/hpc-tools/config.toml`
180
+ 6. Package defaults
181
+
182
+ ### Example Configuration
183
+
184
+ ```toml
185
+ [defaults]
186
+ cpu = 1
187
+ mem = "4G"
188
+ time = "1:00:00"
189
+ inherit_env = true
190
+
191
+ [schedulers.sge]
192
+ parallel_environment = "smp"
193
+ memory_resource = "mem_free"
194
+ purge_modules = true
195
+
196
+ [types.gpu]
197
+ queue = "gpu"
198
+ resources = [{name = "gpu", value = 1}]
199
+
200
+ [types.interactive]
201
+ queue = "interactive"
202
+ time = "8:00:00"
203
+ ```
204
+
205
+ Use named job types:
206
+
207
+ ```bash
208
+ hpc run --job-type gpu "python train.py"
209
+ ```
210
+
211
+ ## TUI Monitor
212
+
213
+ Launch the interactive job monitor:
214
+
215
+ ```bash
216
+ hpc monitor
217
+ ```
218
+
219
+ Key bindings:
220
+ - `q` - Quit
221
+ - `r` - Refresh
222
+ - `u` - Toggle user filter (my jobs / all)
223
+ - `/` - Search
224
+ - `Enter` - View job details
225
+ - `Tab` - Switch tabs
226
+
227
+ ## CLI Reference
228
+
229
+ ```
230
+ hpc run [OPTIONS] COMMAND
231
+
232
+ Options:
233
+ --job-name TEXT Job name
234
+ --cpu INTEGER Number of CPUs
235
+ --mem TEXT Memory (e.g., 16G, 4096M)
236
+ --time TEXT Time limit (e.g., 4:00:00)
237
+ --queue TEXT Queue/partition name
238
+ --directory PATH Working directory
239
+ --module TEXT Module to load (repeatable)
240
+ --array TEXT Array spec (e.g., 1-100, 1-100%5)
241
+ --depend TEXT Job dependencies
242
+ --inherit-env Inherit environment (default: true)
243
+ --no-inherit-env Don't inherit environment
244
+ --interactive Run interactively (qrsh/srun)
245
+ --local Run locally (no scheduler)
246
+ --dry-run Show script without submitting
247
+ --wait Wait for completion
248
+ --keep-script Keep job script for debugging
249
+ -h, --help Show help
250
+
251
+ Other commands:
252
+ hpc status [JOB_ID] Check job status
253
+ hpc cancel JOB_ID Cancel a job
254
+ hpc monitor Interactive TUI
255
+ hpc config show Show active configuration
256
+ ```
257
+
258
+ ## Development
259
+
260
+ ```bash
261
+ # Setup environment
262
+ source sourceme
263
+ source sourceme --clean # Clean rebuild
264
+
265
+ # Run tests
266
+ pytest
267
+ pytest -v
268
+ pytest -k "test_job"
269
+
270
+ # Type checking
271
+ mypy src/hpc_runner
272
+
273
+ # Linting
274
+ ruff check src/hpc_runner
275
+ ruff format src/hpc_runner
276
+ ```
277
+
278
+ ## Documentation
279
+
280
+ - [Programmatic API Reference](docs/programmatic_api.md)
281
+ - [TUI Styling Guide](docs/TEXTUAL_STYLING_COOKBOOK.md)
282
+
283
+ ## License
284
+
285
+ MIT License - see LICENSE file for details.
@@ -0,0 +1,241 @@
1
+ # hpc-runner
2
+
3
+ **Unified HPC job submission across multiple schedulers**
4
+
5
+ Write your jobs once, run them on any cluster - SGE, Slurm, PBS, or locally for testing.
6
+
7
+ ## Features
8
+
9
+ - **Unified CLI** - Same commands work across every supported scheduler (SGE and local today; Slurm and PBS are planned)
10
+ - **Python API** - Programmatic job submission with dependencies and pipelines
11
+ - **Auto-detection** - Automatically finds your cluster's scheduler
12
+ - **Interactive TUI** - Monitor jobs with a terminal dashboard
13
+ - **Job Dependencies** - Chain jobs with afterok, afterany, afternotok
14
+ - **Array Jobs** - Batch processing with throttling support
15
+ - **Virtual Environment Handling** - Automatic venv activation on compute nodes
16
+ - **Module Integration** - Load environment modules in job scripts
17
+ - **Dry-run Mode** - Preview generated scripts before submission
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install hpc-runner
23
+ ```
24
+
25
+ Or with uv:
26
+
27
+ ```bash
28
+ uv pip install hpc-runner
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ### CLI
34
+
35
+ ```bash
36
+ # Basic job submission
37
+ hpc run python train.py
38
+
39
+ # With resources
40
+ hpc run --cpu 4 --mem 16G --time 4:00:00 "python train.py"
41
+
42
+ # GPU job
43
+ hpc run --queue gpu --cpu 4 --mem 32G "python train.py --epochs 100"
44
+
45
+ # Preview without submitting
46
+ hpc run --dry-run --cpu 8 "make -j8"
47
+
48
+ # Interactive session
49
+ hpc run --interactive bash
50
+
51
+ # Array job
52
+ hpc run --array 1-100 "python process.py --task-id \$SGE_TASK_ID"
53
+
54
+ # Wait for completion
55
+ hpc run --wait python long_job.py
56
+ ```
57
+
58
+ ### Python API
59
+
60
+ ```python
61
+ from hpc_runner import Job
62
+
63
+ # Create and submit a job
64
+ job = Job(
65
+ command="python train.py",
66
+ cpu=4,
67
+ mem="16G",
68
+ time="4:00:00",
69
+ queue="gpu",
70
+ )
71
+ result = job.submit()
72
+
73
+ # Wait for completion
74
+ status = result.wait()
75
+ print(f"Exit code: {result.returncode}")
76
+
77
+ # Read output
78
+ print(result.read_stdout())
79
+ ```
80
+
81
+ ### Job Dependencies
82
+
83
+ ```python
84
+ from hpc_runner import Job
85
+
86
+ # First job
87
+ preprocess = Job(command="python preprocess.py", cpu=8, mem="32G")
88
+ result1 = preprocess.submit()
89
+
90
+ # Second job runs after first succeeds
91
+ train = Job(command="python train.py", cpu=4, mem="48G", queue="gpu")
92
+ train.after(result1, type="afterok")
93
+ result2 = train.submit()
94
+ ```
95
+
96
+ ### Pipelines
97
+
98
+ ```python
99
+ from hpc_runner import Pipeline
100
+
101
+ with Pipeline("ml_workflow") as p:
102
+     p.add("python preprocess.py", name="preprocess", cpu=8)
103
+     p.add("python train.py", name="train", depends_on=["preprocess"], queue="gpu")
104
+     p.add("python evaluate.py", name="evaluate", depends_on=["train"])
105
+
106
+ results = p.submit()
107
+ p.wait()
108
+ ```
109
+
110
+ ## Scheduler Support
111
+
112
+ | Scheduler | Status | Notes |
113
+ |-----------|--------|-------|
114
+ | SGE | Fully implemented | qsub, qstat, qdel, qrsh |
115
+ | Local | Fully implemented | Run as subprocess (for testing) |
116
+ | Slurm | Planned | sbatch, squeue, scancel |
117
+ | PBS | Planned | qsub, qstat, qdel |
118
+
119
+ ### Auto-detection Priority
120
+
121
+ 1. `HPC_SCHEDULER` environment variable
122
+ 2. SGE (`SGE_ROOT` or `qstat` available)
123
+ 3. Slurm (`sbatch` available)
124
+ 4. PBS (`qsub` with PBS)
125
+ 5. Local fallback
126
+
127
+ ## Configuration
128
+
129
+ hpc-runner uses TOML configuration files. Location priority:
130
+
131
+ 1. `--config /path/to/config.toml`
132
+ 2. `./hpc-tools.toml`
133
+ 3. `./pyproject.toml` under `[tool.hpc-tools]`
134
+ 4. Git repository root `hpc-tools.toml`
135
+ 5. `~/.config/hpc-tools/config.toml`
136
+ 6. Package defaults
137
+
138
+ ### Example Configuration
139
+
140
+ ```toml
141
+ [defaults]
142
+ cpu = 1
143
+ mem = "4G"
144
+ time = "1:00:00"
145
+ inherit_env = true
146
+
147
+ [schedulers.sge]
148
+ parallel_environment = "smp"
149
+ memory_resource = "mem_free"
150
+ purge_modules = true
151
+
152
+ [types.gpu]
153
+ queue = "gpu"
154
+ resources = [{name = "gpu", value = 1}]
155
+
156
+ [types.interactive]
157
+ queue = "interactive"
158
+ time = "8:00:00"
159
+ ```
160
+
161
+ Use named job types:
162
+
163
+ ```bash
164
+ hpc run --job-type gpu "python train.py"
165
+ ```
166
+
167
+ ## TUI Monitor
168
+
169
+ Launch the interactive job monitor:
170
+
171
+ ```bash
172
+ hpc monitor
173
+ ```
174
+
175
+ Key bindings:
176
+ - `q` - Quit
177
+ - `r` - Refresh
178
+ - `u` - Toggle user filter (my jobs / all)
179
+ - `/` - Search
180
+ - `Enter` - View job details
181
+ - `Tab` - Switch tabs
182
+
183
+ ## CLI Reference
184
+
185
+ ```
186
+ hpc run [OPTIONS] COMMAND
187
+
188
+ Options:
189
+ --job-name TEXT Job name
190
+ --cpu INTEGER Number of CPUs
191
+ --mem TEXT Memory (e.g., 16G, 4096M)
192
+ --time TEXT Time limit (e.g., 4:00:00)
193
+ --queue TEXT Queue/partition name
194
+ --directory PATH Working directory
195
+ --module TEXT Module to load (repeatable)
196
+ --array TEXT Array spec (e.g., 1-100, 1-100%5)
197
+ --depend TEXT Job dependencies
198
+ --inherit-env Inherit environment (default: true)
199
+ --no-inherit-env Don't inherit environment
200
+ --interactive Run interactively (qrsh/srun)
201
+ --local Run locally (no scheduler)
202
+ --dry-run Show script without submitting
203
+ --wait Wait for completion
204
+ --keep-script Keep job script for debugging
205
+ -h, --help Show help
206
+
207
+ Other commands:
208
+ hpc status [JOB_ID] Check job status
209
+ hpc cancel JOB_ID Cancel a job
210
+ hpc monitor Interactive TUI
211
+ hpc config show Show active configuration
212
+ ```
213
+
214
+ ## Development
215
+
216
+ ```bash
217
+ # Setup environment
218
+ source sourceme
219
+ source sourceme --clean # Clean rebuild
220
+
221
+ # Run tests
222
+ pytest
223
+ pytest -v
224
+ pytest -k "test_job"
225
+
226
+ # Type checking
227
+ mypy src/hpc_runner
228
+
229
+ # Linting
230
+ ruff check src/hpc_runner
231
+ ruff format src/hpc_runner
232
+ ```
233
+
234
+ ## Documentation
235
+
236
+ - [Programmatic API Reference](docs/programmatic_api.md)
237
+ - [TUI Styling Guide](docs/TEXTUAL_STYLING_COOKBOOK.md)
238
+
239
+ ## License
240
+
241
+ MIT License - see LICENSE file for details.
@@ -16,6 +16,15 @@ memory_resource = "mem_free"
16
16
  time_resource = "h_rt"
17
17
  merge_output = true
18
18
 
19
+ # Module handling
20
+ purge_modules = false # Run 'module purge' before loading modules
21
+ silent_modules = false # Use -s flag for silent module operations
22
+ module_init_script = "" # Site-specific module init (empty = auto-detect)
23
+
24
+ # Environment handling
25
+ expand_makeflags = true # Expand $NSLOTS in MAKEFLAGS
26
+ unset_vars = [] # Environment variables to unset (e.g., ["https_proxy"])
27
+
19
28
  # Slurm-specific settings (for future use)
20
29
  [schedulers.slurm]
21
30
  # Default Slurm settings