hpc-runner 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. hpc_runner/__init__.py +57 -0
  2. hpc_runner/_version.py +34 -0
  3. hpc_runner/cli/__init__.py +1 -0
  4. hpc_runner/cli/cancel.py +38 -0
  5. hpc_runner/cli/config.py +109 -0
  6. hpc_runner/cli/main.py +76 -0
  7. hpc_runner/cli/monitor.py +30 -0
  8. hpc_runner/cli/run.py +292 -0
  9. hpc_runner/cli/status.py +66 -0
  10. hpc_runner/core/__init__.py +31 -0
  11. hpc_runner/core/config.py +177 -0
  12. hpc_runner/core/descriptors.py +110 -0
  13. hpc_runner/core/exceptions.py +38 -0
  14. hpc_runner/core/job.py +328 -0
  15. hpc_runner/core/job_array.py +58 -0
  16. hpc_runner/core/job_info.py +104 -0
  17. hpc_runner/core/resources.py +49 -0
  18. hpc_runner/core/result.py +161 -0
  19. hpc_runner/core/types.py +13 -0
  20. hpc_runner/py.typed +0 -0
  21. hpc_runner/schedulers/__init__.py +60 -0
  22. hpc_runner/schedulers/base.py +194 -0
  23. hpc_runner/schedulers/detection.py +52 -0
  24. hpc_runner/schedulers/local/__init__.py +5 -0
  25. hpc_runner/schedulers/local/scheduler.py +354 -0
  26. hpc_runner/schedulers/local/templates/job.sh.j2 +28 -0
  27. hpc_runner/schedulers/sge/__init__.py +5 -0
  28. hpc_runner/schedulers/sge/args.py +232 -0
  29. hpc_runner/schedulers/sge/parser.py +287 -0
  30. hpc_runner/schedulers/sge/scheduler.py +881 -0
  31. hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
  32. hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
  33. hpc_runner/templates/__init__.py +5 -0
  34. hpc_runner/templates/engine.py +55 -0
  35. hpc_runner/tui/__init__.py +5 -0
  36. hpc_runner/tui/app.py +436 -0
  37. hpc_runner/tui/components/__init__.py +17 -0
  38. hpc_runner/tui/components/detail_panel.py +187 -0
  39. hpc_runner/tui/components/filter_bar.py +174 -0
  40. hpc_runner/tui/components/filter_popup.py +345 -0
  41. hpc_runner/tui/components/job_table.py +260 -0
  42. hpc_runner/tui/providers/__init__.py +5 -0
  43. hpc_runner/tui/providers/jobs.py +197 -0
  44. hpc_runner/tui/screens/__init__.py +7 -0
  45. hpc_runner/tui/screens/confirm.py +67 -0
  46. hpc_runner/tui/screens/job_details.py +210 -0
  47. hpc_runner/tui/screens/log_viewer.py +170 -0
  48. hpc_runner/tui/snapshot.py +153 -0
  49. hpc_runner/tui/styles/monitor.tcss +567 -0
  50. hpc_runner/workflow/__init__.py +6 -0
  51. hpc_runner/workflow/dependency.py +20 -0
  52. hpc_runner/workflow/pipeline.py +180 -0
  53. hpc_runner-0.2.0.dist-info/METADATA +285 -0
  54. hpc_runner-0.2.0.dist-info/RECORD +56 -0
  55. hpc_runner-0.2.0.dist-info/WHEEL +4 -0
  56. hpc_runner-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,28 @@
1
+ #!/bin/bash
2
+ # Generated by hpc-tools (local scheduler)
3
+
4
+ # Exit on error
5
+ set -e
6
+
7
+ {% if job.modules_path %}
8
+ # Additional module paths (simulated for local)
9
+ {% for path in job.modules_path %}
10
+ # module use {{ path }}
11
+ {% endfor %}
12
+ {% endif %}
13
+
14
+ {% if job.modules %}
15
+ # Modules (simulated for local - not actually loaded)
16
+ {% for mod in job.modules %}
17
+ # module load {{ mod }}
18
+ {% endfor %}
19
+ {% endif %}
20
+
21
+ {% if job.workdir %}
22
+ # Change to working directory
23
+ cd {{ job.workdir }}
24
+ {% endif %}
25
+
26
+ # Execute command
27
+ {{ job.command }}
28
+ exit $?
@@ -0,0 +1,5 @@
1
+ """SGE (Sun Grid Engine) scheduler implementation."""
2
+
3
+ from hpc_runner.schedulers.sge.scheduler import SGEScheduler
4
+
5
+ __all__ = ["SGEScheduler"]
@@ -0,0 +1,232 @@
1
+ """SGE-specific argument renderers.
2
+
3
+ Each class knows how to render a single job attribute to SGE syntax,
4
+ both as a script directive (#$ ...) and as command-line arguments.
5
+ """
6
+
7
+ from hpc_runner.core.descriptors import SchedulerArg
8
+
9
+
10
class SGEArg(SchedulerArg):
    """Common renderer for SGE options that carry a value.

    Rendering rules:
    - command line: ``-flag value``
    - script directive: ``#$ -flag value``

    A value of ``None`` means the option is unset and renders nothing.
    """

    def to_args(self, value) -> list[str]:
        """Return the ``qsub`` argument pair, or an empty list when unset."""
        return [] if value is None else [f"-{self.flag}", str(value)]

    def to_directive(self, value) -> str | None:
        """Return the ``#$`` directive line, or ``None`` when unset."""
        return None if value is None else f"#$ -{self.flag} {value}"
27
+
28
+
29
+ # =============================================================================
30
+ # Simple Flag Arguments
31
+ # =============================================================================
32
+
33
+
34
class SGEJobNameArg(SGEArg):
    """Renders the job name as ``-N <name>``."""

    def __init__(self) -> None:
        super().__init__("N", doc="Job name")
39
+
40
+
41
class SGEQueueArg(SGEArg):
    """Renders the target queue as ``-q <queue_name>``."""

    def __init__(self) -> None:
        super().__init__("q", doc="Queue/partition name")
46
+
47
+
48
class SGEOutputArg(SGEArg):
    """Renders the stdout destination as ``-o <path>``."""

    def __init__(self) -> None:
        super().__init__("o", doc="Stdout file path")
53
+
54
+
55
class SGEErrorArg(SGEArg):
    """Renders the stderr destination as ``-e <path>``."""

    def __init__(self) -> None:
        super().__init__("e", doc="Stderr file path")
60
+
61
+
62
class SGEPriorityArg(SGEArg):
    """Renders the job priority as ``-p <priority>``."""

    def __init__(self) -> None:
        super().__init__("p", doc="Job priority (-1023 to 1024)")
67
+
68
+
69
class SGEShellArg(SGEArg):
    """Renders the interpreter choice as ``-S <shell path>``."""

    def __init__(self) -> None:
        super().__init__("S", doc="Shell path")
74
+
75
+
76
+ # =============================================================================
77
+ # Boolean Flag Arguments (no value, just presence)
78
+ # =============================================================================
79
+
80
+
81
class SGECwdArg(SchedulerArg[bool]):
    """Presence-only switch ``-cwd`` (run in the current working directory)."""

    def __init__(self) -> None:
        super().__init__("cwd", doc="Execute in current working directory")

    def to_args(self, value: bool | None) -> list[str]:
        """Return ``["-cwd"]`` for a truthy value, otherwise an empty list."""
        if not value:
            return []
        return ["-cwd"]

    def to_directive(self, value: bool | None) -> str | None:
        """Return ``"#$ -cwd"`` for a truthy value, otherwise ``None``."""
        if not value:
            return None
        return "#$ -cwd"
92
+
93
+
94
class SGEInheritEnvArg(SchedulerArg[bool]):
    """Presence-only switch ``-V`` (inherit the environment)."""

    def __init__(self) -> None:
        super().__init__("V", doc="Inherit environment variables")

    def to_args(self, value: bool | None) -> list[str]:
        """Return ``["-V"]`` for a truthy value, otherwise an empty list."""
        if not value:
            return []
        return ["-V"]

    def to_directive(self, value: bool | None) -> str | None:
        """Return ``"#$ -V"`` for a truthy value, otherwise ``None``."""
        if not value:
            return None
        return "#$ -V"
105
+
106
+
107
class SGEMergeOutputArg(SchedulerArg[bool]):
    """Switch ``-j y`` (join stderr into stdout); only rendered when truthy."""

    def __init__(self) -> None:
        super().__init__("j", doc="Join stdout and stderr")

    def to_args(self, value: bool | None) -> list[str]:
        """Return ``["-j", "y"]`` for a truthy value, otherwise an empty list."""
        if not value:
            return []
        return ["-j", "y"]

    def to_directive(self, value: bool | None) -> str | None:
        """Return ``"#$ -j y"`` for a truthy value, otherwise ``None``."""
        if not value:
            return None
        return "#$ -j y"
118
+
119
+
120
+ # =============================================================================
121
+ # Resource Arguments (configurable resource names)
122
+ # =============================================================================
123
+
124
+
125
class SGECpuArg(SchedulerArg[int]):
    """Slot request via a parallel environment: ``-pe <pe_name> <slots>``.

    The parallel-environment name differs between clusters (for example
    'smp', 'mpi' or 'orte'), so it is supplied at construction time.
    """

    def __init__(self, pe_name: str = "smp") -> None:
        super().__init__("pe", doc=f"Parallel environment ({pe_name})")
        self.pe_name = pe_name

    def to_args(self, value: int | None) -> list[str]:
        """Return ``["-pe", pe_name, slots]``, or an empty list when unset."""
        return [] if value is None else ["-pe", self.pe_name, str(value)]

    def to_directive(self, value: int | None) -> str | None:
        """Return the ``#$ -pe`` directive line, or ``None`` when unset."""
        return None if value is None else f"#$ -pe {self.pe_name} {value}"
144
+
145
+
146
class SGEMemArg(SchedulerArg[str]):
    """Memory request rendered as ``-l <resource>=<value>``.

    The resource name is chosen per cluster (for example 'mem_free',
    'h_vmem' or 'mem') and is supplied at construction time.
    """

    def __init__(self, resource_name: str = "mem_free") -> None:
        super().__init__("l", doc=f"Memory ({resource_name})")
        self.resource_name = resource_name

    def to_args(self, value: str | None) -> list[str]:
        """Return ``["-l", "<resource>=<value>"]``, or ``[]`` when unset."""
        if value is None:
            return []
        request = f"{self.resource_name}={value}"
        return ["-l", request]

    def to_directive(self, value: str | None) -> str | None:
        """Return the ``#$ -l`` directive line, or ``None`` when unset."""
        if value is None:
            return None
        request = f"{self.resource_name}={value}"
        return f"#$ -l {request}"
165
+
166
+
167
class SGETimeArg(SchedulerArg[str]):
    """Run-time limit rendered as ``-l <resource>=<HH:MM:SS>``.

    The resource name is chosen per cluster (for example 'h_rt' or
    's_rt') and is supplied at construction time.
    """

    def __init__(self, resource_name: str = "h_rt") -> None:
        super().__init__("l", doc=f"Time limit ({resource_name})")
        self.resource_name = resource_name

    def to_args(self, value: str | None) -> list[str]:
        """Return ``["-l", "<resource>=<value>"]``, or ``[]`` when unset."""
        if value is None:
            return []
        request = f"{self.resource_name}={value}"
        return ["-l", request]

    def to_directive(self, value: str | None) -> str | None:
        """Return the ``#$ -l`` directive line, or ``None`` when unset."""
        if value is None:
            return None
        request = f"{self.resource_name}={value}"
        return f"#$ -l {request}"
186
+
187
+
188
+ # =============================================================================
189
+ # Array Job Arguments
190
+ # =============================================================================
191
+
192
+
193
class SGEArrayArg(SchedulerArg[str]):
    """Array task range rendered as ``-t <range>``.

    Accepted range spellings include ``1-100``, ``1-100:10`` and
    ``1,2,3,4``; the value is passed through unchanged.
    """

    def __init__(self) -> None:
        super().__init__("t", doc="Array job range")

    def to_args(self, value: str | None) -> list[str]:
        """Return ``["-t", range]``, or an empty list when unset."""
        return [] if value is None else ["-t", value]

    def to_directive(self, value: str | None) -> str | None:
        """Return the ``#$ -t`` directive line, or ``None`` when unset."""
        return None if value is None else f"#$ -t {value}"
211
+
212
+
213
+ # =============================================================================
214
+ # Dependency Arguments
215
+ # =============================================================================
216
+
217
+
218
class SGEHoldArg(SchedulerArg[str]):
    """Dependency list rendered as ``-hold_jid job_id[,job_id,...]``."""

    def __init__(self) -> None:
        super().__init__("hold_jid", doc="Hold until jobs complete")

    def to_args(self, value: str | None) -> list[str]:
        """Return ``["-hold_jid", ids]``, or an empty list when unset."""
        return [] if value is None else ["-hold_jid", value]

    def to_directive(self, value: str | None) -> str | None:
        """Return the ``#$ -hold_jid`` directive line, or ``None`` when unset."""
        return None if value is None else f"#$ -hold_jid {value}"
@@ -0,0 +1,287 @@
1
+ """SGE output parsing utilities."""
2
+
3
+ import re
4
+ from datetime import datetime
5
+ import xml.etree.ElementTree as ET
6
+ from typing import Any
7
+
8
+ from hpc_runner.core.result import JobStatus
9
+
10
+
11
def parse_qstat_xml(xml_output: str) -> dict[str, Any]:
    """Build a ``job_id -> job_info`` mapping from ``qstat -xml`` output.

    Input that is not well-formed XML yields an empty mapping instead of
    raising.
    """
    collected: dict[str, Any] = {}

    try:
        tree_root = ET.fromstring(xml_output)
    except ET.ParseError:
        return collected

    _strip_namespaces(tree_root)

    # The first pass visits every job_list element; the second pass
    # revisits the ones directly under a job_info element (pending jobs),
    # so for a repeated job ID the pending entry is the one written last.
    for xpath in (".//job_list", ".//job_info/job_list"):
        for node in tree_root.findall(xpath):
            parsed = _parse_job_element(node)
            if parsed:
                collected[parsed["job_id"]] = parsed

    return collected
38
+
39
+
40
+ def _parse_job_element(elem: ET.Element) -> dict[str, Any] | None:
41
+ """Parse a single job_list element.
42
+
43
+ SGE XML elements include:
44
+ - JB_job_number: Job ID
45
+ - JB_name: Job name
46
+ - JB_owner: Username
47
+ - state: Job state (r, qw, hqw, etc.)
48
+ - queue_name: Queue@host (for running jobs)
49
+ - hard_req_queue: Requested queue (for pending jobs)
50
+ - slots: Number of slots/CPUs
51
+ - JB_submission_time: Submission timestamp (epoch)
52
+ - JAT_start_time: Start timestamp (epoch, running jobs only)
53
+ - tasks: Array task ID (for array jobs)
54
+ """
55
+ job_id_elem = elem.find("JB_job_number")
56
+ if job_id_elem is None or job_id_elem.text is None:
57
+ return None
58
+
59
+ job_info: dict[str, Any] = {
60
+ "job_id": job_id_elem.text,
61
+ }
62
+
63
+ # Job name
64
+ name_elem = elem.find("JB_name")
65
+ if name_elem is not None and name_elem.text:
66
+ job_info["name"] = name_elem.text
67
+
68
+ # Owner/user
69
+ owner_elem = elem.find("JB_owner")
70
+ if owner_elem is not None and owner_elem.text:
71
+ job_info["user"] = owner_elem.text
72
+
73
+ # State
74
+ state_elem = elem.find("state")
75
+ if state_elem is not None and state_elem.text:
76
+ job_info["state"] = state_elem.text
77
+
78
+ # Queue - running jobs have queue_name, pending may have hard_req_queue
79
+ queue_elem = elem.find("queue_name")
80
+ if queue_elem is not None and queue_elem.text:
81
+ # Format is usually "queue@host", extract queue and host separately
82
+ queue_full = queue_elem.text
83
+ if "@" in queue_full:
84
+ queue_name, host = queue_full.split("@", 1)
85
+ job_info["queue"] = queue_name
86
+ job_info["node"] = host
87
+ else:
88
+ job_info["queue"] = queue_full
89
+ else:
90
+ # Check for requested queue (pending jobs)
91
+ hard_queue = elem.find("hard_req_queue")
92
+ if hard_queue is not None and hard_queue.text:
93
+ job_info["queue"] = hard_queue.text
94
+
95
+ # Slots (CPU count)
96
+ slots_elem = elem.find("slots")
97
+ if slots_elem is not None and slots_elem.text:
98
+ job_info["slots"] = int(slots_elem.text)
99
+
100
+ # Submission time (epoch seconds)
101
+ submit_text = elem.findtext(".//JB_submission_time")
102
+ if submit_text:
103
+ try:
104
+ job_info["submit_time"] = int(submit_text)
105
+ except ValueError:
106
+ pass
107
+
108
+ # Start time (epoch seconds, only for running jobs)
109
+ start_text = elem.findtext(".//JAT_start_time")
110
+ if start_text:
111
+ start_epoch = _parse_sge_timestamp(start_text)
112
+ if start_epoch is not None:
113
+ job_info["start_time"] = start_epoch
114
+
115
+ # Array task ID
116
+ tasks_elem = elem.find("tasks")
117
+ if tasks_elem is not None and tasks_elem.text:
118
+ job_info["array_task_id"] = tasks_elem.text
119
+
120
+ return job_info
121
+
122
+
123
def _strip_namespaces(root: ET.Element) -> None:
    """Rewrite every tag in the tree in place, dropping its ``{uri}`` prefix.

    Afterwards plain tag names can be used in ElementTree lookups.
    """
    for node in root.iter():
        tag = node.tag
        # Non-string tags are left untouched.
        if not isinstance(tag, str):
            continue
        _, brace, local_name = tag.partition("}")
        if brace:
            node.tag = local_name
128
+
129
+
130
def parse_qstat_plain(output: str) -> dict[str, Any]:
    """Parse plain (tabular) qstat output into ``job_id -> job_info``.

    Format:
        job-ID prior name user state submit/start at queue slots ja-task-ID
        --------------------------------------------------------------------
        12345 0.55500 myjob user r 01/01/2024 10:00:00 all.q@node1 1

    Everything up to and including the dashed separator line is skipped.
    Rows with fewer than five whitespace-separated fields are ignored.

    Each entry always has "job_id", "priority", "name", "user" and "state".
    "start_time" (when the state contains "r") or "submit_time" (otherwise),
    "queue" and "slots" are added when the row provides them.
    """
    jobs: dict[str, Any] = {}

    data_started = False
    for line in output.strip().split("\n"):
        # The dashed rule separates the column header from the data rows.
        if line.startswith("-"):
            data_started = True
            continue
        if not data_started:
            continue

        parts = line.split()
        if len(parts) < 5:
            continue

        job_id, priority, name, user, state = parts[:5]
        job: dict[str, Any] = {
            "job_id": job_id,
            "priority": priority,
            "name": name,
            "user": user,
            "state": state,
        }
        jobs[job_id] = job

        # Submit/start time (MM/DD/YYYY HH:MM:SS): qstat shows the start
        # time for running jobs and the submit time otherwise.
        if len(parts) >= 7:
            timestamp = _parse_qstat_datetime(parts[5], parts[6])
            if timestamp is not None:
                time_key = "start_time" if "r" in state else "submit_time"
                job[time_key] = timestamp

        trailing = parts[7:]

        # Pending jobs have an empty queue column, so after whitespace
        # splitting the slot count sits where the queue would be. A purely
        # numeric field here is therefore treated as slots, not as a queue.
        if trailing and not trailing[0].isdigit():
            job["queue"] = trailing[0]
            trailing = trailing[1:]

        if trailing:
            try:
                job["slots"] = int(trailing[0])
            except ValueError:
                pass

    return jobs
183
+
184
+
185
+ def _parse_qstat_datetime(date_part: str, time_part: str) -> int | None:
186
+ """Parse qstat date/time into epoch seconds."""
187
+ try:
188
+ dt = datetime.strptime(f"{date_part} {time_part}", "%m/%d/%Y %H:%M:%S")
189
+ except ValueError:
190
+ return None
191
+ return int(dt.timestamp())
192
+
193
+
194
+ def _parse_sge_timestamp(value: str) -> int | None:
195
+ """Parse SGE timestamps that may be epoch seconds or ISO 8601."""
196
+ if value.isdigit():
197
+ try:
198
+ return int(value)
199
+ except ValueError:
200
+ return None
201
+ try:
202
+ return int(datetime.strptime(value, "%Y-%m-%dT%H:%M:%S").timestamp())
203
+ except ValueError:
204
+ return None
205
+
206
+
207
def parse_qacct_output(output: str) -> dict[str, Any]:
    """Collect the ``key value`` rows of qacct output into a dict.

    Format:
        ==============================================================
        qname        all.q
        hostname     node1
        ...
        exit_status  0

    Lines starting with ``=`` are separators and are skipped, as are lines
    without both a key and a value. When a key occurs more than once the
    last value wins.
    """
    info: dict[str, Any] = {}

    for raw_line in output.strip().split("\n"):
        if raw_line.startswith("="):
            continue

        fields = raw_line.split(maxsplit=1)
        if len(fields) != 2:
            continue

        info[fields[0]] = fields[1].strip()

    return info
233
+
234
+
235
def state_to_status(state: str) -> JobStatus:
    """Convert an SGE state code to a JobStatus.

    SGE states:
    - qw: pending (queued, waiting)
    - hqw: hold (on hold)
    - Rq, hRwq: rescheduled and queued again (pending)
    - r: running
    - t: transferring
    - Rr, Rt: restarted
    - s, ts: suspended
    - S, tS: queue suspended
    - T, tT: threshold
    - Eqw: error (waiting)
    - dr: deleting (running)
    - dt: deleting (transferring)

    The code is matched case-insensitively, one flag letter at a time, in
    this order: deleting, error, queued, running/transferring, then
    held/suspended. Anything else maps to ``JobStatus.UNKNOWN``.
    """
    state = state.lower()

    # Deleting or error states take precedence over other flags.
    if "d" in state:
        return JobStatus.CANCELLED
    if "e" in state:
        return JobStatus.FAILED

    # Queued states must be tested before running ones: a re-queued job
    # ("Rq", "hRwq") carries the restart flag, which lowercases to "r" and
    # would otherwise be reported as running.
    if "q" in state:
        return JobStatus.PENDING

    # Running or transferring states.
    if "r" in state or "t" in state:
        return JobStatus.RUNNING

    # Held or suspended states.
    if "h" in state or "s" in state:
        return JobStatus.PENDING

    return JobStatus.UNKNOWN
268
+
269
+
270
+ def parse_qsub_output(output: str) -> str | None:
271
+ """Parse qsub output to extract job ID.
272
+
273
+ Expected format:
274
+ Your job 12345 ("jobname") has been submitted
275
+ Your job-array 12345.1-10:1 ("jobname") has been submitted
276
+ """
277
+ # Standard job
278
+ match = re.search(r"Your job (\d+)", output)
279
+ if match:
280
+ return match.group(1)
281
+
282
+ # Array job
283
+ match = re.search(r"Your job-array (\d+)", output)
284
+ if match:
285
+ return match.group(1)
286
+
287
+ return None