hpc-runner 0.2.0 (hpc_runner-0.2.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpc_runner/__init__.py +57 -0
- hpc_runner/_version.py +34 -0
- hpc_runner/cli/__init__.py +1 -0
- hpc_runner/cli/cancel.py +38 -0
- hpc_runner/cli/config.py +109 -0
- hpc_runner/cli/main.py +76 -0
- hpc_runner/cli/monitor.py +30 -0
- hpc_runner/cli/run.py +292 -0
- hpc_runner/cli/status.py +66 -0
- hpc_runner/core/__init__.py +31 -0
- hpc_runner/core/config.py +177 -0
- hpc_runner/core/descriptors.py +110 -0
- hpc_runner/core/exceptions.py +38 -0
- hpc_runner/core/job.py +328 -0
- hpc_runner/core/job_array.py +58 -0
- hpc_runner/core/job_info.py +104 -0
- hpc_runner/core/resources.py +49 -0
- hpc_runner/core/result.py +161 -0
- hpc_runner/core/types.py +13 -0
- hpc_runner/py.typed +0 -0
- hpc_runner/schedulers/__init__.py +60 -0
- hpc_runner/schedulers/base.py +194 -0
- hpc_runner/schedulers/detection.py +52 -0
- hpc_runner/schedulers/local/__init__.py +5 -0
- hpc_runner/schedulers/local/scheduler.py +354 -0
- hpc_runner/schedulers/local/templates/job.sh.j2 +28 -0
- hpc_runner/schedulers/sge/__init__.py +5 -0
- hpc_runner/schedulers/sge/args.py +232 -0
- hpc_runner/schedulers/sge/parser.py +287 -0
- hpc_runner/schedulers/sge/scheduler.py +881 -0
- hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
- hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
- hpc_runner/templates/__init__.py +5 -0
- hpc_runner/templates/engine.py +55 -0
- hpc_runner/tui/__init__.py +5 -0
- hpc_runner/tui/app.py +436 -0
- hpc_runner/tui/components/__init__.py +17 -0
- hpc_runner/tui/components/detail_panel.py +187 -0
- hpc_runner/tui/components/filter_bar.py +174 -0
- hpc_runner/tui/components/filter_popup.py +345 -0
- hpc_runner/tui/components/job_table.py +260 -0
- hpc_runner/tui/providers/__init__.py +5 -0
- hpc_runner/tui/providers/jobs.py +197 -0
- hpc_runner/tui/screens/__init__.py +7 -0
- hpc_runner/tui/screens/confirm.py +67 -0
- hpc_runner/tui/screens/job_details.py +210 -0
- hpc_runner/tui/screens/log_viewer.py +170 -0
- hpc_runner/tui/snapshot.py +153 -0
- hpc_runner/tui/styles/monitor.tcss +567 -0
- hpc_runner/workflow/__init__.py +6 -0
- hpc_runner/workflow/dependency.py +20 -0
- hpc_runner/workflow/pipeline.py +180 -0
- hpc_runner-0.2.0.dist-info/METADATA +285 -0
- hpc_runner-0.2.0.dist-info/RECORD +56 -0
- hpc_runner-0.2.0.dist-info/WHEEL +4 -0
- hpc_runner-0.2.0.dist-info/entry_points.txt +2 -0
hpc_runner/schedulers/local/templates/job.sh.j2
@@ -0,0 +1,28 @@
#!/bin/bash
# Generated by hpc-tools (local scheduler)

# Exit on error
set -e

{% if job.modules_path %}
# Additional module paths (simulated for local)
{% for path in job.modules_path %}
# module use {{ path }}
{% endfor %}
{% endif %}

{% if job.modules %}
# Modules (simulated for local - not actually loaded)
{% for mod in job.modules %}
# module load {{ mod }}
{% endfor %}
{% endif %}

{% if job.workdir %}
# Change to working directory
cd {{ job.workdir }}
{% endif %}

# Execute command
{{ job.command }}
exit $?
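For context, the template above only needs a job object exposing modules_path, modules, workdir, and command. Below is a minimal sketch of rendering a template of this shape with plain Jinja2; it deliberately bypasses hpc_runner/templates/engine.py (whose API is not shown in this diff), and the inlined template string and SimpleNamespace job are illustrative assumptions, not the package's own objects.

# Illustrative rendering of a job.sh.j2-style template with plain Jinja2.
# The template string and the ad-hoc job object are assumptions for this sketch.
from types import SimpleNamespace
from jinja2 import Environment

TEMPLATE = """\
#!/bin/bash
set -e
{% if job.workdir %}
cd {{ job.workdir }}
{% endif %}
{{ job.command }}
"""

job = SimpleNamespace(workdir="/tmp/work", command="echo hello")
script = Environment(trim_blocks=True).from_string(TEMPLATE).render(job=job)
print(script)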
hpc_runner/schedulers/sge/args.py
@@ -0,0 +1,232 @@
"""SGE-specific argument renderers.

Each class knows how to render a single job attribute to SGE syntax,
both as a script directive (#$ ...) and as command-line arguments.
"""

from hpc_runner.core.descriptors import SchedulerArg


class SGEArg(SchedulerArg):
    """Base class for SGE arguments.

    SGE uses:
    - Directives: #$ -flag value
    - CLI args: -flag value
    """

    def to_args(self, value) -> list[str]:
        if value is None:
            return []
        return [f"-{self.flag}", str(value)]

    def to_directive(self, value) -> str | None:
        if value is None:
            return None
        return f"#$ -{self.flag} {value}"


# =============================================================================
# Simple Flag Arguments
# =============================================================================


class SGEJobNameArg(SGEArg):
    """Job name: -N name"""

    def __init__(self):
        super().__init__("N", doc="Job name")


class SGEQueueArg(SGEArg):
    """Queue selection: -q queue_name"""

    def __init__(self):
        super().__init__("q", doc="Queue/partition name")


class SGEOutputArg(SGEArg):
    """Stdout path: -o path"""

    def __init__(self):
        super().__init__("o", doc="Stdout file path")


class SGEErrorArg(SGEArg):
    """Stderr path: -e path"""

    def __init__(self):
        super().__init__("e", doc="Stderr file path")


class SGEPriorityArg(SGEArg):
    """Job priority: -p priority"""

    def __init__(self):
        super().__init__("p", doc="Job priority (-1023 to 1024)")


class SGEShellArg(SGEArg):
    """Shell selection: -S /path/to/shell"""

    def __init__(self):
        super().__init__("S", doc="Shell path")


# =============================================================================
# Boolean Flag Arguments (no value, just presence)
# =============================================================================


class SGECwdArg(SchedulerArg[bool]):
    """Use current working directory: -cwd"""

    def __init__(self):
        super().__init__("cwd", doc="Execute in current working directory")

    def to_args(self, value: bool | None) -> list[str]:
        return ["-cwd"] if value else []

    def to_directive(self, value: bool | None) -> str | None:
        return "#$ -cwd" if value else None


class SGEInheritEnvArg(SchedulerArg[bool]):
    """Inherit environment: -V"""

    def __init__(self):
        super().__init__("V", doc="Inherit environment variables")

    def to_args(self, value: bool | None) -> list[str]:
        return ["-V"] if value else []

    def to_directive(self, value: bool | None) -> str | None:
        return "#$ -V" if value else None


class SGEMergeOutputArg(SchedulerArg[bool]):
    """Merge stdout and stderr: -j y"""

    def __init__(self):
        super().__init__("j", doc="Join stdout and stderr")

    def to_args(self, value: bool | None) -> list[str]:
        return ["-j", "y"] if value else []

    def to_directive(self, value: bool | None) -> str | None:
        return "#$ -j y" if value else None


# =============================================================================
# Resource Arguments (configurable resource names)
# =============================================================================


class SGECpuArg(SchedulerArg[int]):
    """Parallel environment slots: -pe <pe_name> <slots>

    The PE name is configurable per-cluster (e.g., 'smp', 'mpi', 'orte').
    """

    def __init__(self, pe_name: str = "smp"):
        super().__init__("pe", doc=f"Parallel environment ({pe_name})")
        self.pe_name = pe_name

    def to_args(self, value: int | None) -> list[str]:
        if value is None:
            return []
        return ["-pe", self.pe_name, str(value)]

    def to_directive(self, value: int | None) -> str | None:
        if value is None:
            return None
        return f"#$ -pe {self.pe_name} {value}"


class SGEMemArg(SchedulerArg[str]):
    """Memory request: -l <resource>=<value>

    The resource name is configurable (e.g., 'mem_free', 'h_vmem', 'mem').
    """

    def __init__(self, resource_name: str = "mem_free"):
        super().__init__("l", doc=f"Memory ({resource_name})")
        self.resource_name = resource_name

    def to_args(self, value: str | None) -> list[str]:
        if value is None:
            return []
        return ["-l", f"{self.resource_name}={value}"]

    def to_directive(self, value: str | None) -> str | None:
        if value is None:
            return None
        return f"#$ -l {self.resource_name}={value}"


class SGETimeArg(SchedulerArg[str]):
    """Time limit: -l <resource>=<HH:MM:SS>

    The resource name is configurable (e.g., 'h_rt', 's_rt').
    """

    def __init__(self, resource_name: str = "h_rt"):
        super().__init__("l", doc=f"Time limit ({resource_name})")
        self.resource_name = resource_name

    def to_args(self, value: str | None) -> list[str]:
        if value is None:
            return []
        return ["-l", f"{self.resource_name}={value}"]

    def to_directive(self, value: str | None) -> str | None:
        if value is None:
            return None
        return f"#$ -l {self.resource_name}={value}"


# =============================================================================
# Array Job Arguments
# =============================================================================


class SGEArrayArg(SchedulerArg[str]):
    """Array job range: -t range

    Range formats: 1-100, 1-100:10, 1,2,3,4
    """

    def __init__(self):
        super().__init__("t", doc="Array job range")

    def to_args(self, value: str | None) -> list[str]:
        if value is None:
            return []
        return ["-t", value]

    def to_directive(self, value: str | None) -> str | None:
        if value is None:
            return None
        return f"#$ -t {value}"


# =============================================================================
# Dependency Arguments
# =============================================================================


class SGEHoldArg(SchedulerArg[str]):
    """Job dependency: -hold_jid job_id[,job_id,...]"""

    def __init__(self):
        super().__init__("hold_jid", doc="Hold until jobs complete")

    def to_args(self, value: str | None) -> list[str]:
        if value is None:
            return []
        return ["-hold_jid", value]

    def to_directive(self, value: str | None) -> str | None:
        if value is None:
            return None
        return f"#$ -hold_jid {value}"
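To illustrate the to_args/to_directive contract these renderers follow, here is a simplified stand-in class, not the package's SchedulerArg descriptor, together with the output it would produce for a queue flag. The DemoArg name and sample values are made up for this sketch.

# Simplified stand-in for the rendering contract shown above; this is NOT
# hpc_runner's SchedulerArg descriptor, just a sketch of the same two methods.
class DemoArg:
    def __init__(self, flag: str):
        self.flag = flag

    def to_args(self, value) -> list[str]:
        # CLI form, e.g. arguments appended to a qsub command line
        return [] if value is None else [f"-{self.flag}", str(value)]

    def to_directive(self, value) -> str | None:
        # Script form, e.g. a "#$ -q all.q" line inside the batch script
        return None if value is None else f"#$ -{self.flag} {value}"


queue = DemoArg("q")
print(queue.to_args("all.q"))       # ['-q', 'all.q']
print(queue.to_directive("all.q"))  # #$ -q all.q
print(queue.to_args(None))          # []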
hpc_runner/schedulers/sge/parser.py
@@ -0,0 +1,287 @@
"""SGE output parsing utilities."""

import re
from datetime import datetime
import xml.etree.ElementTree as ET
from typing import Any

from hpc_runner.core.result import JobStatus


def parse_qstat_xml(xml_output: str) -> dict[str, Any]:
    """Parse qstat -xml output.

    Returns dict with job_id -> job_info mappings.
    """
    jobs: dict[str, Any] = {}

    try:
        root = ET.fromstring(xml_output)
        _strip_namespaces(root)

        # Parse queue_info (running jobs)
        for job_list in root.findall(".//job_list"):
            job_info = _parse_job_element(job_list)
            if job_info:
                jobs[job_info["job_id"]] = job_info

        # Parse job_info (pending jobs)
        for job_list in root.findall(".//job_info/job_list"):
            job_info = _parse_job_element(job_list)
            if job_info:
                jobs[job_info["job_id"]] = job_info

    except ET.ParseError:
        pass

    return jobs


def _parse_job_element(elem: ET.Element) -> dict[str, Any] | None:
    """Parse a single job_list element.

    SGE XML elements include:
    - JB_job_number: Job ID
    - JB_name: Job name
    - JB_owner: Username
    - state: Job state (r, qw, hqw, etc.)
    - queue_name: Queue@host (for running jobs)
    - hard_req_queue: Requested queue (for pending jobs)
    - slots: Number of slots/CPUs
    - JB_submission_time: Submission timestamp (epoch)
    - JAT_start_time: Start timestamp (epoch, running jobs only)
    - tasks: Array task ID (for array jobs)
    """
    job_id_elem = elem.find("JB_job_number")
    if job_id_elem is None or job_id_elem.text is None:
        return None

    job_info: dict[str, Any] = {
        "job_id": job_id_elem.text,
    }

    # Job name
    name_elem = elem.find("JB_name")
    if name_elem is not None and name_elem.text:
        job_info["name"] = name_elem.text

    # Owner/user
    owner_elem = elem.find("JB_owner")
    if owner_elem is not None and owner_elem.text:
        job_info["user"] = owner_elem.text

    # State
    state_elem = elem.find("state")
    if state_elem is not None and state_elem.text:
        job_info["state"] = state_elem.text

    # Queue - running jobs have queue_name, pending may have hard_req_queue
    queue_elem = elem.find("queue_name")
    if queue_elem is not None and queue_elem.text:
        # Format is usually "queue@host", extract queue and host separately
        queue_full = queue_elem.text
        if "@" in queue_full:
            queue_name, host = queue_full.split("@", 1)
            job_info["queue"] = queue_name
            job_info["node"] = host
        else:
            job_info["queue"] = queue_full
    else:
        # Check for requested queue (pending jobs)
        hard_queue = elem.find("hard_req_queue")
        if hard_queue is not None and hard_queue.text:
            job_info["queue"] = hard_queue.text

    # Slots (CPU count)
    slots_elem = elem.find("slots")
    if slots_elem is not None and slots_elem.text:
        job_info["slots"] = int(slots_elem.text)

    # Submission time (epoch seconds)
    submit_text = elem.findtext(".//JB_submission_time")
    if submit_text:
        try:
            job_info["submit_time"] = int(submit_text)
        except ValueError:
            pass

    # Start time (epoch seconds, only for running jobs)
    start_text = elem.findtext(".//JAT_start_time")
    if start_text:
        start_epoch = _parse_sge_timestamp(start_text)
        if start_epoch is not None:
            job_info["start_time"] = start_epoch

    # Array task ID
    tasks_elem = elem.find("tasks")
    if tasks_elem is not None and tasks_elem.text:
        job_info["array_task_id"] = tasks_elem.text

    return job_info


def _strip_namespaces(root: ET.Element) -> None:
    """Strip XML namespaces so ElementTree finds simple tag names."""
    for elem in root.iter():
        if isinstance(elem.tag, str) and "}" in elem.tag:
            elem.tag = elem.tag.split("}", 1)[1]


def parse_qstat_plain(output: str) -> dict[str, Any]:
    """Parse plain qstat output.

    Format:
        job-ID  prior    name   user  state  submit/start at      queue        slots  ja-task-ID
        --------------------------------------------------------------------------------
        12345   0.55500  myjob  user  r      01/01/2024 10:00:00  all.q@node1  1
    """
    jobs: dict[str, Any] = {}

    lines = output.strip().split("\n")

    # Skip header lines
    data_started = False
    for line in lines:
        if line.startswith("-"):
            data_started = True
            continue
        if not data_started:
            continue

        parts = line.split()
        if len(parts) >= 5:
            job_id = parts[0]
            jobs[job_id] = {
                "job_id": job_id,
                "priority": parts[1],
                "name": parts[2],
                "user": parts[3],
                "state": parts[4],
            }

            # Parse submit/start time (MM/DD/YYYY HH:MM:SS)
            if len(parts) >= 7:
                timestamp = _parse_qstat_datetime(parts[5], parts[6])
                if timestamp is not None:
                    if "r" in parts[4]:
                        jobs[job_id]["start_time"] = timestamp
                    else:
                        jobs[job_id]["submit_time"] = timestamp

            # Parse queue if present
            if len(parts) >= 8:
                jobs[job_id]["queue"] = parts[7]

            # Parse slots if present
            if len(parts) >= 9:
                try:
                    jobs[job_id]["slots"] = int(parts[8])
                except ValueError:
                    pass

    return jobs


def _parse_qstat_datetime(date_part: str, time_part: str) -> int | None:
    """Parse qstat date/time into epoch seconds."""
    try:
        dt = datetime.strptime(f"{date_part} {time_part}", "%m/%d/%Y %H:%M:%S")
    except ValueError:
        return None
    return int(dt.timestamp())


def _parse_sge_timestamp(value: str) -> int | None:
    """Parse SGE timestamps that may be epoch seconds or ISO 8601."""
    if value.isdigit():
        try:
            return int(value)
        except ValueError:
            return None
    try:
        return int(datetime.strptime(value, "%Y-%m-%dT%H:%M:%S").timestamp())
    except ValueError:
        return None


def parse_qacct_output(output: str) -> dict[str, Any]:
    """Parse qacct output for job accounting info.

    Format:
        ==============================================================
        qname        all.q
        hostname     node1
        group        users
        owner        user
        jobname      myjob
        jobnumber    12345
        ...
        exit_status  0
    """
    info: dict[str, Any] = {}

    for line in output.strip().split("\n"):
        if line.startswith("="):
            continue

        parts = line.split(None, 1)
        if len(parts) == 2:
            key, value = parts
            info[key] = value.strip()

    return info


def state_to_status(state: str) -> JobStatus:
    """Convert SGE state code to JobStatus.

    SGE states:
    - qw: pending (queued, waiting)
    - hqw: hold (on hold)
    - r: running
    - t: transferring
    - Rr, Rt: restarted
    - s, ts: suspended
    - S, tS: queue suspended
    - T, tT: threshold
    - Eqw: error (waiting)
    - dr: deleting (running)
    - dt: deleting (transferring)
    """
    state = state.lower()

    # Deleting or error states take precedence over other flags.
    if "d" in state:
        return JobStatus.CANCELLED
    if "e" in state:
        return JobStatus.FAILED

    # Running or transferring states.
    if "r" in state or "t" in state:
        return JobStatus.RUNNING

    # Queued, held, or suspended states.
    if "q" in state or "h" in state or "s" in state:
        return JobStatus.PENDING

    return JobStatus.UNKNOWN


def parse_qsub_output(output: str) -> str | None:
    """Parse qsub output to extract job ID.

    Expected format:
        Your job 12345 ("jobname") has been submitted
        Your job-array 12345.1-10:1 ("jobname") has been submitted
    """
    # Standard job
    match = re.search(r"Your job (\d+)", output)
    if match:
        return match.group(1)

    # Array job
    match = re.search(r"Your job-array (\d+)", output)
    if match:
        return match.group(1)

    return None
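A short usage sketch for the text-parsing helpers above, assuming the wheel is installed. The qsub and qacct strings are made-up samples in the formats the module's docstrings describe, not captured scheduler output.

# Usage sketch for the parser helpers; sample strings follow the documented formats.
from hpc_runner.schedulers.sge.parser import parse_qacct_output, parse_qsub_output

qsub_out = 'Your job 12345 ("myjob") has been submitted'
print(parse_qsub_output(qsub_out))  # 12345

qacct_out = """\
==============================================================
qname        all.q
jobnumber    12345
exit_status  0
"""
info = parse_qacct_output(qacct_out)
print(info["exit_status"])  # 0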