hpc-runner 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. hpc_runner/_version.py +2 -2
  2. hpc_runner/cli/cancel.py +1 -1
  3. hpc_runner/cli/config.py +2 -2
  4. hpc_runner/cli/main.py +17 -13
  5. hpc_runner/cli/monitor.py +30 -0
  6. hpc_runner/cli/run.py +223 -67
  7. hpc_runner/cli/status.py +6 -5
  8. hpc_runner/core/__init__.py +30 -0
  9. hpc_runner/core/descriptors.py +87 -33
  10. hpc_runner/core/exceptions.py +9 -0
  11. hpc_runner/core/job.py +272 -93
  12. hpc_runner/core/job_info.py +104 -0
  13. hpc_runner/core/result.py +4 -0
  14. hpc_runner/schedulers/base.py +148 -30
  15. hpc_runner/schedulers/detection.py +22 -4
  16. hpc_runner/schedulers/local/scheduler.py +119 -2
  17. hpc_runner/schedulers/sge/args.py +161 -94
  18. hpc_runner/schedulers/sge/parser.py +106 -13
  19. hpc_runner/schedulers/sge/scheduler.py +727 -171
  20. hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
  21. hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
  22. hpc_runner/tui/__init__.py +5 -0
  23. hpc_runner/tui/app.py +436 -0
  24. hpc_runner/tui/components/__init__.py +17 -0
  25. hpc_runner/tui/components/detail_panel.py +187 -0
  26. hpc_runner/tui/components/filter_bar.py +174 -0
  27. hpc_runner/tui/components/filter_popup.py +345 -0
  28. hpc_runner/tui/components/job_table.py +260 -0
  29. hpc_runner/tui/providers/__init__.py +5 -0
  30. hpc_runner/tui/providers/jobs.py +197 -0
  31. hpc_runner/tui/screens/__init__.py +7 -0
  32. hpc_runner/tui/screens/confirm.py +67 -0
  33. hpc_runner/tui/screens/job_details.py +210 -0
  34. hpc_runner/tui/screens/log_viewer.py +170 -0
  35. hpc_runner/tui/snapshot.py +153 -0
  36. hpc_runner/tui/styles/monitor.tcss +567 -0
  37. hpc_runner-0.2.1.dist-info/METADATA +285 -0
  38. hpc_runner-0.2.1.dist-info/RECORD +56 -0
  39. hpc_runner/schedulers/sge/templates/job.sh.j2 +0 -39
  40. hpc_runner-0.1.1.dist-info/METADATA +0 -46
  41. hpc_runner-0.1.1.dist-info/RECORD +0 -38
  42. {hpc_runner-0.1.1.dist-info → hpc_runner-0.2.1.dist-info}/WHEEL +0 -0
  43. {hpc_runner-0.1.1.dist-info → hpc_runner-0.2.1.dist-info}/entry_points.txt +0 -0
hpc_runner/schedulers/sge/args.py
@@ -1,165 +1,232 @@
1
- """SGE-specific argument descriptors."""
1
+ """SGE-specific argument renderers.
2
+
3
+ Each class knows how to render a single job attribute to SGE syntax,
4
+ both as a script directive (#$ ...) and as command-line arguments.
5
+ """
2
6
 
3
7
  from hpc_runner.core.descriptors import SchedulerArg
4
8
 
5
9
 
6
10
  class SGEArg(SchedulerArg):
7
- """Base SGE argument.
11
+ """Base class for SGE arguments.
8
12
 
9
- SGE uses #$ -flag value format for directives.
13
+ SGE uses:
14
+ - Directives: #$ -flag value
15
+ - CLI args: -flag value
10
16
  """
11
17
 
12
18
  def to_args(self, value) -> list[str]:
13
19
  if value is None:
14
20
  return []
15
- return [f"-{self.flag}", str(self.converter(value))]
21
+ return [f"-{self.flag}", str(value)]
16
22
 
17
23
  def to_directive(self, value) -> str | None:
18
24
  if value is None:
19
25
  return None
20
- return f"#$ -{self.flag} {self.converter(value)}"
26
+ return f"#$ -{self.flag} {value}"
27
+
28
+
29
+ # =============================================================================
30
+ # Simple Flag Arguments
31
+ # =============================================================================
32
+
33
+
34
+ class SGEJobNameArg(SGEArg):
35
+ """Job name: -N name"""
36
+
37
+ def __init__(self):
38
+ super().__init__("N", doc="Job name")
39
+
40
+
41
+ class SGEQueueArg(SGEArg):
42
+ """Queue selection: -q queue_name"""
43
+
44
+ def __init__(self):
45
+ super().__init__("q", doc="Queue/partition name")
46
+
47
+
48
+ class SGEOutputArg(SGEArg):
49
+ """Stdout path: -o path"""
50
+
51
+ def __init__(self):
52
+ super().__init__("o", doc="Stdout file path")
53
+
54
+
55
+ class SGEErrorArg(SGEArg):
56
+ """Stderr path: -e path"""
57
+
58
+ def __init__(self):
59
+ super().__init__("e", doc="Stderr file path")
21
60
 
22
61
 
23
- class SGECpuArg(SGEArg):
24
- """CPU/slots argument using parallel environment.
62
+ class SGEPriorityArg(SGEArg):
63
+ """Job priority: -p priority"""
25
64
 
26
- Note: The PE name is configurable via config.
65
+ def __init__(self):
66
+ super().__init__("p", doc="Job priority (-1023 to 1024)")
67
+
68
+
69
+ class SGEShellArg(SGEArg):
70
+ """Shell selection: -S /path/to/shell"""
71
+
72
+ def __init__(self):
73
+ super().__init__("S", doc="Shell path")
74
+
75
+
76
+ # =============================================================================
77
+ # Boolean Flag Arguments (no value, just presence)
78
+ # =============================================================================
79
+
80
+
81
+ class SGECwdArg(SchedulerArg[bool]):
82
+ """Use current working directory: -cwd"""
83
+
84
+ def __init__(self):
85
+ super().__init__("cwd", doc="Execute in current working directory")
86
+
87
+ def to_args(self, value: bool | None) -> list[str]:
88
+ return ["-cwd"] if value else []
89
+
90
+ def to_directive(self, value: bool | None) -> str | None:
91
+ return "#$ -cwd" if value else None
92
+
93
+
94
+ class SGEInheritEnvArg(SchedulerArg[bool]):
95
+ """Inherit environment: -V"""
96
+
97
+ def __init__(self):
98
+ super().__init__("V", doc="Inherit environment variables")
99
+
100
+ def to_args(self, value: bool | None) -> list[str]:
101
+ return ["-V"] if value else []
102
+
103
+ def to_directive(self, value: bool | None) -> str | None:
104
+ return "#$ -V" if value else None
105
+
106
+
107
+ class SGEMergeOutputArg(SchedulerArg[bool]):
108
+ """Merge stdout and stderr: -j y"""
109
+
110
+ def __init__(self):
111
+ super().__init__("j", doc="Join stdout and stderr")
112
+
113
+ def to_args(self, value: bool | None) -> list[str]:
114
+ return ["-j", "y"] if value else []
115
+
116
+ def to_directive(self, value: bool | None) -> str | None:
117
+ return "#$ -j y" if value else None
118
+
119
+
120
+ # =============================================================================
121
+ # Resource Arguments (configurable resource names)
122
+ # =============================================================================
123
+
124
+
125
+ class SGECpuArg(SchedulerArg[int]):
126
+ """Parallel environment slots: -pe <pe_name> <slots>
127
+
128
+ The PE name is configurable per-cluster (e.g., 'smp', 'mpi', 'orte').
27
129
  """
28
130
 
29
131
  def __init__(self, pe_name: str = "smp"):
30
- super().__init__("pe", converter=lambda v: f"{pe_name} {v}", doc="Parallel environment")
132
+ super().__init__("pe", doc=f"Parallel environment ({pe_name})")
31
133
  self.pe_name = pe_name
32
134
 
33
- def to_args(self, value, pe_name: str | None = None) -> list[str]:
135
+ def to_args(self, value: int | None) -> list[str]:
34
136
  if value is None:
35
137
  return []
36
- pe = pe_name or self.pe_name
37
- return ["-pe", f"{pe} {value}"]
138
+ return ["-pe", self.pe_name, str(value)]
38
139
 
39
- def to_directive(self, value, pe_name: str | None = None) -> str | None:
140
+ def to_directive(self, value: int | None) -> str | None:
40
141
  if value is None:
41
142
  return None
42
- pe = pe_name or self.pe_name
43
- return f"#$ -pe {pe} {value}"
143
+ return f"#$ -pe {self.pe_name} {value}"
44
144
 
45
145
 
46
- class SGEMemArg(SGEArg):
47
- """Memory argument.
146
+ class SGEMemArg(SchedulerArg[str]):
147
+ """Memory request: -l <resource>=<value>
48
148
 
49
- Uses -l resource=value format. Resource name is configurable.
149
+ The resource name is configurable (e.g., 'mem_free', 'h_vmem', 'mem').
50
150
  """
51
151
 
52
152
  def __init__(self, resource_name: str = "mem_free"):
53
- super().__init__("l", doc="Memory requirement")
153
+ super().__init__("l", doc=f"Memory ({resource_name})")
54
154
  self.resource_name = resource_name
55
155
 
56
- def to_args(self, value, resource_name: str | None = None) -> list[str]:
156
+ def to_args(self, value: str | None) -> list[str]:
57
157
  if value is None:
58
158
  return []
59
- res = resource_name or self.resource_name
60
- return ["-l", f"{res}={value}"]
159
+ return ["-l", f"{self.resource_name}={value}"]
61
160
 
62
- def to_directive(self, value, resource_name: str | None = None) -> str | None:
161
+ def to_directive(self, value: str | None) -> str | None:
63
162
  if value is None:
64
163
  return None
65
- res = resource_name or self.resource_name
66
- return f"#$ -l {res}={value}"
164
+ return f"#$ -l {self.resource_name}={value}"
67
165
 
68
166
 
69
- class SGETimeArg(SGEArg):
70
- """Time limit argument.
167
+ class SGETimeArg(SchedulerArg[str]):
168
+ """Time limit: -l <resource>=<HH:MM:SS>
71
169
 
72
- Uses -l h_rt=HH:MM:SS format. Resource name is configurable.
170
+ The resource name is configurable (e.g., 'h_rt', 's_rt').
73
171
  """
74
172
 
75
173
  def __init__(self, resource_name: str = "h_rt"):
76
- super().__init__("l", doc="Hard runtime limit")
174
+ super().__init__("l", doc=f"Time limit ({resource_name})")
77
175
  self.resource_name = resource_name
78
176
 
79
- def to_args(self, value, resource_name: str | None = None) -> list[str]:
177
+ def to_args(self, value: str | None) -> list[str]:
80
178
  if value is None:
81
179
  return []
82
- res = resource_name or self.resource_name
83
- return ["-l", f"{res}={value}"]
180
+ return ["-l", f"{self.resource_name}={value}"]
84
181
 
85
- def to_directive(self, value, resource_name: str | None = None) -> str | None:
182
+ def to_directive(self, value: str | None) -> str | None:
86
183
  if value is None:
87
184
  return None
88
- res = resource_name or self.resource_name
89
- return f"#$ -l {res}={value}"
90
-
91
-
92
- class SGEQueueArg(SGEArg):
93
- """Queue argument."""
94
-
95
- def __init__(self):
96
- super().__init__("q", doc="Queue name")
97
-
98
-
99
- class SGEJobNameArg(SGEArg):
100
- """Job name argument."""
101
-
102
- def __init__(self):
103
- super().__init__("N", doc="Job name")
104
-
105
-
106
- class SGEOutputArg(SGEArg):
107
- """Stdout path argument."""
108
-
109
- def __init__(self):
110
- super().__init__("o", doc="Stdout file path")
185
+ return f"#$ -l {self.resource_name}={value}"
111
186
 
112
187
 
113
- class SGEErrorArg(SGEArg):
114
- """Stderr path argument."""
188
+ # =============================================================================
189
+ # Array Job Arguments
190
+ # =============================================================================
115
191
 
116
- def __init__(self):
117
- super().__init__("e", doc="Stderr file path")
118
192
 
193
+ class SGEArrayArg(SchedulerArg[str]):
194
+ """Array job range: -t range
119
195
 
120
- class SGEArrayArg(SGEArg):
121
- """Array job argument."""
196
+ Range formats: 1-100, 1-100:10, 1,2,3,4
197
+ """
122
198
 
123
199
  def __init__(self):
124
- super().__init__("t", doc="Array job range (e.g., 1-100, 1-100:10)")
200
+ super().__init__("t", doc="Array job range")
125
201
 
202
+ def to_args(self, value: str | None) -> list[str]:
203
+ if value is None:
204
+ return []
205
+ return ["-t", value]
126
206
 
127
- class SGEJoinOutputArg(SGEArg):
128
- """Join stdout and stderr."""
129
-
130
- def __init__(self):
131
- super().__init__("j", doc="Join stdout and stderr")
207
+ def to_directive(self, value: str | None) -> str | None:
208
+ if value is None:
209
+ return None
210
+ return f"#$ -t {value}"
132
211
 
133
- def to_args(self, value) -> list[str]:
134
- if value:
135
- return ["-j", "y"]
136
- return []
137
212
 
138
- def to_directive(self, value) -> str | None:
139
- if value:
140
- return "#$ -j y"
141
- return None
213
+ # =============================================================================
214
+ # Dependency Arguments
215
+ # =============================================================================
142
216
 
143
217
 
144
- class SGECwdArg(SGEArg):
145
- """Use current working directory."""
218
+ class SGEHoldArg(SchedulerArg[str]):
219
+ """Job dependency: -hold_jid job_id[,job_id,...]"""
146
220
 
147
221
  def __init__(self):
148
- super().__init__("cwd", doc="Use current working directory")
222
+ super().__init__("hold_jid", doc="Hold until jobs complete")
149
223
 
150
- def to_args(self, value) -> list[str]:
151
- if value:
152
- return ["-cwd"]
153
- return []
154
-
155
- def to_directive(self, value) -> str | None:
156
- if value:
157
- return "#$ -cwd"
158
- return None
159
-
160
-
161
- class SGEShellArg(SGEArg):
162
- """Shell to use for the job."""
224
+ def to_args(self, value: str | None) -> list[str]:
225
+ if value is None:
226
+ return []
227
+ return ["-hold_jid", value]
163
228
 
164
- def __init__(self):
165
- super().__init__("S", doc="Shell path")
229
+ def to_directive(self, value: str | None) -> str | None:
230
+ if value is None:
231
+ return None
232
+ return f"#$ -hold_jid {value}"
hpc_runner/schedulers/sge/parser.py
@@ -1,6 +1,7 @@
1
1
  """SGE output parsing utilities."""
2
2
 
3
3
  import re
4
+ from datetime import datetime
4
5
  import xml.etree.ElementTree as ET
5
6
  from typing import Any
6
7
 
@@ -16,6 +17,7 @@ def parse_qstat_xml(xml_output: str) -> dict[str, Any]:
16
17
 
17
18
  try:
18
19
  root = ET.fromstring(xml_output)
20
+ _strip_namespaces(root)
19
21
 
20
22
  # Parse queue_info (running jobs)
21
23
  for job_list in root.findall(".//job_list"):
@@ -36,7 +38,20 @@ def parse_qstat_xml(xml_output: str) -> dict[str, Any]:
36
38
 
37
39
 
38
40
  def _parse_job_element(elem: ET.Element) -> dict[str, Any] | None:
39
- """Parse a single job_list element."""
41
+ """Parse a single job_list element.
42
+
43
+ SGE XML elements include:
44
+ - JB_job_number: Job ID
45
+ - JB_name: Job name
46
+ - JB_owner: Username
47
+ - state: Job state (r, qw, hqw, etc.)
48
+ - queue_name: Queue@host (for running jobs)
49
+ - hard_req_queue: Requested queue (for pending jobs)
50
+ - slots: Number of slots/CPUs
51
+ - JB_submission_time: Submission timestamp (epoch)
52
+ - JAT_start_time: Start timestamp (epoch, running jobs only)
53
+ - tasks: Array task ID (for array jobs)
54
+ """
40
55
  job_id_elem = elem.find("JB_job_number")
41
56
  if job_id_elem is None or job_id_elem.text is None:
42
57
  return None
@@ -50,24 +65,68 @@ def _parse_job_element(elem: ET.Element) -> dict[str, Any] | None:
50
65
  if name_elem is not None and name_elem.text:
51
66
  job_info["name"] = name_elem.text
52
67
 
68
+ # Owner/user
69
+ owner_elem = elem.find("JB_owner")
70
+ if owner_elem is not None and owner_elem.text:
71
+ job_info["user"] = owner_elem.text
72
+
53
73
  # State
54
74
  state_elem = elem.find("state")
55
75
  if state_elem is not None and state_elem.text:
56
76
  job_info["state"] = state_elem.text
57
77
 
58
- # Queue
78
+ # Queue - running jobs have queue_name, pending may have hard_req_queue
59
79
  queue_elem = elem.find("queue_name")
60
80
  if queue_elem is not None and queue_elem.text:
61
- job_info["queue"] = queue_elem.text
62
-
63
- # Slots
81
+ # Format is usually "queue@host", extract queue and host separately
82
+ queue_full = queue_elem.text
83
+ if "@" in queue_full:
84
+ queue_name, host = queue_full.split("@", 1)
85
+ job_info["queue"] = queue_name
86
+ job_info["node"] = host
87
+ else:
88
+ job_info["queue"] = queue_full
89
+ else:
90
+ # Check for requested queue (pending jobs)
91
+ hard_queue = elem.find("hard_req_queue")
92
+ if hard_queue is not None and hard_queue.text:
93
+ job_info["queue"] = hard_queue.text
94
+
95
+ # Slots (CPU count)
64
96
  slots_elem = elem.find("slots")
65
97
  if slots_elem is not None and slots_elem.text:
66
98
  job_info["slots"] = int(slots_elem.text)
67
99
 
100
+ # Submission time (epoch seconds)
101
+ submit_text = elem.findtext(".//JB_submission_time")
102
+ if submit_text:
103
+ try:
104
+ job_info["submit_time"] = int(submit_text)
105
+ except ValueError:
106
+ pass
107
+
108
+ # Start time (epoch seconds, only for running jobs)
109
+ start_text = elem.findtext(".//JAT_start_time")
110
+ if start_text:
111
+ start_epoch = _parse_sge_timestamp(start_text)
112
+ if start_epoch is not None:
113
+ job_info["start_time"] = start_epoch
114
+
115
+ # Array task ID
116
+ tasks_elem = elem.find("tasks")
117
+ if tasks_elem is not None and tasks_elem.text:
118
+ job_info["array_task_id"] = tasks_elem.text
119
+
68
120
  return job_info
69
121
 
70
122
 
123
+ def _strip_namespaces(root: ET.Element) -> None:
124
+ """Strip XML namespaces so ElementTree finds simple tag names."""
125
+ for elem in root.iter():
126
+ if isinstance(elem.tag, str) and "}" in elem.tag:
127
+ elem.tag = elem.tag.split("}", 1)[1]
128
+
129
+
71
130
  def parse_qstat_plain(output: str) -> dict[str, Any]:
72
131
  """Parse plain qstat output.
73
132
 
@@ -100,6 +159,15 @@ def parse_qstat_plain(output: str) -> dict[str, Any]:
100
159
  "state": parts[4],
101
160
  }
102
161
 
162
+ # Parse submit/start time (MM/DD/YYYY HH:MM:SS)
163
+ if len(parts) >= 7:
164
+ timestamp = _parse_qstat_datetime(parts[5], parts[6])
165
+ if timestamp is not None:
166
+ if "r" in parts[4]:
167
+ jobs[job_id]["start_time"] = timestamp
168
+ else:
169
+ jobs[job_id]["submit_time"] = timestamp
170
+
103
171
  # Parse queue if present
104
172
  if len(parts) >= 8:
105
173
  jobs[job_id]["queue"] = parts[7]
@@ -114,6 +182,28 @@ def parse_qstat_plain(output: str) -> dict[str, Any]:
114
182
  return jobs
115
183
 
116
184
 
185
+ def _parse_qstat_datetime(date_part: str, time_part: str) -> int | None:
186
+ """Parse qstat date/time into epoch seconds."""
187
+ try:
188
+ dt = datetime.strptime(f"{date_part} {time_part}", "%m/%d/%Y %H:%M:%S")
189
+ except ValueError:
190
+ return None
191
+ return int(dt.timestamp())
192
+
193
+
194
+ def _parse_sge_timestamp(value: str) -> int | None:
195
+ """Parse SGE timestamps that may be epoch seconds or ISO 8601."""
196
+ if value.isdigit():
197
+ try:
198
+ return int(value)
199
+ except ValueError:
200
+ return None
201
+ try:
202
+ return int(datetime.strptime(value, "%Y-%m-%dT%H:%M:%S").timestamp())
203
+ except ValueError:
204
+ return None
205
+
206
+
117
207
  def parse_qacct_output(output: str) -> dict[str, Any]:
118
208
  """Parse qacct output for job accounting info.
119
209
 
@@ -160,16 +250,19 @@ def state_to_status(state: str) -> JobStatus:
160
250
  """
161
251
  state = state.lower()
162
252
 
163
- if state in ("r", "t", "rr", "rt"):
253
+ # Deleting or error states take precedence over other flags.
254
+ if "d" in state:
255
+ return JobStatus.CANCELLED
256
+ if "e" in state:
257
+ return JobStatus.FAILED
258
+
259
+ # Running or transferring states.
260
+ if "r" in state or "t" in state:
164
261
  return JobStatus.RUNNING
165
- elif state in ("qw", "hqw"):
262
+
263
+ # Queued, held, or suspended states.
264
+ if "q" in state or "h" in state or "s" in state:
166
265
  return JobStatus.PENDING
167
- elif state in ("eqw",):
168
- return JobStatus.FAILED
169
- elif state in ("dr", "dt"):
170
- return JobStatus.CANCELLED
171
- elif state in ("s", "ts", "ss", "ts"):
172
- return JobStatus.PENDING # Suspended, treat as pending
173
266
 
174
267
  return JobStatus.UNKNOWN
175
268