hpc-runner 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. hpc_runner/_version.py +2 -2
  2. hpc_runner/cli/cancel.py +1 -1
  3. hpc_runner/cli/config.py +2 -2
  4. hpc_runner/cli/main.py +17 -13
  5. hpc_runner/cli/monitor.py +30 -0
  6. hpc_runner/cli/run.py +223 -67
  7. hpc_runner/cli/status.py +6 -5
  8. hpc_runner/core/__init__.py +30 -0
  9. hpc_runner/core/descriptors.py +87 -33
  10. hpc_runner/core/exceptions.py +9 -0
  11. hpc_runner/core/job.py +272 -93
  12. hpc_runner/core/job_info.py +104 -0
  13. hpc_runner/core/result.py +4 -0
  14. hpc_runner/schedulers/base.py +148 -30
  15. hpc_runner/schedulers/detection.py +22 -4
  16. hpc_runner/schedulers/local/scheduler.py +119 -2
  17. hpc_runner/schedulers/sge/args.py +161 -94
  18. hpc_runner/schedulers/sge/parser.py +106 -13
  19. hpc_runner/schedulers/sge/scheduler.py +727 -171
  20. hpc_runner/schedulers/sge/templates/batch.sh.j2 +82 -0
  21. hpc_runner/schedulers/sge/templates/interactive.sh.j2 +78 -0
  22. hpc_runner/tui/__init__.py +5 -0
  23. hpc_runner/tui/app.py +436 -0
  24. hpc_runner/tui/components/__init__.py +17 -0
  25. hpc_runner/tui/components/detail_panel.py +187 -0
  26. hpc_runner/tui/components/filter_bar.py +174 -0
  27. hpc_runner/tui/components/filter_popup.py +345 -0
  28. hpc_runner/tui/components/job_table.py +260 -0
  29. hpc_runner/tui/providers/__init__.py +5 -0
  30. hpc_runner/tui/providers/jobs.py +197 -0
  31. hpc_runner/tui/screens/__init__.py +7 -0
  32. hpc_runner/tui/screens/confirm.py +67 -0
  33. hpc_runner/tui/screens/job_details.py +210 -0
  34. hpc_runner/tui/screens/log_viewer.py +170 -0
  35. hpc_runner/tui/snapshot.py +153 -0
  36. hpc_runner/tui/styles/monitor.tcss +567 -0
  37. hpc_runner-0.2.1.dist-info/METADATA +285 -0
  38. hpc_runner-0.2.1.dist-info/RECORD +56 -0
  39. hpc_runner/schedulers/sge/templates/job.sh.j2 +0 -39
  40. hpc_runner-0.1.1.dist-info/METADATA +0 -46
  41. hpc_runner-0.1.1.dist-info/RECORD +0 -38
  42. {hpc_runner-0.1.1.dist-info → hpc_runner-0.2.1.dist-info}/WHEEL +0 -0
  43. {hpc_runner-0.1.1.dist-info → hpc_runner-0.2.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,260 @@
1
+ """Job table widget for displaying HPC jobs."""
2
+
3
+ from textual.events import Resize
4
+ from textual.message import Message
5
+ from textual.widgets import DataTable
6
+
7
+ from hpc_runner.core.job_info import JobInfo
8
+ from hpc_runner.core.result import JobStatus
9
+
10
+
11
class JobTable(DataTable):
    """DataTable for displaying HPC jobs.

    Shows one row per job with the columns ID, Name, User, Queue, Status,
    Runtime and Slots. Every column except Name has a fixed width; Name is
    resized to take up whatever width is left in the table.

    Messages:
        JobSelected: Emitted when a job row is highlighted/selected.
    """

    class JobSelected(Message):
        """Message sent when a job is selected in the table."""

        def __init__(self, job_id: str, job_info: JobInfo | None = None) -> None:
            """Store the selected job's ID and, when known, its JobInfo."""
            self.job_id = job_id
            self.job_info = job_info
            super().__init__()

    # Column definitions: (key, label, fixed_width).
    # These are the fixed-width columns; the "name" column is inserted right
    # after the first entry (job_id) and receives the remaining space.
    FIXED_COLUMNS = [
        ("job_id", "ID", 10),
        ("user", "User", 14),
        ("queue", "Queue", 12),
        ("status", "Status", 10),
        ("runtime", "Runtime", 12),
        ("slots", "Slots", 6),
    ]
    NAME_COL_MIN = 15  # Minimum width for name column

    def __init__(
        self,
        *,
        show_cursor: bool = True,
        zebra_stripes: bool = True,
        id: str | None = None,
        classes: str | None = None,
    ) -> None:
        """Initialize the job table.

        Args:
            show_cursor: Whether to show the cursor/selection.
            zebra_stripes: Whether to alternate row colors.
            id: Widget ID.
            classes: CSS classes.
        """
        super().__init__(
            show_cursor=show_cursor,
            zebra_stripes=zebra_stripes,
            cursor_type="row",
            id=id,
            classes=classes,
        )
        # Jobs currently shown, keyed by job ID (also used as the row key).
        self._jobs: dict[str, JobInfo] = {}
        self._name_col_width = 20  # Default, will be recalculated

    def on_mount(self) -> None:
        """Set up columns on mount using the best available width."""
        self.border_title = "Jobs"
        self._setup_columns()
        self.call_after_refresh(self._sync_columns_to_current_width)

    def on_resize(self, event: Resize) -> None:
        """Handle resize events by syncing column widths to the new size."""
        del event
        self.call_after_refresh(self._sync_columns_to_current_width)

    def _get_table_width(self) -> int:
        """Return the width available for columns within the table."""
        content_size = getattr(self, "content_size", None)
        if content_size is not None:
            return content_size.width
        return self.size.width or self.app.console.size.width

    def _calculate_name_width(self, table_width: int) -> int:
        """Calculate the name column width to avoid horizontal overflow.

        Args:
            table_width: Total width available to the table's columns.

        Returns:
            The width left for the name column, never below NAME_COL_MIN.
        """
        fixed_total = sum(w for _, _, w in self.FIXED_COLUMNS)
        # Two cells of spacing per column: the fixed columns plus "name".
        column_spacing = (len(self.FIXED_COLUMNS) + 1) * 2
        name_width = table_width - fixed_total - column_spacing
        return max(self.NAME_COL_MIN, name_width)

    def _sync_columns_to_current_width(self) -> None:
        """Sync the name column width after layout updates size information."""
        table_width = self._get_table_width()
        if table_width <= 0:
            return
        desired_width = self._calculate_name_width(table_width)
        if desired_width != self._name_col_width:
            self._set_name_column_width(desired_width)
        # Re-check once the new width has been laid out.
        self.call_after_refresh(self._post_layout_adjust, table_width)

    def _post_layout_adjust(self, table_width: int) -> None:
        """Trim name width if virtual width still overflows after layout."""
        if table_width <= 0:
            return
        scrollbar_width = 1 if self.show_vertical_scrollbar else 0
        effective_width = table_width - scrollbar_width
        overflow = self.virtual_size.width - effective_width
        if overflow <= 0 or self._name_col_width <= self.NAME_COL_MIN:
            return
        adjusted_width = max(self.NAME_COL_MIN, self._name_col_width - overflow)
        if adjusted_width != self._name_col_width:
            self._set_name_column_width(adjusted_width)

    def _set_name_column_width(self, width: int) -> None:
        """Apply name column width and refresh rows for correct truncation."""
        self._name_col_width = width
        name_column = self.columns.get("name")
        if name_column is not None:
            name_column.width = width
        if self._jobs:
            self._refresh_rows_for_width()

    def _refresh_rows_for_width(self) -> None:
        """Rebuild rows in the current order to match the new name width."""
        row_job_ids = [str(row_key.value) for row_key in self.rows]
        ordered_jobs = [
            self._jobs[job_id] for job_id in row_job_ids if job_id in self._jobs
        ]
        # Jobs that are tracked but have no row yet go after the existing rows.
        shown = set(row_job_ids)
        ordered_jobs.extend(
            job for job_id, job in self._jobs.items() if job_id not in shown
        )
        self.update_jobs(ordered_jobs)

    def _setup_columns(self) -> None:
        """Set up the table columns: ID, then Name, then the other fixed ones."""
        # Take the ID column from FIXED_COLUMNS so its label, key and width
        # are defined in exactly one place.
        (id_key, id_label, id_width), *other_columns = self.FIXED_COLUMNS
        self.add_column(id_label, key=id_key, width=id_width)
        # Name column expands to fill the remaining space
        self.add_column("Name", key="name", width=self._name_col_width)
        for key, label, width in other_columns:
            self.add_column(label, key=key, width=width)

    def _truncate_name(self, name: str) -> str:
        """Truncate job name to fit in the name column."""
        if len(name) <= self._name_col_width:
            return name
        # Truncate and add ellipsis
        return name[: self._name_col_width - 1] + "…"

    def _job_id_at_cursor(self) -> str | None:
        """Return the job ID of the row under the cursor, or None if unavailable."""
        row_index = self.cursor_row
        if row_index is None or row_index < 0:
            return None
        try:
            cells = self.get_row_at(row_index)
            # The first cell of every row is the job ID (see update_jobs).
            return str(cells[0]) if cells else None
        except Exception:
            # Deliberate best effort: a cursor position without a matching
            # row (presumably e.g. an empty table) just means "no selection".
            return None

    def update_jobs(self, jobs: list[JobInfo]) -> None:
        """Update the table with a new list of jobs.

        All existing rows are replaced. If the previously highlighted job is
        still present, the cursor is moved back onto it.

        Args:
            jobs: List of JobInfo objects to display.
        """
        # Save current selection to restore after update
        selected_job_id = self._job_id_at_cursor()

        # Clear existing data
        self.clear()
        self._jobs.clear()

        # Add new rows
        for job in jobs:
            self._jobs[job.job_id] = job
            self.add_row(
                job.job_id,
                self._truncate_name(job.name),
                job.user,
                job.queue or "—",
                self._format_status(job.status),
                job.runtime_display,
                str(job.cpu) if job.cpu is not None else "—",
                key=job.job_id,
            )

        # Restore selection if the job still exists
        if selected_job_id and selected_job_id in self._jobs:
            row_index = self._get_row_index(selected_job_id)
            if row_index is not None:
                try:
                    self.move_cursor(row=row_index)
                except Exception:
                    # Restoring the cursor is cosmetic; never fail a refresh
                    # because of it.
                    pass

    def _get_row_index(self, job_id: str) -> int | None:
        """Get the row index for a job ID, or None if it has no row."""
        for idx, row_key in enumerate(self.rows.keys()):
            if str(row_key.value) == job_id:
                return idx
        return None

    def _format_status(self, status: JobStatus) -> str:
        """Format status for display.

        Returns a short upper-case label; statuses without an explicit
        label fall back to the enum member name.
        """
        status_map = {
            JobStatus.RUNNING: "RUNNING",
            JobStatus.PENDING: "PENDING",
            JobStatus.COMPLETED: "COMPLETE",
            JobStatus.FAILED: "FAILED",
            JobStatus.CANCELLED: "CANCEL",
            JobStatus.TIMEOUT: "TIMEOUT",
            JobStatus.UNKNOWN: "UNKNOWN",
        }
        return status_map.get(status, str(status.name))

    def on_data_table_row_highlighted(
        self, event: DataTable.RowHighlighted
    ) -> None:
        """Handle row highlight - emit JobSelected message."""
        if event.row_key is not None:
            job_id = str(event.row_key.value)
            job_info = self._jobs.get(job_id)
            self.post_message(self.JobSelected(job_id, job_info))

    def get_selected_job(self) -> JobInfo | None:
        """Get the currently selected job.

        Returns:
            The selected JobInfo, or None if nothing selected.
        """
        job_id = self._job_id_at_cursor()
        if job_id is None:
            return None
        return self._jobs.get(job_id)

    @property
    def job_count(self) -> int:
        """Get the number of jobs in the table."""
        return len(self._jobs)

    @property
    def is_empty(self) -> bool:
        """Check if the table is empty."""
        return not self._jobs
@@ -0,0 +1,5 @@
1
+ """Data providers for HPC Monitor TUI."""
2
+
3
+ from .jobs import JobProvider
4
+
5
+ __all__ = ["JobProvider"]
@@ -0,0 +1,197 @@
1
+ """Job data provider for HPC Monitor TUI.
2
+
3
+ Wraps scheduler calls in async methods that run in a thread pool
4
+ to avoid blocking the UI.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ import os
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ from datetime import datetime
14
+ from typing import TYPE_CHECKING
15
+
16
+ from hpc_runner.core.exceptions import AccountingNotAvailable
17
+ from hpc_runner.core.job_info import JobInfo
18
+ from hpc_runner.core.result import JobStatus
19
+
20
+ if TYPE_CHECKING:
21
+ from hpc_runner.schedulers.base import BaseScheduler
22
+
23
logger = logging.getLogger(__name__)

# Shared thread pool for scheduler calls
_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="hpc-provider")


class JobProvider:
    """Async provider for job data from HPC schedulers.

    Wraps synchronous scheduler calls in async methods that run in a
    thread pool, preventing UI blocking during data fetches.

    Attributes:
        scheduler: The HPC scheduler instance to query.
        current_user: The current username for filtering, read from the
            USER environment variable ("unknown" when it is not set).
    """

    def __init__(self, scheduler: "BaseScheduler") -> None:
        """Initialize the job provider.

        Args:
            scheduler: The scheduler instance to use for queries.
        """
        self.scheduler = scheduler
        self.current_user = os.environ.get("USER", "unknown")

    def _scheduler_user(self, user_filter: str) -> str | None:
        """Translate a UI user filter into the scheduler's ``user`` argument.

        "me" maps to the current username; any other value maps to None,
        which the query methods pass on as "no user restriction".
        """
        return self.current_user if user_filter == "me" else None

    async def get_active_jobs(
        self,
        user_filter: str = "me",
        status_filter: set[JobStatus] | None = None,
        queue_filter: str | None = None,
    ) -> list[JobInfo]:
        """Get active jobs asynchronously.

        Args:
            user_filter: "me" for current user only, "all" for all users.
            status_filter: Set of statuses to include. None = all.
            queue_filter: Queue name to filter by. None = all.

        Returns:
            List of JobInfo objects. Empty list on error.
        """
        user = self._scheduler_user(user_filter)

        try:
            # Inside a coroutine there is always a running loop, so ask for
            # it directly instead of going through get_event_loop().
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                _executor,
                lambda: self.scheduler.list_active_jobs(
                    user=user,
                    status=status_filter,
                    queue=queue_filter,
                ),
            )
        except NotImplementedError:
            logger.warning(
                "Scheduler %s does not implement list_active_jobs",
                self.scheduler.name,
            )
            return []
        except Exception as e:
            # Best effort for the UI: report the problem and show no jobs.
            logger.error("Error fetching active jobs: %s", e)
            return []

    async def get_completed_jobs(
        self,
        user_filter: str = "me",
        since: datetime | None = None,
        until: datetime | None = None,
        exit_code: int | None = None,
        queue_filter: str | None = None,
        limit: int = 100,
    ) -> list[JobInfo]:
        """Get completed jobs asynchronously.

        Args:
            user_filter: "me" for current user only, "all" for all users.
            since: Only jobs completed after this time.
            until: Only jobs completed before this time.
            exit_code: Filter by exit code. None = all.
            queue_filter: Queue name to filter by. None = all.
            limit: Maximum number of jobs to return.

        Returns:
            List of JobInfo objects. Empty list on error.

        Raises:
            AccountingNotAvailable: If scheduler accounting is not enabled,
                or the scheduler does not implement job history at all.
        """
        user = self._scheduler_user(user_filter)

        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                _executor,
                lambda: self.scheduler.list_completed_jobs(
                    user=user,
                    since=since,
                    until=until,
                    exit_code=exit_code,
                    queue=queue_filter,
                    limit=limit,
                ),
            )
        except AccountingNotAvailable:
            # Re-raise so caller can show appropriate message
            raise
        except NotImplementedError as err:
            logger.warning(
                "Scheduler %s does not implement list_completed_jobs",
                self.scheduler.name,
            )
            # Surface this the same way as disabled accounting, keeping the
            # original error attached as the cause.
            raise AccountingNotAvailable(
                f"Scheduler {self.scheduler.name} does not support job history"
            ) from err
        except Exception as e:
            logger.error("Error fetching completed jobs: %s", e)
            return []

    async def get_job_details(self, job_id: str) -> JobInfo | None:
        """Get detailed information for a single job.

        Args:
            job_id: The job ID to look up.

        Returns:
            JobInfo with details, or None if not found/error.
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                _executor,
                lambda: self.scheduler.get_job_details(job_id),
            )
        except Exception as e:
            logger.error("Error fetching job details for %s: %s", job_id, e)
            return None

    async def has_accounting(self) -> bool:
        """Check if job accounting/history is available.

        Returns:
            True if completed job history is available. False if the check
            itself fails.
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                _executor,
                self.scheduler.has_accounting,
            )
        except Exception as e:
            logger.error("Error checking accounting availability: %s", e)
            return False

    async def cancel_job(self, job_id: str) -> bool:
        """Cancel a job.

        Args:
            job_id: The job ID to cancel.

        Returns:
            Whatever the scheduler's ``cancel`` returns (True on success),
            or False if the call raised.
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                _executor,
                lambda: self.scheduler.cancel(job_id),
            )
        except Exception as e:
            logger.error("Error cancelling job %s: %s", job_id, e)
            return False
@@ -0,0 +1,7 @@
1
+ """TUI screens for HPC Monitor."""
2
+
3
+ from .confirm import ConfirmScreen
4
+ from .job_details import JobDetailsScreen
5
+ from .log_viewer import LogViewerScreen
6
+
7
+ __all__ = ["ConfirmScreen", "JobDetailsScreen", "LogViewerScreen"]
@@ -0,0 +1,67 @@
1
+ """Confirmation modal screen."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from textual.app import ComposeResult
6
+ from textual.containers import Horizontal, Vertical
7
+ from textual.screen import ModalScreen
8
+ from textual.widgets import Button, Static
9
+
10
+
11
class ConfirmScreen(ModalScreen[bool]):
    """Modal dialog asking the user to confirm an action.

    The screen is dismissed with True when the user confirms (button or
    "y") and with False when the user cancels (escape).
    Styles are defined in monitor.tcss.
    """

    BINDINGS = [
        ("escape", "cancel", "Cancel"),
        ("y", "confirm", "Yes"),
        ("s", "screenshot", "Screenshot"),
    ]

    def __init__(
        self,
        message: str,
        title: str = "Confirm",
        confirm_label: str = "Confirm",
        **kwargs,
    ) -> None:
        """Create the dialog.

        Args:
            message: Text shown in the dialog body (rendered with markup).
            title: Border title of the dialog.
            confirm_label: Label of the confirm button.
            **kwargs: Forwarded unchanged to ModalScreen.
        """
        super().__init__(**kwargs)
        self._message = message
        self._title = title
        self._confirm_label = confirm_label

    def compose(self) -> ComposeResult:
        """Build the dialog: the message above a row with button and hint."""
        button_row = Horizontal(
            Button(self._confirm_label, id="btn-confirm", variant="default"),
            Static("Esc to dismiss", id="confirm-hint"),
            id="confirm-buttons",
        )
        yield Vertical(
            Static(self._message, id="confirm-message", markup=True),
            button_row,
            id="confirm-dialog",
        )

    def on_mount(self) -> None:
        """Apply the title and put keyboard focus on the confirm button."""
        self.query_one("#confirm-dialog", Vertical).border_title = self._title
        confirm_button = self.query_one("#btn-confirm", Button)
        confirm_button.focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with True when the confirm button is pressed."""
        if event.button.id != "btn-confirm":
            return
        self.dismiss(True)

    def action_confirm(self) -> None:
        """Confirm action (y key)."""
        self.dismiss(True)

    def action_cancel(self) -> None:
        """Cancel action (escape key)."""
        self.dismiss(False)

    def action_screenshot(self) -> None:
        """Save a screenshot to the current directory and notify the user."""
        saved_to = self.app.save_screenshot(path="./")
        self.app.notify(f"Screenshot saved: {saved_to}", timeout=3)