dayhoff-tools 1.9.10__py3-none-any.whl → 1.9.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/cli/engine/__init__.py +283 -41
- dayhoff_tools/cli/engine/engine_core.py +141 -227
- {dayhoff_tools-1.9.10.dist-info → dayhoff_tools-1.9.12.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.9.10.dist-info → dayhoff_tools-1.9.12.dist-info}/RECORD +6 -6
- {dayhoff_tools-1.9.10.dist-info → dayhoff_tools-1.9.12.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.9.10.dist-info → dayhoff_tools-1.9.12.dist-info}/entry_points.txt +0 -0
@@ -6,44 +6,286 @@ import typer
|
|
6
6
|
engine_app = typer.Typer(help="Manage compute engines for development.")
|
7
7
|
studio_app = typer.Typer(help="Manage persistent development studios.")
|
8
8
|
|
9
|
-
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
9
|
+
# Use lazy loading pattern similar to main.py swarm commands
|
10
|
+
# Import functions only when commands are actually called
|
11
|
+
|
12
|
+
# Engine commands
|
13
|
+
@engine_app.command("launch")
|
14
|
+
def launch_engine_cmd(
|
15
|
+
name: str = typer.Argument(help="Name for the new engine"),
|
16
|
+
engine_type: str = typer.Option(
|
17
|
+
"cpu",
|
18
|
+
"--type",
|
19
|
+
"-t",
|
20
|
+
help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
|
21
|
+
),
|
22
|
+
user: str = typer.Option(None, "--user", "-u", help="Override username"),
|
23
|
+
boot_disk_size: int = typer.Option(
|
24
|
+
None,
|
25
|
+
"--size",
|
26
|
+
"-s",
|
27
|
+
help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
|
28
|
+
),
|
29
|
+
availability_zone: str = typer.Option(
|
30
|
+
None,
|
31
|
+
"--az",
|
32
|
+
help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
|
33
|
+
),
|
34
|
+
):
|
35
|
+
"""Launch a new engine instance."""
|
36
|
+
from .engine_core import launch_engine
|
37
|
+
return launch_engine(name, engine_type, user, boot_disk_size, availability_zone)
|
38
|
+
|
39
|
+
|
40
|
+
@engine_app.command("list")
|
41
|
+
def list_engines_cmd(
|
42
|
+
user: str = typer.Option(None, "--user", "-u", help="Filter by user"),
|
43
|
+
running_only: bool = typer.Option(
|
44
|
+
False, "--running", help="Show only running engines"
|
45
|
+
),
|
46
|
+
stopped_only: bool = typer.Option(
|
47
|
+
False, "--stopped", help="Show only stopped engines"
|
48
|
+
),
|
49
|
+
detailed: bool = typer.Option(
|
50
|
+
False, "--detailed", "-d", help="Show detailed status (slower)"
|
51
|
+
),
|
52
|
+
):
|
53
|
+
"""List engines (shows all engines by default)."""
|
54
|
+
from .engine_core import list_engines
|
55
|
+
return list_engines(user, running_only, stopped_only, detailed)
|
56
|
+
|
57
|
+
|
58
|
+
@engine_app.command("status")
|
59
|
+
def engine_status_cmd(
|
60
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
61
|
+
detailed: bool = typer.Option(False, "--detailed", "-d", help="Show detailed status (slower)"),
|
62
|
+
show_log: bool = typer.Option(False, "--show-log", help="Show bootstrap log (requires --detailed)"),
|
63
|
+
):
|
64
|
+
"""Show engine status and information."""
|
65
|
+
from .engine_core import engine_status
|
66
|
+
return engine_status(name_or_id, detailed, show_log)
|
67
|
+
|
68
|
+
|
69
|
+
@engine_app.command("start")
|
70
|
+
def start_engine_cmd(
|
71
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
72
|
+
):
|
73
|
+
"""Start a stopped engine."""
|
74
|
+
from .engine_lifecycle import start_engine
|
75
|
+
return start_engine(name_or_id)
|
76
|
+
|
77
|
+
|
78
|
+
@engine_app.command("stop")
|
79
|
+
def stop_engine_cmd(
|
80
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
81
|
+
force: bool = typer.Option(
|
82
|
+
False, "--force", "-f", help="Force stop and detach all studios"
|
83
|
+
),
|
84
|
+
):
|
85
|
+
"""Stop an engine."""
|
86
|
+
from .engine_lifecycle import stop_engine
|
87
|
+
return stop_engine(name_or_id, force)
|
88
|
+
|
89
|
+
|
90
|
+
@engine_app.command("terminate")
|
91
|
+
def terminate_engine_cmd(
|
92
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
93
|
+
):
|
94
|
+
"""Permanently terminate an engine."""
|
95
|
+
from .engine_lifecycle import terminate_engine
|
96
|
+
return terminate_engine(name_or_id)
|
97
|
+
|
98
|
+
|
99
|
+
@engine_app.command("ssh")
|
100
|
+
def ssh_engine_cmd(
|
101
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
102
|
+
admin: bool = typer.Option(
|
103
|
+
False, "--admin", help="Connect as ec2-user instead of the engine owner user"
|
104
|
+
),
|
105
|
+
idle_timeout: int = typer.Option(
|
106
|
+
600,
|
107
|
+
"--idle-timeout",
|
108
|
+
help="Idle timeout (seconds) for the SSM port-forward (0 = disable)",
|
109
|
+
),
|
110
|
+
):
|
111
|
+
"""Connect to an engine via SSH."""
|
112
|
+
from .engine_management import ssh_engine
|
113
|
+
return ssh_engine(name_or_id, admin, idle_timeout)
|
114
|
+
|
115
|
+
|
116
|
+
@engine_app.command("config-ssh")
|
117
|
+
def config_ssh_cmd(
|
118
|
+
clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
|
119
|
+
all_engines: bool = typer.Option(
|
120
|
+
False, "--all", "-a", help="Include all engines from all users"
|
121
|
+
),
|
122
|
+
admin: bool = typer.Option(
|
123
|
+
False,
|
124
|
+
"--admin",
|
125
|
+
help="Generate entries that use ec2-user instead of per-engine owner user",
|
126
|
+
),
|
127
|
+
):
|
128
|
+
"""Update SSH config with available engines."""
|
129
|
+
from .engine_management import config_ssh
|
130
|
+
return config_ssh(clean, all_engines, admin)
|
131
|
+
|
132
|
+
|
133
|
+
@engine_app.command("resize")
|
134
|
+
def resize_engine_cmd(
|
135
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
136
|
+
size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
|
137
|
+
online: bool = typer.Option(
|
138
|
+
False,
|
139
|
+
"--online",
|
140
|
+
help="Resize while running (requires manual filesystem expansion)",
|
141
|
+
),
|
142
|
+
force: bool = typer.Option(
|
143
|
+
False, "--force", "-f", help="Force resize and detach all studios"
|
144
|
+
),
|
145
|
+
):
|
146
|
+
"""Resize an engine's boot disk."""
|
147
|
+
from .engine_management import resize_engine
|
148
|
+
return resize_engine(name_or_id, size, online, force)
|
149
|
+
|
150
|
+
|
151
|
+
@engine_app.command("gami")
|
152
|
+
def create_ami_cmd(
|
153
|
+
name_or_id: str = typer.Argument(
|
154
|
+
help="Engine name or instance ID to create AMI from"
|
155
|
+
),
|
156
|
+
):
|
157
|
+
"""Create a 'Golden AMI' from a running engine."""
|
158
|
+
from .engine_management import create_ami
|
159
|
+
return create_ami(name_or_id)
|
160
|
+
|
161
|
+
|
162
|
+
@engine_app.command("coffee")
|
163
|
+
def coffee_cmd(
|
164
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
165
|
+
duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
|
166
|
+
cancel: bool = typer.Option(
|
167
|
+
False, "--cancel", help="Cancel existing coffee lock instead of extending"
|
168
|
+
),
|
169
|
+
):
|
170
|
+
"""Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
|
171
|
+
from .engine_maintenance import coffee
|
172
|
+
return coffee(name_or_id, duration, cancel)
|
173
|
+
|
174
|
+
|
175
|
+
@engine_app.command("idle")
|
176
|
+
def idle_timeout_cmd_wrapper(
|
177
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
178
|
+
set: str = typer.Option(
|
179
|
+
None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
|
180
|
+
),
|
181
|
+
):
|
182
|
+
"""Show or set the engine idle-detector timeout."""
|
183
|
+
from .engine_maintenance import idle_timeout_cmd
|
184
|
+
return idle_timeout_cmd(name_or_id, set)
|
185
|
+
|
186
|
+
|
187
|
+
@engine_app.command("debug")
|
188
|
+
def debug_engine_cmd(
|
189
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
190
|
+
):
|
191
|
+
"""Debug engine bootstrap status and files."""
|
192
|
+
from .engine_maintenance import debug_engine
|
193
|
+
return debug_engine(name_or_id)
|
194
|
+
|
195
|
+
|
196
|
+
@engine_app.command("repair")
|
197
|
+
def repair_engine_cmd(
|
198
|
+
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
199
|
+
):
|
200
|
+
"""Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
|
201
|
+
from .engine_maintenance import repair_engine
|
202
|
+
return repair_engine(name_or_id)
|
203
|
+
|
204
|
+
|
205
|
+
# Studio commands
|
206
|
+
@studio_app.command("create")
|
207
|
+
def create_studio_cmd(
|
208
|
+
size_gb: int = typer.Option(50, "--size", "-s", help="Studio size in GB"),
|
209
|
+
):
|
210
|
+
"""Create a new studio for the current user."""
|
211
|
+
from .studio_commands import create_studio
|
212
|
+
return create_studio(size_gb)
|
213
|
+
|
214
|
+
|
215
|
+
@studio_app.command("status")
|
216
|
+
def studio_status_cmd(
|
217
|
+
user: str = typer.Option(
|
218
|
+
None, "--user", "-u", help="Check status for a different user (admin only)"
|
219
|
+
),
|
220
|
+
):
|
221
|
+
"""Show status of your studio."""
|
222
|
+
from .studio_commands import studio_status
|
223
|
+
return studio_status(user)
|
224
|
+
|
225
|
+
|
226
|
+
@studio_app.command("attach")
|
227
|
+
def attach_studio_cmd(
|
228
|
+
engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
229
|
+
user: str = typer.Option(
|
230
|
+
None, "--user", "-u", help="Attach a different user's studio (admin only)"
|
231
|
+
),
|
232
|
+
):
|
233
|
+
"""Attach your studio to an engine."""
|
234
|
+
from .studio_commands import attach_studio
|
235
|
+
return attach_studio(engine_name_or_id, user)
|
236
|
+
|
237
|
+
|
238
|
+
@studio_app.command("detach")
|
239
|
+
def detach_studio_cmd(
|
240
|
+
user: str = typer.Option(
|
241
|
+
None, "--user", "-u", help="Detach a different user's studio (admin only)"
|
242
|
+
),
|
243
|
+
):
|
244
|
+
"""Detach your studio from its current engine."""
|
245
|
+
from .studio_commands import detach_studio
|
246
|
+
return detach_studio(user)
|
247
|
+
|
248
|
+
|
249
|
+
@studio_app.command("delete")
|
250
|
+
def delete_studio_cmd(
|
251
|
+
user: str = typer.Option(
|
252
|
+
None, "--user", "-u", help="Delete a different user's studio (admin only)"
|
253
|
+
),
|
254
|
+
):
|
255
|
+
"""Delete your studio permanently."""
|
256
|
+
from .studio_commands import delete_studio
|
257
|
+
return delete_studio(user)
|
258
|
+
|
259
|
+
|
260
|
+
@studio_app.command("list")
|
261
|
+
def list_studios_cmd(
|
262
|
+
all_users: bool = typer.Option(
|
263
|
+
False, "--all", "-a", help="Show all users' studios"
|
264
|
+
),
|
265
|
+
):
|
266
|
+
"""List studios."""
|
267
|
+
from .studio_commands import list_studios
|
268
|
+
return list_studios(all_users)
|
269
|
+
|
270
|
+
|
271
|
+
@studio_app.command("reset")
|
272
|
+
def reset_studio_cmd(
|
273
|
+
user: str = typer.Option(
|
274
|
+
None, "--user", "-u", help="Reset a different user's studio"
|
275
|
+
),
|
276
|
+
):
|
277
|
+
"""Reset a stuck studio (admin operation)."""
|
278
|
+
from .studio_commands import reset_studio
|
279
|
+
return reset_studio(user)
|
280
|
+
|
281
|
+
|
282
|
+
@studio_app.command("resize")
|
283
|
+
def resize_studio_cmd(
|
284
|
+
size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
|
285
|
+
user: str = typer.Option(
|
286
|
+
None, "--user", "-u", help="Resize a different user's studio (admin only)"
|
287
|
+
),
|
288
|
+
):
|
289
|
+
"""Resize your studio volume (requires detachment)."""
|
290
|
+
from .studio_commands import resize_studio
|
291
|
+
return resize_studio(size, user)
|
@@ -231,45 +231,12 @@ def engine_status(
|
|
231
231
|
|
232
232
|
engines = response.json().get("engines", [])
|
233
233
|
engine = resolve_engine(name_or_id, engines)
|
234
|
-
|
234
|
+
|
235
|
+
# Always try to fetch live idle data from the engine for both views
|
236
|
+
live_idle_data = _fetch_live_idle_data(engine["instance_id"])
|
237
|
+
|
235
238
|
# Fast status display (default)
|
236
|
-
if not detailed:
|
237
|
-
# Fetch idle status via SSM with longer timeout
|
238
|
-
ssm = boto3.client("ssm", region_name="us-east-1")
|
239
|
-
idle_data = None # Use None to indicate no data received
|
240
|
-
|
241
|
-
if engine["state"].lower() == "running":
|
242
|
-
try:
|
243
|
-
resp = ssm.send_command(
|
244
|
-
InstanceIds=[engine["instance_id"]],
|
245
|
-
DocumentName="AWS-RunShellScript",
|
246
|
-
Parameters={
|
247
|
-
"commands": [
|
248
|
-
"cat /var/run/idle-detector/last_state.json 2>/dev/null || echo '{}'"
|
249
|
-
],
|
250
|
-
"executionTimeout": ["10"],
|
251
|
-
},
|
252
|
-
)
|
253
|
-
cid = resp["Command"]["CommandId"]
|
254
|
-
|
255
|
-
# Wait up to 3 seconds for result
|
256
|
-
for _ in range(6): # 6 * 0.5 = 3 seconds
|
257
|
-
time.sleep(0.5)
|
258
|
-
inv = ssm.get_command_invocation(
|
259
|
-
CommandId=cid, InstanceId=engine["instance_id"]
|
260
|
-
)
|
261
|
-
if inv["Status"] in ["Success", "Failed"]:
|
262
|
-
break
|
263
|
-
|
264
|
-
if inv["Status"] == "Success":
|
265
|
-
content = inv["StandardOutputContent"].strip()
|
266
|
-
if content and content != "{}":
|
267
|
-
idle_data = json.loads(content)
|
268
|
-
else:
|
269
|
-
idle_data = {} # Empty response but SSM worked
|
270
|
-
except Exception:
|
271
|
-
idle_data = None # SSM failed
|
272
|
-
|
239
|
+
if not detailed:
|
273
240
|
# Determine running state display
|
274
241
|
running_state = engine["state"].lower()
|
275
242
|
if running_state == "running":
|
@@ -282,59 +249,33 @@ def engine_status(
|
|
282
249
|
run_disp = "[dim]Stopped[/dim]"
|
283
250
|
else:
|
284
251
|
run_disp = engine["state"].capitalize()
|
285
|
-
|
286
|
-
#
|
287
|
-
idle_disp = ""
|
288
|
-
|
289
|
-
if idle_data is None:
|
290
|
-
# SSM failed - we don't know the status
|
291
|
-
idle_disp = " [dim]N/A[/dim]"
|
292
|
-
elif not idle_data:
|
293
|
-
# Empty data - likely very early in boot
|
294
|
-
idle_disp = " [dim]N/A[/dim]"
|
295
|
-
else:
|
296
|
-
# We have data
|
297
|
-
is_idle = idle_data.get("idle", False)
|
298
|
-
timeout_sec = idle_data.get("timeout_sec")
|
299
|
-
idle_seconds = idle_data.get("idle_seconds", 0) if is_idle else 0
|
300
|
-
|
301
|
-
if is_idle:
|
302
|
-
if isinstance(timeout_sec, int) and isinstance(idle_seconds, int):
|
303
|
-
remaining = max(0, timeout_sec - idle_seconds)
|
304
|
-
remaining_mins = remaining // 60
|
305
|
-
if remaining_mins == 0:
|
306
|
-
idle_disp = f" [yellow]Idle {idle_seconds//60}m/{timeout_sec//60}m: [red]<1m[/red] left[/yellow]"
|
307
|
-
else:
|
308
|
-
idle_disp = f" [yellow]Idle {idle_seconds//60}m/{timeout_sec//60}m: [red]{remaining_mins}m[/red] left[/yellow]"
|
309
|
-
else:
|
310
|
-
idle_disp = " [yellow]Idle ?/?[/yellow]"
|
311
|
-
else:
|
312
|
-
# Actively not idle
|
313
|
-
idle_disp = " [green]Active[/green]"
|
314
|
-
|
252
|
+
|
253
|
+
# Format idle display using the unified function
|
254
|
+
idle_disp = " " + _format_idle_status_display(live_idle_data, running_state)
|
255
|
+
|
315
256
|
# Build status lines - minimal info for fast view
|
316
257
|
status_lines = [
|
317
258
|
f"[blue]{engine['name']}[/blue] {run_disp}{idle_disp}",
|
318
259
|
]
|
319
|
-
|
320
|
-
# Add activity sensors if we have
|
321
|
-
if
|
260
|
+
|
261
|
+
# Add activity sensors if we have live data
|
262
|
+
if live_idle_data and live_idle_data.get("_reasons_raw"):
|
322
263
|
status_lines.append("") # blank line before sensors
|
323
|
-
|
264
|
+
|
324
265
|
sensor_map = {
|
325
266
|
"CoffeeLockSensor": ("☕", "Coffee"),
|
326
267
|
"ActiveLoginSensor": ("🐚", "SSH"),
|
327
268
|
"IDEConnectionSensor": ("🖥 ", "IDE"),
|
328
269
|
"DockerWorkloadSensor": ("🐳", "Docker"),
|
329
270
|
}
|
330
|
-
|
331
|
-
for r in
|
271
|
+
|
272
|
+
for r in live_idle_data.get("_reasons_raw", []):
|
332
273
|
sensor = r.get("sensor", "Unknown")
|
333
274
|
active = r.get("active", False)
|
334
275
|
icon, label = sensor_map.get(sensor, ("?", sensor))
|
335
276
|
status_str = "[green]YES[/green]" if active else "[dim]nope[/dim]"
|
336
277
|
status_lines.append(f" {icon} {label:6} {status_str}")
|
337
|
-
|
278
|
+
|
338
279
|
# Display in a nice panel
|
339
280
|
console.print(
|
340
281
|
Panel("\n".join(status_lines), title="Engine Status", border_style="blue")
|
@@ -352,6 +293,18 @@ def engine_status(
|
|
352
293
|
idle_detector = engine_details.get("idle_detector", {}) or {}
|
353
294
|
attached_studios = engine_details.get("attached_studios", [])
|
354
295
|
|
296
|
+
# Overlay stale API data with fresh data from the engine
|
297
|
+
if live_idle_data:
|
298
|
+
# If API didn't indicate availability, replace entirely; otherwise, update.
|
299
|
+
if not idle_detector.get("available"):
|
300
|
+
idle_detector = live_idle_data
|
301
|
+
else:
|
302
|
+
idle_detector.update(live_idle_data)
|
303
|
+
else:
|
304
|
+
# SSM failed - mark as unavailable if we don't have good data from API
|
305
|
+
if not idle_detector.get("available"):
|
306
|
+
idle_detector = {"available": False} # Mark as unavailable
|
307
|
+
|
355
308
|
# Calculate costs
|
356
309
|
launch_time = parse_launch_time(engine["launch_time"])
|
357
310
|
uptime = datetime.now(timezone.utc) - launch_time
|
@@ -406,37 +359,8 @@ def engine_status(
|
|
406
359
|
else:
|
407
360
|
run_disp = engine["state"].capitalize()
|
408
361
|
|
409
|
-
#
|
410
|
-
|
411
|
-
# If we don't have idle info or it's explicitly unavailable, show N/A
|
412
|
-
if not idle_info or idle_info.get("available") == False:
|
413
|
-
return "[dim]N/A[/dim]"
|
414
|
-
|
415
|
-
if idle_info.get("status") == "active":
|
416
|
-
return "[green]Active[/green]"
|
417
|
-
if running_state in ("stopped", "stopping"):
|
418
|
-
return "[dim]N/A[/dim]"
|
419
|
-
|
420
|
-
# If idle, show time/threshold with time remaining if available
|
421
|
-
if idle_info.get("status") == "idle":
|
422
|
-
idle_seconds_v = idle_info.get("idle_seconds")
|
423
|
-
thresh_v = idle_info.get("idle_threshold")
|
424
|
-
if isinstance(idle_seconds_v, (int, float)) and isinstance(thresh_v, (int, float)):
|
425
|
-
remaining = max(0, int(thresh_v) - int(idle_seconds_v))
|
426
|
-
remaining_mins = remaining // 60
|
427
|
-
if remaining_mins == 0:
|
428
|
-
return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]<1m[/red] left[/yellow]"
|
429
|
-
else:
|
430
|
-
return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]{remaining_mins}m[/red] left[/yellow]"
|
431
|
-
elif isinstance(thresh_v, (int, float)):
|
432
|
-
return f"[yellow]Idle ?/{int(thresh_v)//60}m[/yellow]"
|
433
|
-
else:
|
434
|
-
return "[yellow]Idle ?/?[/yellow]"
|
435
|
-
|
436
|
-
# Default to N/A if we can't determine status
|
437
|
-
return "[dim]N/A[/dim]"
|
438
|
-
|
439
|
-
active_disp = _compute_active_disp(idle_detector)
|
362
|
+
# Recompute header display with latest data
|
363
|
+
active_disp = _format_idle_status_display(idle_detector, running_state)
|
440
364
|
|
441
365
|
top_lines = [
|
442
366
|
f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n",
|
@@ -553,122 +477,6 @@ def engine_status(
|
|
553
477
|
except Exception:
|
554
478
|
pass
|
555
479
|
|
556
|
-
# Try to enrich/fallback idle-detector details from on-engine summary file via SSM
|
557
|
-
def _fetch_idle_summary_via_ssm(instance_id: str) -> Optional[Dict]:
|
558
|
-
try:
|
559
|
-
ssm = boto3.client("ssm", region_name="us-east-1")
|
560
|
-
res = ssm.send_command(
|
561
|
-
InstanceIds=[instance_id],
|
562
|
-
DocumentName="AWS-RunShellScript",
|
563
|
-
Parameters={
|
564
|
-
"commands": [
|
565
|
-
"cat /var/run/idle-detector/last_state.json 2>/dev/null || true",
|
566
|
-
],
|
567
|
-
"executionTimeout": ["5"],
|
568
|
-
},
|
569
|
-
)
|
570
|
-
cid = res["Command"]["CommandId"]
|
571
|
-
# Wait up to 2 seconds for SSM command to complete (was 1 second)
|
572
|
-
for _ in range(4): # 4 * 0.5 = 2 seconds
|
573
|
-
time.sleep(0.5)
|
574
|
-
inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
|
575
|
-
if inv["Status"] in ["Success", "Failed"]:
|
576
|
-
break
|
577
|
-
if inv["Status"] != "Success":
|
578
|
-
return None
|
579
|
-
content = inv["StandardOutputContent"].strip()
|
580
|
-
if not content:
|
581
|
-
return None
|
582
|
-
data = json.loads(content)
|
583
|
-
# Convert last_state schema (new or old) to idle_detector schema used by CLI output
|
584
|
-
idle_info: Dict[str, Any] = {"available": True}
|
585
|
-
|
586
|
-
# Active/idle
|
587
|
-
idle_flag = bool(data.get("idle", False))
|
588
|
-
idle_info["status"] = "idle" if idle_flag else "active"
|
589
|
-
|
590
|
-
# Threshold and elapsed
|
591
|
-
if isinstance(data.get("timeout_sec"), (int, float)):
|
592
|
-
idle_info["idle_threshold"] = int(data["timeout_sec"]) # seconds
|
593
|
-
if isinstance(data.get("idle_seconds"), (int, float)):
|
594
|
-
idle_info["idle_seconds"] = int(data["idle_seconds"])
|
595
|
-
|
596
|
-
# Keep raw reasons for sensor display when available (new schema)
|
597
|
-
if isinstance(data.get("reasons"), list):
|
598
|
-
idle_info["_reasons_raw"] = data["reasons"]
|
599
|
-
else:
|
600
|
-
# Fallback: synthesize reasons from the old forensics layout
|
601
|
-
f_all = data.get("forensics", {}) or {}
|
602
|
-
synthesized = []
|
603
|
-
|
604
|
-
def _mk(sensor_name: str, key: str):
|
605
|
-
entry = f_all.get(key, {}) or {}
|
606
|
-
synthesized.append(
|
607
|
-
{
|
608
|
-
"sensor": sensor_name,
|
609
|
-
"active": bool(entry.get("active", False)),
|
610
|
-
"reason": entry.get("reason", ""),
|
611
|
-
"forensic": entry.get("forensic", {}),
|
612
|
-
}
|
613
|
-
)
|
614
|
-
|
615
|
-
_mk("CoffeeLockSensor", "coffee")
|
616
|
-
_mk("ActiveLoginSensor", "ssh")
|
617
|
-
_mk("IDEConnectionSensor", "ide")
|
618
|
-
_mk("DockerWorkloadSensor", "docker")
|
619
|
-
idle_info["_reasons_raw"] = synthesized
|
620
|
-
|
621
|
-
# Derive details from sensors
|
622
|
-
for r in idle_info.get("_reasons_raw", []):
|
623
|
-
if not r.get("active"):
|
624
|
-
continue
|
625
|
-
sensor = (r.get("sensor") or "").lower()
|
626
|
-
forensic = r.get("forensic") or {}
|
627
|
-
if sensor == "ideconnectionsensor":
|
628
|
-
# Prefer unique_pid_count written by new detector
|
629
|
-
cnt = forensic.get("unique_pid_count")
|
630
|
-
if not isinstance(cnt, int):
|
631
|
-
cnt = forensic.get("matches")
|
632
|
-
if isinstance(cnt, int):
|
633
|
-
idle_info["ide_connections"] = {"connection_count": cnt}
|
634
|
-
else:
|
635
|
-
idle_info["ide_connections"] = {"connection_count": 1}
|
636
|
-
elif sensor == "coffeelocksensor":
|
637
|
-
rem = forensic.get("remaining_sec")
|
638
|
-
if isinstance(rem, (int, float)) and rem > 0:
|
639
|
-
idle_info["coffee_lock"] = format_duration(
|
640
|
-
timedelta(seconds=int(rem))
|
641
|
-
)
|
642
|
-
elif sensor == "activeloginsensor":
|
643
|
-
sess = {
|
644
|
-
"tty": forensic.get("tty", "pts/?"),
|
645
|
-
"pid": forensic.get("pid", "?"),
|
646
|
-
"idle_time": forensic.get("idle_sec", 0),
|
647
|
-
"from_ip": forensic.get("remote_addr", "unknown"),
|
648
|
-
}
|
649
|
-
idle_info.setdefault("ssh_sessions", []).append(sess)
|
650
|
-
return idle_info
|
651
|
-
except Exception:
|
652
|
-
return None
|
653
|
-
|
654
|
-
# Always try to enrich from on-engine summary (fast, best-effort)
|
655
|
-
overlay = _fetch_idle_summary_via_ssm(engine["instance_id"])
|
656
|
-
if overlay:
|
657
|
-
# If API didn't indicate availability, replace entirely; otherwise fill gaps
|
658
|
-
if not idle_detector.get("available"):
|
659
|
-
idle_detector = overlay
|
660
|
-
else:
|
661
|
-
for k, v in overlay.items():
|
662
|
-
idle_detector.setdefault(k, v)
|
663
|
-
else:
|
664
|
-
# SSM failed - mark as unavailable if we don't have good data
|
665
|
-
if not idle_detector.get("available"):
|
666
|
-
idle_detector = {"available": False} # Mark as unavailable
|
667
|
-
|
668
|
-
# Recompute header display with latest data
|
669
|
-
active_disp = _compute_active_disp(idle_detector)
|
670
|
-
top_lines[0] = f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n"
|
671
|
-
|
672
480
|
# Activity Sensors (show all with YES/no)
|
673
481
|
if idle_detector.get("available"):
|
674
482
|
status_lines.append("")
|
@@ -694,11 +502,6 @@ def engine_status(
|
|
694
502
|
status_lines.append(_sensor_line(" IDE ", "IDEConnectionSensor", "🖥"))
|
695
503
|
status_lines.append(_sensor_line("Docker", "DockerWorkloadSensor", "🐳"))
|
696
504
|
|
697
|
-
# Recompute display with latest idle detector data
|
698
|
-
active_disp = _compute_active_disp(idle_detector)
|
699
|
-
# Rewrite top header line (index 0) to include updated display
|
700
|
-
top_lines[0] = f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n"
|
701
|
-
|
702
505
|
# Combine top summary and details
|
703
506
|
all_lines = top_lines + status_lines
|
704
507
|
console.print(
|
@@ -737,3 +540,114 @@ def engine_status(
|
|
737
540
|
console.print("[red]❌ Could not retrieve bootstrap log[/red]")
|
738
541
|
except Exception as e:
|
739
542
|
console.print(f"[red]❌ Error fetching log: {e}[/red]")
|
543
|
+
|
544
|
+
|
545
|
+
def _format_idle_status_display(
|
546
|
+
idle_info: Optional[Dict[str, Any]], running_state: str
|
547
|
+
) -> str:
|
548
|
+
"""Computes the rich string for active/idle status display."""
|
549
|
+
# If we don't have idle info or it's explicitly unavailable, show N/A
|
550
|
+
if not idle_info or idle_info.get("available") is False:
|
551
|
+
return "[dim]N/A[/dim]"
|
552
|
+
|
553
|
+
if idle_info.get("status") == "active":
|
554
|
+
return "[green]Active[/green]"
|
555
|
+
if running_state in ("stopped", "stopping"):
|
556
|
+
return "[dim]N/A[/dim]"
|
557
|
+
|
558
|
+
# If idle, show time/threshold with time remaining if available
|
559
|
+
if idle_info.get("status") == "idle":
|
560
|
+
idle_seconds_v = idle_info.get("idle_seconds")
|
561
|
+
thresh_v = idle_info.get("idle_threshold")
|
562
|
+
if isinstance(idle_seconds_v, (int, float)) and isinstance(
|
563
|
+
thresh_v, (int, float)
|
564
|
+
):
|
565
|
+
remaining = max(0, int(thresh_v) - int(idle_seconds_v))
|
566
|
+
remaining_mins = remaining // 60
|
567
|
+
if remaining_mins == 0:
|
568
|
+
return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]<1m[/red] left[/yellow]"
|
569
|
+
else:
|
570
|
+
return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]{remaining_mins}m[/red] left[/yellow]"
|
571
|
+
elif isinstance(thresh_v, (int, float)):
|
572
|
+
return f"[yellow]Idle ?/{int(thresh_v)//60}m[/yellow]"
|
573
|
+
else:
|
574
|
+
return "[yellow]Idle ?/?[/yellow]"
|
575
|
+
|
576
|
+
# Default to N/A if we can't determine status
|
577
|
+
return "[dim]N/A[/dim]"
|
578
|
+
|
579
|
+
|
580
|
+
def _fetch_live_idle_data(instance_id: str) -> Optional[Dict]:
|
581
|
+
"""
|
582
|
+
Fetch and parse the live idle detector state from an engine via SSM.
|
583
|
+
|
584
|
+
This is the single source of truth for on-engine idle status. It fetches
|
585
|
+
the `last_state.json` file, parses it, and transforms it into the schema
|
586
|
+
used by the CLI for display logic.
|
587
|
+
"""
|
588
|
+
try:
|
589
|
+
ssm = boto3.client("ssm", region_name="us-east-1")
|
590
|
+
res = ssm.send_command(
|
591
|
+
InstanceIds=[instance_id],
|
592
|
+
DocumentName="AWS-RunShellScript",
|
593
|
+
Parameters={
|
594
|
+
"commands": [
|
595
|
+
"cat /var/run/idle-detector/last_state.json 2>/dev/null || true",
|
596
|
+
],
|
597
|
+
"executionTimeout": ["5"],
|
598
|
+
},
|
599
|
+
)
|
600
|
+
cid = res["Command"]["CommandId"]
|
601
|
+
# Wait up to 3 seconds for SSM command to complete
|
602
|
+
for _ in range(6): # 6 * 0.5 = 3 seconds
|
603
|
+
time.sleep(0.5)
|
604
|
+
inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
|
605
|
+
if inv["Status"] in ["Success", "Failed"]:
|
606
|
+
break
|
607
|
+
if inv["Status"] != "Success":
|
608
|
+
return None
|
609
|
+
content = inv["StandardOutputContent"].strip()
|
610
|
+
if not content:
|
611
|
+
return None
|
612
|
+
data = json.loads(content)
|
613
|
+
# Convert last_state schema (new or old) to idle_detector schema used by CLI output
|
614
|
+
idle_info: Dict[str, Any] = {"available": True}
|
615
|
+
|
616
|
+
# Active/idle
|
617
|
+
idle_flag = bool(data.get("idle", False))
|
618
|
+
idle_info["status"] = "idle" if idle_flag else "active"
|
619
|
+
|
620
|
+
# Threshold and elapsed
|
621
|
+
if isinstance(data.get("timeout_sec"), (int, float)):
|
622
|
+
idle_info["idle_threshold"] = int(data["timeout_sec"]) # seconds
|
623
|
+
if isinstance(data.get("idle_seconds"), (int, float)):
|
624
|
+
idle_info["idle_seconds"] = int(data["idle_seconds"])
|
625
|
+
|
626
|
+
# Keep raw reasons for sensor display when available (new schema)
|
627
|
+
if isinstance(data.get("reasons"), list):
|
628
|
+
idle_info["_reasons_raw"] = data["reasons"]
|
629
|
+
else:
|
630
|
+
# Fallback: synthesize reasons from the old forensics layout
|
631
|
+
f_all = data.get("forensics", {}) or {}
|
632
|
+
synthesized = []
|
633
|
+
|
634
|
+
def _mk(sensor_name: str, key: str):
|
635
|
+
entry = f_all.get(key, {}) or {}
|
636
|
+
synthesized.append(
|
637
|
+
{
|
638
|
+
"sensor": sensor_name,
|
639
|
+
"active": bool(entry.get("active", False)),
|
640
|
+
"reason": entry.get("reason", ""),
|
641
|
+
"forensic": entry.get("forensic", {}),
|
642
|
+
}
|
643
|
+
)
|
644
|
+
|
645
|
+
_mk("CoffeeLockSensor", "coffee")
|
646
|
+
_mk("ActiveLoginSensor", "ssh")
|
647
|
+
_mk("IDEConnectionSensor", "ide")
|
648
|
+
_mk("DockerWorkloadSensor", "docker")
|
649
|
+
idle_info["_reasons_raw"] = synthesized
|
650
|
+
|
651
|
+
return idle_info
|
652
|
+
except Exception:
|
653
|
+
return None
|
@@ -3,8 +3,8 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
|
|
3
3
|
dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
|
4
4
|
dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
|
6
|
-
dayhoff_tools/cli/engine/__init__.py,sha256=
|
7
|
-
dayhoff_tools/cli/engine/engine_core.py,sha256=
|
6
|
+
dayhoff_tools/cli/engine/__init__.py,sha256=CGJ2blhWIIEsVb8HoLibZjSlMFRTSYZOO4zDQTtY3SY,9300
|
7
|
+
dayhoff_tools/cli/engine/engine_core.py,sha256=IEU2m93qArFx-EdVHnepWKvLwlNUAcT7ytIAqeOY6a0,25147
|
8
8
|
dayhoff_tools/cli/engine/engine_lifecycle.py,sha256=_Dk-EZs_qbm8APdOuGOuxhlbK6RgkkoLk2nrwKoo1-A,4519
|
9
9
|
dayhoff_tools/cli/engine/engine_maintenance.py,sha256=Vz4FpbM0eyfl9tTM6Q8z0ZzS2Ug5gAE-uKVbqBHkznU,13761
|
10
10
|
dayhoff_tools/cli/engine/engine_management.py,sha256=s_H3FtMlKsdfzR8pwV-j2W2QX-Fypkqj2kPC0aTqC1A,19072
|
@@ -33,7 +33,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
|
|
33
33
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
34
34
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
35
35
|
dayhoff_tools/warehouse.py,sha256=UETBtZD3r7WgvURqfGbyHlT7cxoiVq8isjzMuerKw8I,24475
|
36
|
-
dayhoff_tools-1.9.
|
37
|
-
dayhoff_tools-1.9.
|
38
|
-
dayhoff_tools-1.9.
|
39
|
-
dayhoff_tools-1.9.
|
36
|
+
dayhoff_tools-1.9.12.dist-info/METADATA,sha256=EkiO1dejnv9KxAs8X7ycLdfGUlph70tyIeNPWQc-12U,2915
|
37
|
+
dayhoff_tools-1.9.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
38
|
+
dayhoff_tools-1.9.12.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
39
|
+
dayhoff_tools-1.9.12.dist-info/RECORD,,
|
File without changes
|
File without changes
|