dayhoff-tools 1.9.10__tar.gz → 1.9.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/PKG-INFO +1 -1
  2. dayhoff_tools-1.9.12/dayhoff_tools/cli/engine/__init__.py +291 -0
  3. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/engine/engine_core.py +141 -227
  4. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/pyproject.toml +1 -1
  5. dayhoff_tools-1.9.10/dayhoff_tools/cli/engine/__init__.py +0 -49
  6. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/README.md +0 -0
  7. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/__init__.py +0 -0
  8. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/chemistry/standardizer.py +0 -0
  9. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/chemistry/utils.py +0 -0
  10. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/__init__.py +0 -0
  11. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/cloud_commands.py +0 -0
  12. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/engine/engine_lifecycle.py +0 -0
  13. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/engine/engine_maintenance.py +0 -0
  14. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/engine/engine_management.py +0 -0
  15. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/engine/shared.py +0 -0
  16. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/engine/studio_commands.py +0 -0
  17. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/main.py +0 -0
  18. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/swarm_commands.py +0 -0
  19. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/cli/utility_commands.py +0 -0
  20. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/base.py +0 -0
  21. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  22. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  23. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  24. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/job_runner.py +0 -0
  25. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/processors.py +0 -0
  26. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/deployment/swarm.py +0 -0
  27. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/embedders.py +0 -0
  28. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/fasta.py +0 -0
  29. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/file_ops.py +0 -0
  30. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/h5.py +0 -0
  31. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/intake/gcp.py +0 -0
  32. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/intake/gtdb.py +0 -0
  33. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/intake/kegg.py +0 -0
  34. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/intake/mmseqs.py +0 -0
  35. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/intake/structure.py +0 -0
  36. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/intake/uniprot.py +0 -0
  37. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/logs.py +0 -0
  38. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/sqlite.py +0 -0
  39. {dayhoff_tools-1.9.10 → dayhoff_tools-1.9.12}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.9.10
3
+ Version: 1.9.12
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -0,0 +1,291 @@
1
+ """Engine and Studio management commands for DHT CLI."""
2
+
3
+ import typer
4
+
5
+ # Initialize Typer apps
6
+ engine_app = typer.Typer(help="Manage compute engines for development.")
7
+ studio_app = typer.Typer(help="Manage persistent development studios.")
8
+
9
+ # Use lazy loading pattern similar to main.py swarm commands
10
+ # Import functions only when commands are actually called
11
+
12
+ # Engine commands
13
+ @engine_app.command("launch")
14
+ def launch_engine_cmd(
15
+ name: str = typer.Argument(help="Name for the new engine"),
16
+ engine_type: str = typer.Option(
17
+ "cpu",
18
+ "--type",
19
+ "-t",
20
+ help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
21
+ ),
22
+ user: str = typer.Option(None, "--user", "-u", help="Override username"),
23
+ boot_disk_size: int = typer.Option(
24
+ None,
25
+ "--size",
26
+ "-s",
27
+ help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
28
+ ),
29
+ availability_zone: str = typer.Option(
30
+ None,
31
+ "--az",
32
+ help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
33
+ ),
34
+ ):
35
+ """Launch a new engine instance."""
36
+ from .engine_core import launch_engine
37
+ return launch_engine(name, engine_type, user, boot_disk_size, availability_zone)
38
+
39
+
40
+ @engine_app.command("list")
41
+ def list_engines_cmd(
42
+ user: str = typer.Option(None, "--user", "-u", help="Filter by user"),
43
+ running_only: bool = typer.Option(
44
+ False, "--running", help="Show only running engines"
45
+ ),
46
+ stopped_only: bool = typer.Option(
47
+ False, "--stopped", help="Show only stopped engines"
48
+ ),
49
+ detailed: bool = typer.Option(
50
+ False, "--detailed", "-d", help="Show detailed status (slower)"
51
+ ),
52
+ ):
53
+ """List engines (shows all engines by default)."""
54
+ from .engine_core import list_engines
55
+ return list_engines(user, running_only, stopped_only, detailed)
56
+
57
+
58
+ @engine_app.command("status")
59
+ def engine_status_cmd(
60
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
61
+ detailed: bool = typer.Option(False, "--detailed", "-d", help="Show detailed status (slower)"),
62
+ show_log: bool = typer.Option(False, "--show-log", help="Show bootstrap log (requires --detailed)"),
63
+ ):
64
+ """Show engine status and information."""
65
+ from .engine_core import engine_status
66
+ return engine_status(name_or_id, detailed, show_log)
67
+
68
+
69
+ @engine_app.command("start")
70
+ def start_engine_cmd(
71
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
72
+ ):
73
+ """Start a stopped engine."""
74
+ from .engine_lifecycle import start_engine
75
+ return start_engine(name_or_id)
76
+
77
+
78
+ @engine_app.command("stop")
79
+ def stop_engine_cmd(
80
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
81
+ force: bool = typer.Option(
82
+ False, "--force", "-f", help="Force stop and detach all studios"
83
+ ),
84
+ ):
85
+ """Stop an engine."""
86
+ from .engine_lifecycle import stop_engine
87
+ return stop_engine(name_or_id, force)
88
+
89
+
90
+ @engine_app.command("terminate")
91
+ def terminate_engine_cmd(
92
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
93
+ ):
94
+ """Permanently terminate an engine."""
95
+ from .engine_lifecycle import terminate_engine
96
+ return terminate_engine(name_or_id)
97
+
98
+
99
+ @engine_app.command("ssh")
100
+ def ssh_engine_cmd(
101
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
102
+ admin: bool = typer.Option(
103
+ False, "--admin", help="Connect as ec2-user instead of the engine owner user"
104
+ ),
105
+ idle_timeout: int = typer.Option(
106
+ 600,
107
+ "--idle-timeout",
108
+ help="Idle timeout (seconds) for the SSM port-forward (0 = disable)",
109
+ ),
110
+ ):
111
+ """Connect to an engine via SSH."""
112
+ from .engine_management import ssh_engine
113
+ return ssh_engine(name_or_id, admin, idle_timeout)
114
+
115
+
116
+ @engine_app.command("config-ssh")
117
+ def config_ssh_cmd(
118
+ clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
119
+ all_engines: bool = typer.Option(
120
+ False, "--all", "-a", help="Include all engines from all users"
121
+ ),
122
+ admin: bool = typer.Option(
123
+ False,
124
+ "--admin",
125
+ help="Generate entries that use ec2-user instead of per-engine owner user",
126
+ ),
127
+ ):
128
+ """Update SSH config with available engines."""
129
+ from .engine_management import config_ssh
130
+ return config_ssh(clean, all_engines, admin)
131
+
132
+
133
+ @engine_app.command("resize")
134
+ def resize_engine_cmd(
135
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
136
+ size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
137
+ online: bool = typer.Option(
138
+ False,
139
+ "--online",
140
+ help="Resize while running (requires manual filesystem expansion)",
141
+ ),
142
+ force: bool = typer.Option(
143
+ False, "--force", "-f", help="Force resize and detach all studios"
144
+ ),
145
+ ):
146
+ """Resize an engine's boot disk."""
147
+ from .engine_management import resize_engine
148
+ return resize_engine(name_or_id, size, online, force)
149
+
150
+
151
+ @engine_app.command("gami")
152
+ def create_ami_cmd(
153
+ name_or_id: str = typer.Argument(
154
+ help="Engine name or instance ID to create AMI from"
155
+ ),
156
+ ):
157
+ """Create a 'Golden AMI' from a running engine."""
158
+ from .engine_management import create_ami
159
+ return create_ami(name_or_id)
160
+
161
+
162
+ @engine_app.command("coffee")
163
+ def coffee_cmd(
164
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
165
+ duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
166
+ cancel: bool = typer.Option(
167
+ False, "--cancel", help="Cancel existing coffee lock instead of extending"
168
+ ),
169
+ ):
170
+ """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
171
+ from .engine_maintenance import coffee
172
+ return coffee(name_or_id, duration, cancel)
173
+
174
+
175
+ @engine_app.command("idle")
176
+ def idle_timeout_cmd_wrapper(
177
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
178
+ set: str = typer.Option(
179
+ None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
180
+ ),
181
+ ):
182
+ """Show or set the engine idle-detector timeout."""
183
+ from .engine_maintenance import idle_timeout_cmd
184
+ return idle_timeout_cmd(name_or_id, set)
185
+
186
+
187
+ @engine_app.command("debug")
188
+ def debug_engine_cmd(
189
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
190
+ ):
191
+ """Debug engine bootstrap status and files."""
192
+ from .engine_maintenance import debug_engine
193
+ return debug_engine(name_or_id)
194
+
195
+
196
+ @engine_app.command("repair")
197
+ def repair_engine_cmd(
198
+ name_or_id: str = typer.Argument(help="Engine name or instance ID"),
199
+ ):
200
+ """Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
201
+ from .engine_maintenance import repair_engine
202
+ return repair_engine(name_or_id)
203
+
204
+
205
+ # Studio commands
206
+ @studio_app.command("create")
207
+ def create_studio_cmd(
208
+ size_gb: int = typer.Option(50, "--size", "-s", help="Studio size in GB"),
209
+ ):
210
+ """Create a new studio for the current user."""
211
+ from .studio_commands import create_studio
212
+ return create_studio(size_gb)
213
+
214
+
215
+ @studio_app.command("status")
216
+ def studio_status_cmd(
217
+ user: str = typer.Option(
218
+ None, "--user", "-u", help="Check status for a different user (admin only)"
219
+ ),
220
+ ):
221
+ """Show status of your studio."""
222
+ from .studio_commands import studio_status
223
+ return studio_status(user)
224
+
225
+
226
+ @studio_app.command("attach")
227
+ def attach_studio_cmd(
228
+ engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
229
+ user: str = typer.Option(
230
+ None, "--user", "-u", help="Attach a different user's studio (admin only)"
231
+ ),
232
+ ):
233
+ """Attach your studio to an engine."""
234
+ from .studio_commands import attach_studio
235
+ return attach_studio(engine_name_or_id, user)
236
+
237
+
238
+ @studio_app.command("detach")
239
+ def detach_studio_cmd(
240
+ user: str = typer.Option(
241
+ None, "--user", "-u", help="Detach a different user's studio (admin only)"
242
+ ),
243
+ ):
244
+ """Detach your studio from its current engine."""
245
+ from .studio_commands import detach_studio
246
+ return detach_studio(user)
247
+
248
+
249
+ @studio_app.command("delete")
250
+ def delete_studio_cmd(
251
+ user: str = typer.Option(
252
+ None, "--user", "-u", help="Delete a different user's studio (admin only)"
253
+ ),
254
+ ):
255
+ """Delete your studio permanently."""
256
+ from .studio_commands import delete_studio
257
+ return delete_studio(user)
258
+
259
+
260
+ @studio_app.command("list")
261
+ def list_studios_cmd(
262
+ all_users: bool = typer.Option(
263
+ False, "--all", "-a", help="Show all users' studios"
264
+ ),
265
+ ):
266
+ """List studios."""
267
+ from .studio_commands import list_studios
268
+ return list_studios(all_users)
269
+
270
+
271
+ @studio_app.command("reset")
272
+ def reset_studio_cmd(
273
+ user: str = typer.Option(
274
+ None, "--user", "-u", help="Reset a different user's studio"
275
+ ),
276
+ ):
277
+ """Reset a stuck studio (admin operation)."""
278
+ from .studio_commands import reset_studio
279
+ return reset_studio(user)
280
+
281
+
282
+ @studio_app.command("resize")
283
+ def resize_studio_cmd(
284
+ size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
285
+ user: str = typer.Option(
286
+ None, "--user", "-u", help="Resize a different user's studio (admin only)"
287
+ ),
288
+ ):
289
+ """Resize your studio volume (requires detachment)."""
290
+ from .studio_commands import resize_studio
291
+ return resize_studio(size, user)
@@ -231,45 +231,12 @@ def engine_status(
231
231
 
232
232
  engines = response.json().get("engines", [])
233
233
  engine = resolve_engine(name_or_id, engines)
234
-
234
+
235
+ # Always try to fetch live idle data from the engine for both views
236
+ live_idle_data = _fetch_live_idle_data(engine["instance_id"])
237
+
235
238
  # Fast status display (default)
236
- if not detailed:
237
- # Fetch idle status via SSM with longer timeout
238
- ssm = boto3.client("ssm", region_name="us-east-1")
239
- idle_data = None # Use None to indicate no data received
240
-
241
- if engine["state"].lower() == "running":
242
- try:
243
- resp = ssm.send_command(
244
- InstanceIds=[engine["instance_id"]],
245
- DocumentName="AWS-RunShellScript",
246
- Parameters={
247
- "commands": [
248
- "cat /var/run/idle-detector/last_state.json 2>/dev/null || echo '{}'"
249
- ],
250
- "executionTimeout": ["10"],
251
- },
252
- )
253
- cid = resp["Command"]["CommandId"]
254
-
255
- # Wait up to 3 seconds for result
256
- for _ in range(6): # 6 * 0.5 = 3 seconds
257
- time.sleep(0.5)
258
- inv = ssm.get_command_invocation(
259
- CommandId=cid, InstanceId=engine["instance_id"]
260
- )
261
- if inv["Status"] in ["Success", "Failed"]:
262
- break
263
-
264
- if inv["Status"] == "Success":
265
- content = inv["StandardOutputContent"].strip()
266
- if content and content != "{}":
267
- idle_data = json.loads(content)
268
- else:
269
- idle_data = {} # Empty response but SSM worked
270
- except Exception:
271
- idle_data = None # SSM failed
272
-
239
+ if not detailed:
273
240
  # Determine running state display
274
241
  running_state = engine["state"].lower()
275
242
  if running_state == "running":
@@ -282,59 +249,33 @@ def engine_status(
282
249
  run_disp = "[dim]Stopped[/dim]"
283
250
  else:
284
251
  run_disp = engine["state"].capitalize()
285
-
286
- # Determine idle/active status
287
- idle_disp = ""
288
- if running_state == "running":
289
- if idle_data is None:
290
- # SSM failed - we don't know the status
291
- idle_disp = " [dim]N/A[/dim]"
292
- elif not idle_data:
293
- # Empty data - likely very early in boot
294
- idle_disp = " [dim]N/A[/dim]"
295
- else:
296
- # We have data
297
- is_idle = idle_data.get("idle", False)
298
- timeout_sec = idle_data.get("timeout_sec")
299
- idle_seconds = idle_data.get("idle_seconds", 0) if is_idle else 0
300
-
301
- if is_idle:
302
- if isinstance(timeout_sec, int) and isinstance(idle_seconds, int):
303
- remaining = max(0, timeout_sec - idle_seconds)
304
- remaining_mins = remaining // 60
305
- if remaining_mins == 0:
306
- idle_disp = f" [yellow]Idle {idle_seconds//60}m/{timeout_sec//60}m: [red]<1m[/red] left[/yellow]"
307
- else:
308
- idle_disp = f" [yellow]Idle {idle_seconds//60}m/{timeout_sec//60}m: [red]{remaining_mins}m[/red] left[/yellow]"
309
- else:
310
- idle_disp = " [yellow]Idle ?/?[/yellow]"
311
- else:
312
- # Actively not idle
313
- idle_disp = " [green]Active[/green]"
314
-
252
+
253
+ # Format idle display using the unified function
254
+ idle_disp = " " + _format_idle_status_display(live_idle_data, running_state)
255
+
315
256
  # Build status lines - minimal info for fast view
316
257
  status_lines = [
317
258
  f"[blue]{engine['name']}[/blue] {run_disp}{idle_disp}",
318
259
  ]
319
-
320
- # Add activity sensors if we have idle data
321
- if idle_data and idle_data.get("reasons"):
260
+
261
+ # Add activity sensors if we have live data
262
+ if live_idle_data and live_idle_data.get("_reasons_raw"):
322
263
  status_lines.append("") # blank line before sensors
323
-
264
+
324
265
  sensor_map = {
325
266
  "CoffeeLockSensor": ("☕", "Coffee"),
326
267
  "ActiveLoginSensor": ("🐚", "SSH"),
327
268
  "IDEConnectionSensor": ("🖥 ", "IDE"),
328
269
  "DockerWorkloadSensor": ("🐳", "Docker"),
329
270
  }
330
-
331
- for r in idle_data.get("reasons", []):
271
+
272
+ for r in live_idle_data.get("_reasons_raw", []):
332
273
  sensor = r.get("sensor", "Unknown")
333
274
  active = r.get("active", False)
334
275
  icon, label = sensor_map.get(sensor, ("?", sensor))
335
276
  status_str = "[green]YES[/green]" if active else "[dim]nope[/dim]"
336
277
  status_lines.append(f" {icon} {label:6} {status_str}")
337
-
278
+
338
279
  # Display in a nice panel
339
280
  console.print(
340
281
  Panel("\n".join(status_lines), title="Engine Status", border_style="blue")
@@ -352,6 +293,18 @@ def engine_status(
352
293
  idle_detector = engine_details.get("idle_detector", {}) or {}
353
294
  attached_studios = engine_details.get("attached_studios", [])
354
295
 
296
+ # Overlay stale API data with fresh data from the engine
297
+ if live_idle_data:
298
+ # If API didn't indicate availability, replace entirely; otherwise, update.
299
+ if not idle_detector.get("available"):
300
+ idle_detector = live_idle_data
301
+ else:
302
+ idle_detector.update(live_idle_data)
303
+ else:
304
+ # SSM failed - mark as unavailable if we don't have good data from API
305
+ if not idle_detector.get("available"):
306
+ idle_detector = {"available": False} # Mark as unavailable
307
+
355
308
  # Calculate costs
356
309
  launch_time = parse_launch_time(engine["launch_time"])
357
310
  uptime = datetime.now(timezone.utc) - launch_time
@@ -406,37 +359,8 @@ def engine_status(
406
359
  else:
407
360
  run_disp = engine["state"].capitalize()
408
361
 
409
- # Compose Active/Idle header with extra detail when idle
410
- def _compute_active_disp(idle_info: Dict[str, Any]) -> str:
411
- # If we don't have idle info or it's explicitly unavailable, show N/A
412
- if not idle_info or idle_info.get("available") == False:
413
- return "[dim]N/A[/dim]"
414
-
415
- if idle_info.get("status") == "active":
416
- return "[green]Active[/green]"
417
- if running_state in ("stopped", "stopping"):
418
- return "[dim]N/A[/dim]"
419
-
420
- # If idle, show time/threshold with time remaining if available
421
- if idle_info.get("status") == "idle":
422
- idle_seconds_v = idle_info.get("idle_seconds")
423
- thresh_v = idle_info.get("idle_threshold")
424
- if isinstance(idle_seconds_v, (int, float)) and isinstance(thresh_v, (int, float)):
425
- remaining = max(0, int(thresh_v) - int(idle_seconds_v))
426
- remaining_mins = remaining // 60
427
- if remaining_mins == 0:
428
- return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]<1m[/red] left[/yellow]"
429
- else:
430
- return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]{remaining_mins}m[/red] left[/yellow]"
431
- elif isinstance(thresh_v, (int, float)):
432
- return f"[yellow]Idle ?/{int(thresh_v)//60}m[/yellow]"
433
- else:
434
- return "[yellow]Idle ?/?[/yellow]"
435
-
436
- # Default to N/A if we can't determine status
437
- return "[dim]N/A[/dim]"
438
-
439
- active_disp = _compute_active_disp(idle_detector)
362
+ # Recompute header display with latest data
363
+ active_disp = _format_idle_status_display(idle_detector, running_state)
440
364
 
441
365
  top_lines = [
442
366
  f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n",
@@ -553,122 +477,6 @@ def engine_status(
553
477
  except Exception:
554
478
  pass
555
479
 
556
- # Try to enrich/fallback idle-detector details from on-engine summary file via SSM
557
- def _fetch_idle_summary_via_ssm(instance_id: str) -> Optional[Dict]:
558
- try:
559
- ssm = boto3.client("ssm", region_name="us-east-1")
560
- res = ssm.send_command(
561
- InstanceIds=[instance_id],
562
- DocumentName="AWS-RunShellScript",
563
- Parameters={
564
- "commands": [
565
- "cat /var/run/idle-detector/last_state.json 2>/dev/null || true",
566
- ],
567
- "executionTimeout": ["5"],
568
- },
569
- )
570
- cid = res["Command"]["CommandId"]
571
- # Wait up to 2 seconds for SSM command to complete (was 1 second)
572
- for _ in range(4): # 4 * 0.5 = 2 seconds
573
- time.sleep(0.5)
574
- inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
575
- if inv["Status"] in ["Success", "Failed"]:
576
- break
577
- if inv["Status"] != "Success":
578
- return None
579
- content = inv["StandardOutputContent"].strip()
580
- if not content:
581
- return None
582
- data = json.loads(content)
583
- # Convert last_state schema (new or old) to idle_detector schema used by CLI output
584
- idle_info: Dict[str, Any] = {"available": True}
585
-
586
- # Active/idle
587
- idle_flag = bool(data.get("idle", False))
588
- idle_info["status"] = "idle" if idle_flag else "active"
589
-
590
- # Threshold and elapsed
591
- if isinstance(data.get("timeout_sec"), (int, float)):
592
- idle_info["idle_threshold"] = int(data["timeout_sec"]) # seconds
593
- if isinstance(data.get("idle_seconds"), (int, float)):
594
- idle_info["idle_seconds"] = int(data["idle_seconds"])
595
-
596
- # Keep raw reasons for sensor display when available (new schema)
597
- if isinstance(data.get("reasons"), list):
598
- idle_info["_reasons_raw"] = data["reasons"]
599
- else:
600
- # Fallback: synthesize reasons from the old forensics layout
601
- f_all = data.get("forensics", {}) or {}
602
- synthesized = []
603
-
604
- def _mk(sensor_name: str, key: str):
605
- entry = f_all.get(key, {}) or {}
606
- synthesized.append(
607
- {
608
- "sensor": sensor_name,
609
- "active": bool(entry.get("active", False)),
610
- "reason": entry.get("reason", ""),
611
- "forensic": entry.get("forensic", {}),
612
- }
613
- )
614
-
615
- _mk("CoffeeLockSensor", "coffee")
616
- _mk("ActiveLoginSensor", "ssh")
617
- _mk("IDEConnectionSensor", "ide")
618
- _mk("DockerWorkloadSensor", "docker")
619
- idle_info["_reasons_raw"] = synthesized
620
-
621
- # Derive details from sensors
622
- for r in idle_info.get("_reasons_raw", []):
623
- if not r.get("active"):
624
- continue
625
- sensor = (r.get("sensor") or "").lower()
626
- forensic = r.get("forensic") or {}
627
- if sensor == "ideconnectionsensor":
628
- # Prefer unique_pid_count written by new detector
629
- cnt = forensic.get("unique_pid_count")
630
- if not isinstance(cnt, int):
631
- cnt = forensic.get("matches")
632
- if isinstance(cnt, int):
633
- idle_info["ide_connections"] = {"connection_count": cnt}
634
- else:
635
- idle_info["ide_connections"] = {"connection_count": 1}
636
- elif sensor == "coffeelocksensor":
637
- rem = forensic.get("remaining_sec")
638
- if isinstance(rem, (int, float)) and rem > 0:
639
- idle_info["coffee_lock"] = format_duration(
640
- timedelta(seconds=int(rem))
641
- )
642
- elif sensor == "activeloginsensor":
643
- sess = {
644
- "tty": forensic.get("tty", "pts/?"),
645
- "pid": forensic.get("pid", "?"),
646
- "idle_time": forensic.get("idle_sec", 0),
647
- "from_ip": forensic.get("remote_addr", "unknown"),
648
- }
649
- idle_info.setdefault("ssh_sessions", []).append(sess)
650
- return idle_info
651
- except Exception:
652
- return None
653
-
654
- # Always try to enrich from on-engine summary (fast, best-effort)
655
- overlay = _fetch_idle_summary_via_ssm(engine["instance_id"])
656
- if overlay:
657
- # If API didn't indicate availability, replace entirely; otherwise fill gaps
658
- if not idle_detector.get("available"):
659
- idle_detector = overlay
660
- else:
661
- for k, v in overlay.items():
662
- idle_detector.setdefault(k, v)
663
- else:
664
- # SSM failed - mark as unavailable if we don't have good data
665
- if not idle_detector.get("available"):
666
- idle_detector = {"available": False} # Mark as unavailable
667
-
668
- # Recompute header display with latest data
669
- active_disp = _compute_active_disp(idle_detector)
670
- top_lines[0] = f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n"
671
-
672
480
  # Activity Sensors (show all with YES/no)
673
481
  if idle_detector.get("available"):
674
482
  status_lines.append("")
@@ -694,11 +502,6 @@ def engine_status(
694
502
  status_lines.append(_sensor_line(" IDE ", "IDEConnectionSensor", "🖥"))
695
503
  status_lines.append(_sensor_line("Docker", "DockerWorkloadSensor", "🐳"))
696
504
 
697
- # Recompute display with latest idle detector data
698
- active_disp = _compute_active_disp(idle_detector)
699
- # Rewrite top header line (index 0) to include updated display
700
- top_lines[0] = f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n"
701
-
702
505
  # Combine top summary and details
703
506
  all_lines = top_lines + status_lines
704
507
  console.print(
@@ -737,3 +540,114 @@ def engine_status(
737
540
  console.print("[red]❌ Could not retrieve bootstrap log[/red]")
738
541
  except Exception as e:
739
542
  console.print(f"[red]❌ Error fetching log: {e}[/red]")
543
+
544
+
545
+ def _format_idle_status_display(
546
+ idle_info: Optional[Dict[str, Any]], running_state: str
547
+ ) -> str:
548
+ """Computes the rich string for active/idle status display."""
549
+ # If we don't have idle info or it's explicitly unavailable, show N/A
550
+ if not idle_info or idle_info.get("available") is False:
551
+ return "[dim]N/A[/dim]"
552
+
553
+ if idle_info.get("status") == "active":
554
+ return "[green]Active[/green]"
555
+ if running_state in ("stopped", "stopping"):
556
+ return "[dim]N/A[/dim]"
557
+
558
+ # If idle, show time/threshold with time remaining if available
559
+ if idle_info.get("status") == "idle":
560
+ idle_seconds_v = idle_info.get("idle_seconds")
561
+ thresh_v = idle_info.get("idle_threshold")
562
+ if isinstance(idle_seconds_v, (int, float)) and isinstance(
563
+ thresh_v, (int, float)
564
+ ):
565
+ remaining = max(0, int(thresh_v) - int(idle_seconds_v))
566
+ remaining_mins = remaining // 60
567
+ if remaining_mins == 0:
568
+ return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]<1m[/red] left[/yellow]"
569
+ else:
570
+ return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: [red]{remaining_mins}m[/red] left[/yellow]"
571
+ elif isinstance(thresh_v, (int, float)):
572
+ return f"[yellow]Idle ?/{int(thresh_v)//60}m[/yellow]"
573
+ else:
574
+ return "[yellow]Idle ?/?[/yellow]"
575
+
576
+ # Default to N/A if we can't determine status
577
+ return "[dim]N/A[/dim]"
578
+
579
+
580
+ def _fetch_live_idle_data(instance_id: str) -> Optional[Dict]:
581
+ """
582
+ Fetch and parse the live idle detector state from an engine via SSM.
583
+
584
+ This is the single source of truth for on-engine idle status. It fetches
585
+ the `last_state.json` file, parses it, and transforms it into the schema
586
+ used by the CLI for display logic.
587
+ """
588
+ try:
589
+ ssm = boto3.client("ssm", region_name="us-east-1")
590
+ res = ssm.send_command(
591
+ InstanceIds=[instance_id],
592
+ DocumentName="AWS-RunShellScript",
593
+ Parameters={
594
+ "commands": [
595
+ "cat /var/run/idle-detector/last_state.json 2>/dev/null || true",
596
+ ],
597
+ "executionTimeout": ["5"],
598
+ },
599
+ )
600
+ cid = res["Command"]["CommandId"]
601
+ # Wait up to 3 seconds for SSM command to complete
602
+ for _ in range(6): # 6 * 0.5 = 3 seconds
603
+ time.sleep(0.5)
604
+ inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
605
+ if inv["Status"] in ["Success", "Failed"]:
606
+ break
607
+ if inv["Status"] != "Success":
608
+ return None
609
+ content = inv["StandardOutputContent"].strip()
610
+ if not content:
611
+ return None
612
+ data = json.loads(content)
613
+ # Convert last_state schema (new or old) to idle_detector schema used by CLI output
614
+ idle_info: Dict[str, Any] = {"available": True}
615
+
616
+ # Active/idle
617
+ idle_flag = bool(data.get("idle", False))
618
+ idle_info["status"] = "idle" if idle_flag else "active"
619
+
620
+ # Threshold and elapsed
621
+ if isinstance(data.get("timeout_sec"), (int, float)):
622
+ idle_info["idle_threshold"] = int(data["timeout_sec"]) # seconds
623
+ if isinstance(data.get("idle_seconds"), (int, float)):
624
+ idle_info["idle_seconds"] = int(data["idle_seconds"])
625
+
626
+ # Keep raw reasons for sensor display when available (new schema)
627
+ if isinstance(data.get("reasons"), list):
628
+ idle_info["_reasons_raw"] = data["reasons"]
629
+ else:
630
+ # Fallback: synthesize reasons from the old forensics layout
631
+ f_all = data.get("forensics", {}) or {}
632
+ synthesized = []
633
+
634
+ def _mk(sensor_name: str, key: str):
635
+ entry = f_all.get(key, {}) or {}
636
+ synthesized.append(
637
+ {
638
+ "sensor": sensor_name,
639
+ "active": bool(entry.get("active", False)),
640
+ "reason": entry.get("reason", ""),
641
+ "forensic": entry.get("forensic", {}),
642
+ }
643
+ )
644
+
645
+ _mk("CoffeeLockSensor", "coffee")
646
+ _mk("ActiveLoginSensor", "ssh")
647
+ _mk("IDEConnectionSensor", "ide")
648
+ _mk("DockerWorkloadSensor", "docker")
649
+ idle_info["_reasons_raw"] = synthesized
650
+
651
+ return idle_info
652
+ except Exception:
653
+ return None
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.9.10"
8
+ version = "1.9.12"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
@@ -1,49 +0,0 @@
1
- """Engine and Studio management commands for DHT CLI."""
2
-
3
- import typer
4
-
5
- # Initialize Typer apps
6
- engine_app = typer.Typer(help="Manage compute engines for development.")
7
- studio_app = typer.Typer(help="Manage persistent development studios.")
8
-
9
- # Import all command functions
10
- from .engine_core import engine_status, launch_engine, list_engines
11
- from .engine_lifecycle import start_engine, stop_engine, terminate_engine
12
- from .engine_maintenance import coffee, debug_engine, idle_timeout_cmd, repair_engine
13
- from .engine_management import config_ssh, create_ami, resize_engine, ssh_engine
14
- from .studio_commands import (
15
- attach_studio,
16
- create_studio,
17
- delete_studio,
18
- detach_studio,
19
- list_studios,
20
- reset_studio,
21
- resize_studio,
22
- studio_status,
23
- )
24
-
25
- # Register engine commands
26
- engine_app.command("launch")(launch_engine)
27
- engine_app.command("list")(list_engines)
28
- engine_app.command("status")(engine_status)
29
- engine_app.command("start")(start_engine)
30
- engine_app.command("stop")(stop_engine)
31
- engine_app.command("terminate")(terminate_engine)
32
- engine_app.command("ssh")(ssh_engine)
33
- engine_app.command("config-ssh")(config_ssh)
34
- engine_app.command("resize")(resize_engine)
35
- engine_app.command("gami")(create_ami)
36
- engine_app.command("coffee")(coffee)
37
- engine_app.command("idle")(idle_timeout_cmd)
38
- engine_app.command("debug")(debug_engine)
39
- engine_app.command("repair")(repair_engine)
40
-
41
- # Register studio commands
42
- studio_app.command("create")(create_studio)
43
- studio_app.command("status")(studio_status)
44
- studio_app.command("attach")(attach_studio)
45
- studio_app.command("detach")(detach_studio)
46
- studio_app.command("delete")(delete_studio)
47
- studio_app.command("list")(list_studios)
48
- studio_app.command("reset")(reset_studio)
49
- studio_app.command("resize")(resize_studio)
File without changes