fbuild-1.2.8-py3-none-any.whl → fbuild-1.2.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. fbuild/__init__.py +5 -1
  2. fbuild/build/configurable_compiler.py +49 -6
  3. fbuild/build/configurable_linker.py +14 -9
  4. fbuild/build/orchestrator_esp32.py +6 -3
  5. fbuild/build/orchestrator_rp2040.py +6 -2
  6. fbuild/cli.py +300 -5
  7. fbuild/config/ini_parser.py +13 -1
  8. fbuild/daemon/__init__.py +11 -0
  9. fbuild/daemon/async_client.py +5 -4
  10. fbuild/daemon/async_client_lib.py +1543 -0
  11. fbuild/daemon/async_protocol.py +825 -0
  12. fbuild/daemon/async_server.py +2100 -0
  13. fbuild/daemon/client.py +425 -13
  14. fbuild/daemon/configuration_lock.py +13 -13
  15. fbuild/daemon/connection.py +508 -0
  16. fbuild/daemon/connection_registry.py +579 -0
  17. fbuild/daemon/daemon.py +517 -164
  18. fbuild/daemon/daemon_context.py +72 -1
  19. fbuild/daemon/device_discovery.py +477 -0
  20. fbuild/daemon/device_manager.py +821 -0
  21. fbuild/daemon/error_collector.py +263 -263
  22. fbuild/daemon/file_cache.py +332 -332
  23. fbuild/daemon/firmware_ledger.py +46 -123
  24. fbuild/daemon/lock_manager.py +508 -508
  25. fbuild/daemon/messages.py +431 -0
  26. fbuild/daemon/operation_registry.py +288 -288
  27. fbuild/daemon/processors/build_processor.py +34 -1
  28. fbuild/daemon/processors/deploy_processor.py +1 -3
  29. fbuild/daemon/processors/locking_processor.py +7 -7
  30. fbuild/daemon/request_processor.py +457 -457
  31. fbuild/daemon/shared_serial.py +7 -7
  32. fbuild/daemon/status_manager.py +238 -238
  33. fbuild/daemon/subprocess_manager.py +316 -316
  34. fbuild/deploy/docker_utils.py +182 -2
  35. fbuild/deploy/monitor.py +1 -1
  36. fbuild/deploy/qemu_runner.py +71 -13
  37. fbuild/ledger/board_ledger.py +46 -122
  38. fbuild/output.py +238 -2
  39. fbuild/packages/library_compiler.py +15 -5
  40. fbuild/packages/library_manager.py +12 -6
  41. fbuild-1.2.15.dist-info/METADATA +569 -0
  42. {fbuild-1.2.8.dist-info → fbuild-1.2.15.dist-info}/RECORD +46 -39
  43. fbuild-1.2.8.dist-info/METADATA +0 -468
  44. {fbuild-1.2.8.dist-info → fbuild-1.2.15.dist-info}/WHEEL +0 -0
  45. {fbuild-1.2.8.dist-info → fbuild-1.2.15.dist-info}/entry_points.txt +0 -0
  46. {fbuild-1.2.8.dist-info → fbuild-1.2.15.dist-info}/licenses/LICENSE +0 -0
  47. {fbuild-1.2.8.dist-info → fbuild-1.2.15.dist-info}/top_level.txt +0 -0
fbuild/daemon/daemon.py CHANGED
@@ -19,6 +19,7 @@ Architecture:
19
19
  """
20
20
 
21
21
  import _thread
22
+ import json
22
23
  import logging
23
24
  import multiprocessing
24
25
  import os
@@ -27,11 +28,14 @@ import subprocess
27
28
  import sys
28
29
  import threading
29
30
  import time
31
+ from dataclasses import dataclass, field
30
32
  from logging.handlers import TimedRotatingFileHandler
31
33
  from pathlib import Path
34
+ from typing import Any, Callable, TypeVar
32
35
 
33
36
  import psutil
34
37
 
38
+ from fbuild.daemon.connection_registry import ConnectionRegistry
35
39
  from fbuild.daemon.daemon_context import (
36
40
  DaemonContext,
37
41
  cleanup_daemon_context,
@@ -50,6 +54,9 @@ from fbuild.daemon.processors.deploy_processor import DeployRequestProcessor
50
54
  from fbuild.daemon.processors.install_deps_processor import InstallDependenciesProcessor
51
55
  from fbuild.daemon.processors.monitor_processor import MonitorRequestProcessor
52
56
 
57
+ # Type variable for request types
58
+ RequestT = TypeVar("RequestT", BuildRequest, DeployRequest, MonitorRequest, InstallDependenciesRequest)
59
+
53
60
  # Daemon configuration
54
61
  DAEMON_NAME = "fbuild_daemon"
55
62
 
@@ -70,10 +77,76 @@ INSTALL_DEPS_REQUEST_FILE = DAEMON_DIR / "install_deps_request.json"
70
77
  LOG_FILE = DAEMON_DIR / "daemon.log"
71
78
  PROCESS_REGISTRY_FILE = DAEMON_DIR / "process_registry.json"
72
79
  FILE_CACHE_FILE = DAEMON_DIR / "file_cache.json"
80
+
81
+ # Device management request/response files
82
+ DEVICE_LIST_REQUEST_FILE = DAEMON_DIR / "device_list_request.json"
83
+ DEVICE_LIST_RESPONSE_FILE = DAEMON_DIR / "device_list_response.json"
84
+ DEVICE_STATUS_REQUEST_FILE = DAEMON_DIR / "device_status_request.json"
85
+ DEVICE_STATUS_RESPONSE_FILE = DAEMON_DIR / "device_status_response.json"
86
+ DEVICE_LEASE_REQUEST_FILE = DAEMON_DIR / "device_lease_request.json"
87
+ DEVICE_LEASE_RESPONSE_FILE = DAEMON_DIR / "device_lease_response.json"
88
+ DEVICE_RELEASE_REQUEST_FILE = DAEMON_DIR / "device_release_request.json"
89
+ DEVICE_RELEASE_RESPONSE_FILE = DAEMON_DIR / "device_release_response.json"
90
+ DEVICE_PREEMPT_REQUEST_FILE = DAEMON_DIR / "device_preempt_request.json"
91
+ DEVICE_PREEMPT_RESPONSE_FILE = DAEMON_DIR / "device_preempt_response.json"
92
+
93
+ # Connection management file patterns
94
+ CONNECTION_FILES_PATTERN = "connect_*.json"
95
+ HEARTBEAT_FILES_PATTERN = "heartbeat_*.json"
96
+ DISCONNECT_FILES_PATTERN = "disconnect_*.json"
97
+
73
98
  ORPHAN_CHECK_INTERVAL = 5 # Check for orphaned processes every 5 seconds
74
99
  STALE_LOCK_CHECK_INTERVAL = 60 # Check for stale locks every 60 seconds
75
100
  DEAD_CLIENT_CHECK_INTERVAL = 10 # Check for dead clients every 10 seconds
76
- IDLE_TIMEOUT = 43200 # 12 hours
101
+ IDLE_TIMEOUT = 43200 # 12 hours (fallback)
102
+ # Self-eviction timeout: if daemon has 0 clients AND 0 ops for this duration, shutdown
103
+ # Per TASK.md: "If daemon has 0 clients AND 0 running operations, immediately evict the daemon within 4 seconds."
104
+ SELF_EVICTION_TIMEOUT = 4.0 # 4 seconds
105
+
106
+
107
+ @dataclass
108
+ class RequestConfig:
109
+ """Configuration for a request type in the daemon loop."""
110
+
111
+ request_file: Path
112
+ request_class: type
113
+ processor: Any
114
+ lock: threading.Lock = field(default_factory=threading.Lock)
115
+
116
+
117
+ @dataclass
118
+ class DeviceRequestConfig:
119
+ """Configuration for a device management request."""
120
+
121
+ request_file: Path
122
+ response_file: Path
123
+ handler: Callable[[dict[str, Any], DaemonContext], dict[str, Any]]
124
+ lock: threading.Lock = field(default_factory=threading.Lock)
125
+
126
+
127
+ @dataclass
128
+ class PeriodicTask:
129
+ """Configuration for a periodic daemon task."""
130
+
131
+ name: str
132
+ interval: float
133
+ callback: Callable[[], None]
134
+ last_run: float = 0.0
135
+
136
+ def should_run(self) -> bool:
137
+ """Check if enough time has passed since last run."""
138
+ return time.time() - self.last_run >= self.interval
139
+
140
+ def run(self) -> None:
141
+ """Execute the task and update last run time."""
142
+ try:
143
+ self.callback()
144
+ self.last_run = time.time()
145
+ except KeyboardInterrupt:
146
+ _thread.interrupt_main()
147
+ raise
148
+ except Exception as e:
149
+ logging.error(f"Error in periodic task '{self.name}': {e}", exc_info=True)
77
150
 
78
151
 
79
152
  def setup_logging(foreground: bool = False) -> None:
@@ -111,29 +184,23 @@ def setup_logging(foreground: bool = False) -> None:
111
184
  logger.addHandler(file_handler)
112
185
 
113
186
 
114
- def read_request_file(request_file: Path, request_class: type) -> BuildRequest | DeployRequest | MonitorRequest | None:
187
+ def read_request_file(request_file: Path, request_class: type[RequestT]) -> RequestT | None:
115
188
  """Read and parse request file.
116
189
 
117
190
  Args:
118
191
  request_file: Path to request file
119
- request_class: Class to parse into (BuildRequest, DeployRequest, or MonitorRequest)
192
+ request_class: Class to parse into (BuildRequest, DeployRequest, MonitorRequest, or InstallDependenciesRequest)
120
193
 
121
194
  Returns:
122
195
  Request object if valid, None otherwise
123
196
  """
124
- import json
125
-
126
197
  if not request_file.exists():
127
198
  return None
128
199
 
129
200
  try:
130
201
  with open(request_file) as f:
131
202
  data = json.load(f)
132
-
133
- # Parse into typed request
134
- request = request_class.from_dict(data)
135
- return request
136
-
203
+ return request_class.from_dict(data)
137
204
  except (json.JSONDecodeError, ValueError, TypeError) as e:
138
205
  logging.error(f"Failed to parse request file {request_file}: {e}")
139
206
  return None
@@ -253,6 +320,303 @@ def cleanup_and_exit(context: DaemonContext) -> None:
253
320
  sys.exit(0)
254
321
 
255
322
 
323
+ def handle_device_request(config: DeviceRequestConfig, context: DaemonContext) -> bool:
324
+ """Handle a device request file if it exists.
325
+
326
+ Args:
327
+ config: Device request configuration
328
+ context: Daemon context
329
+
330
+ Returns:
331
+ True if a request was processed, False otherwise
332
+ """
333
+ if not config.request_file.exists():
334
+ return False
335
+
336
+ try:
337
+ with open(config.request_file) as f:
338
+ request_data = json.load(f)
339
+
340
+ # Clear request file immediately (atomic consumption)
341
+ config.request_file.unlink(missing_ok=True)
342
+
343
+ # Process request
344
+ response_data = config.handler(request_data, context)
345
+
346
+ # Write response atomically
347
+ temp_file = config.response_file.with_suffix(".tmp")
348
+ with open(temp_file, "w") as f:
349
+ json.dump(response_data, f, indent=2)
350
+ temp_file.replace(config.response_file)
351
+
352
+ return True
353
+
354
+ except json.JSONDecodeError as e:
355
+ logging.error(f"Invalid JSON in request file {config.request_file}: {e}")
356
+ config.request_file.unlink(missing_ok=True)
357
+ return False
358
+ except KeyboardInterrupt:
359
+ _thread.interrupt_main()
360
+ raise
361
+ except Exception as e:
362
+ logging.error(f"Error handling device request {config.request_file}: {e}")
363
+ try:
364
+ with open(config.response_file, "w") as f:
365
+ json.dump({"success": False, "message": str(e)}, f)
366
+ except KeyboardInterrupt:
367
+ _thread.interrupt_main()
368
+ raise
369
+ except Exception:
370
+ pass
371
+ return False
372
+
373
+
374
+ def handle_device_list_request(request_data: dict[str, Any], context: DaemonContext) -> dict[str, Any]:
375
+ """Handle device list request."""
376
+ refresh = request_data.get("refresh", False)
377
+
378
+ if refresh:
379
+ context.device_manager.refresh_devices()
380
+
381
+ devices = context.device_manager.get_all_devices()
382
+ device_list = []
383
+
384
+ for device_id, state in devices.items():
385
+ device_list.append(
386
+ {
387
+ "device_id": device_id,
388
+ "port": state.device_info.port,
389
+ "is_connected": state.is_connected,
390
+ "exclusive_holder": (state.exclusive_lease.client_id if state.exclusive_lease else None),
391
+ "monitor_count": len(state.monitor_leases),
392
+ }
393
+ )
394
+
395
+ logging.info(f"Device list request processed: {len(device_list)} devices")
396
+ return {"success": True, "devices": device_list}
397
+
398
+
399
+ def handle_device_status_request(request_data: dict[str, Any], context: DaemonContext) -> dict[str, Any]:
400
+ """Handle device status request."""
401
+ device_id = request_data.get("device_id")
402
+ if not device_id:
403
+ return {"success": False, "message": "device_id is required"}
404
+
405
+ status = context.device_manager.get_device_status(device_id)
406
+ if not status.get("exists", False):
407
+ return {"success": False, "message": f"Device {device_id} not found"}
408
+
409
+ logging.info(f"Device status request processed for {device_id}")
410
+ return {"success": True, **status}
411
+
412
+
413
+ def handle_device_lease_request(request_data: dict[str, Any], context: DaemonContext) -> dict[str, Any]:
414
+ """Handle device lease request."""
415
+ device_id = request_data.get("device_id")
416
+ lease_type = request_data.get("lease_type", "exclusive")
417
+ description = request_data.get("description", "")
418
+ # Generate a client ID for file-based IPC clients (they don't have a persistent connection)
419
+ client_id = request_data.get("client_id", f"file-ipc-{time.time()}")
420
+
421
+ if not device_id:
422
+ return {"success": False, "message": "device_id is required"}
423
+
424
+ if lease_type == "monitor":
425
+ lease = context.device_manager.acquire_monitor(
426
+ device_id=device_id,
427
+ client_id=client_id,
428
+ description=description,
429
+ )
430
+ else:
431
+ lease = context.device_manager.acquire_exclusive(
432
+ device_id=device_id,
433
+ client_id=client_id,
434
+ description=description,
435
+ )
436
+
437
+ if lease is None:
438
+ return {
439
+ "success": False,
440
+ "message": f"Failed to acquire {lease_type} lease on {device_id}",
441
+ }
442
+
443
+ logging.info(f"Device lease acquired: {lease_type} on {device_id} (lease_id={lease.lease_id})")
444
+ return {"success": True, "lease_id": lease.lease_id, "client_id": client_id}
445
+
446
+
447
+ def handle_device_release_request(request_data: dict[str, Any], context: DaemonContext) -> dict[str, Any]:
448
+ """Handle device release request."""
449
+ device_id = request_data.get("device_id")
450
+ client_id = request_data.get("client_id")
451
+
452
+ if not device_id:
453
+ return {"success": False, "message": "device_id is required"}
454
+
455
+ # If device_id looks like a UUID, it might be a lease_id
456
+ # Try to find the actual device and release by client
457
+ state = context.device_manager.get_device(device_id)
458
+
459
+ if state is None:
460
+ # Try looking up by lease_id
461
+ return {"success": False, "message": f"Device {device_id} not found"}
462
+
463
+ # If client_id not provided, try to release any lease on this device
464
+ # This is a simplification for file-based IPC where we don't track clients persistently
465
+ if state.exclusive_lease:
466
+ actual_client_id = client_id if client_id else state.exclusive_lease.client_id
467
+ result = context.device_manager.release_lease(state.exclusive_lease.lease_id, actual_client_id)
468
+ if result:
469
+ logging.info(f"Released exclusive lease on {device_id}")
470
+ return {"success": True, "message": f"Released exclusive lease on {device_id}"}
471
+
472
+ return {"success": False, "message": f"No lease found to release on {device_id}"}
473
+
474
+
475
+ def handle_device_preempt_request(request_data: dict[str, Any], context: DaemonContext) -> dict[str, Any]:
476
+ """Handle device preempt request."""
477
+ device_id = request_data.get("device_id")
478
+ reason = request_data.get("reason", "")
479
+ client_id = request_data.get("client_id", f"file-ipc-{time.time()}")
480
+
481
+ if not device_id:
482
+ return {"success": False, "message": "device_id is required"}
483
+
484
+ if not reason:
485
+ return {"success": False, "message": "reason is required for preemption"}
486
+
487
+ try:
488
+ success, preempted_client_id = context.device_manager.preempt_device(
489
+ device_id=device_id,
490
+ requesting_client_id=client_id,
491
+ reason=reason,
492
+ )
493
+
494
+ if success:
495
+ # Get the new lease info
496
+ state = context.device_manager.get_device(device_id)
497
+ lease_id = state.exclusive_lease.lease_id if state and state.exclusive_lease else None
498
+
499
+ logging.info(f"Device {device_id} preempted from {preempted_client_id} by {client_id}")
500
+ return {
501
+ "success": True,
502
+ "preempted_client_id": preempted_client_id,
503
+ "lease_id": lease_id,
504
+ "client_id": client_id,
505
+ }
506
+ else:
507
+ return {"success": False, "message": f"Failed to preempt device {device_id}"}
508
+
509
+ except KeyboardInterrupt:
510
+ _thread.interrupt_main()
511
+ raise
512
+ except Exception as e:
513
+ logging.error(f"Error during device preemption: {e}")
514
+ return {"success": False, "message": str(e)}
515
+
516
+
517
+ def process_operation_request(config: RequestConfig, context: DaemonContext) -> bool:
518
+ """Process an operation request if one exists.
519
+
520
+ Atomically consumes the request file and processes it.
521
+
522
+ Args:
523
+ config: Request configuration (file, class, processor, lock)
524
+ context: Daemon context
525
+
526
+ Returns:
527
+ True if a request was processed, False otherwise
528
+ """
529
+ # Atomically read and clear request file under lock
530
+ with config.lock:
531
+ request = read_request_file(config.request_file, config.request_class)
532
+ if request:
533
+ clear_request_file(config.request_file)
534
+
535
+ if not request:
536
+ return False
537
+
538
+ logging.info(f"Received {config.request_class.__name__}: {request}")
539
+
540
+ # Mark operation in progress
541
+ context.status_manager.set_operation_in_progress(True)
542
+ try:
543
+ config.processor.process_request(request, context)
544
+ finally:
545
+ context.status_manager.set_operation_in_progress(False)
546
+
547
+ return True
548
+
549
+
550
+ def process_connection_files(registry: ConnectionRegistry, daemon_dir: Path) -> None:
551
+ """Process connection/heartbeat/disconnect files from clients."""
552
+ # Process connect files
553
+ for connect_file in daemon_dir.glob("connect_*.json"):
554
+ try:
555
+ with open(connect_file) as f:
556
+ data = json.load(f)
557
+
558
+ # Extract connection ID from filename
559
+ conn_id = connect_file.stem.replace("connect_", "")
560
+
561
+ # Register the connection
562
+ registry.register_connection(
563
+ connection_id=data.get("client_id", conn_id),
564
+ project_dir=data.get("project_dir", ""),
565
+ environment=data.get("environment", ""),
566
+ platform=data.get("platform", ""),
567
+ client_pid=data.get("pid", 0),
568
+ client_hostname=data.get("hostname", ""),
569
+ client_version=data.get("version", ""),
570
+ )
571
+
572
+ # Remove processed file
573
+ connect_file.unlink(missing_ok=True)
574
+ logging.info(f"Registered connection from {data.get('hostname')} pid={data.get('pid')}")
575
+ except KeyboardInterrupt:
576
+ _thread.interrupt_main()
577
+ raise
578
+ except Exception as e:
579
+ logging.error(f"Error processing connect file {connect_file}: {e}")
580
+ connect_file.unlink(missing_ok=True)
581
+
582
+ # Process heartbeat files
583
+ for heartbeat_file in daemon_dir.glob("heartbeat_*.json"):
584
+ try:
585
+ with open(heartbeat_file) as f:
586
+ data = json.load(f)
587
+
588
+ conn_id = data.get("client_id", heartbeat_file.stem.replace("heartbeat_", ""))
589
+ registry.update_heartbeat(conn_id)
590
+
591
+ # Remove processed file
592
+ heartbeat_file.unlink(missing_ok=True)
593
+ except KeyboardInterrupt:
594
+ _thread.interrupt_main()
595
+ raise
596
+ except Exception as e:
597
+ logging.debug(f"Error processing heartbeat file {heartbeat_file}: {e}")
598
+ heartbeat_file.unlink(missing_ok=True)
599
+
600
+ # Process disconnect files
601
+ for disconnect_file in daemon_dir.glob("disconnect_*.json"):
602
+ try:
603
+ with open(disconnect_file) as f:
604
+ data = json.load(f)
605
+
606
+ conn_id = data.get("client_id", disconnect_file.stem.replace("disconnect_", ""))
607
+ registry.unregister_connection(conn_id)
608
+
609
+ # Remove processed file
610
+ disconnect_file.unlink(missing_ok=True)
611
+ logging.info(f"Unregistered connection {conn_id} (reason: {data.get('reason', 'unknown')})")
612
+ except KeyboardInterrupt:
613
+ _thread.interrupt_main()
614
+ raise
615
+ except Exception as e:
616
+ logging.error(f"Error processing disconnect file {disconnect_file}: {e}")
617
+ disconnect_file.unlink(missing_ok=True)
618
+
619
+
256
620
  def run_daemon_loop() -> None:
257
621
  """Main daemon loop: process build, deploy and monitor requests."""
258
622
  daemon_pid = os.getpid()
@@ -276,9 +640,20 @@ def run_daemon_loop() -> None:
276
640
  status_file_path=STATUS_FILE,
277
641
  )
278
642
 
643
+ # Create connection registry for file-based client connection tracking
644
+ connection_registry = ConnectionRegistry(heartbeat_timeout=30.0)
645
+
279
646
  # Write initial IDLE status IMMEDIATELY to prevent clients from reading stale status
280
647
  context.status_manager.update_status(DaemonState.IDLE, "Daemon starting...")
281
648
 
649
+ # Start async server in background thread for real-time client communication
650
+ if context.async_server is not None:
651
+ logging.info("Starting async server in background thread...")
652
+ context.async_server.start_in_background()
653
+ logging.info("Async server started successfully")
654
+ else:
655
+ logging.warning("Async server not available, clients will use file-based IPC only")
656
+
282
657
  # Initialize process tracker
283
658
  process_tracker = ProcessTracker(PROCESS_REGISTRY_FILE)
284
659
 
@@ -289,30 +664,70 @@ def run_daemon_loop() -> None:
289
664
  signal.signal(signal.SIGTERM, signal_handler_wrapper)
290
665
  signal.signal(signal.SIGINT, signal_handler_wrapper)
291
666
 
292
- # Create request processors
293
- build_processor = BuildRequestProcessor()
294
- deploy_processor = DeployRequestProcessor()
295
- install_deps_processor = InstallDependenciesProcessor()
296
- monitor_processor = MonitorRequestProcessor()
667
+ # Configure operation request processors
668
+ operation_requests = [
669
+ RequestConfig(BUILD_REQUEST_FILE, BuildRequest, BuildRequestProcessor()),
670
+ RequestConfig(DEPLOY_REQUEST_FILE, DeployRequest, DeployRequestProcessor()),
671
+ RequestConfig(MONITOR_REQUEST_FILE, MonitorRequest, MonitorRequestProcessor()),
672
+ RequestConfig(INSTALL_DEPS_REQUEST_FILE, InstallDependenciesRequest, InstallDependenciesProcessor()),
673
+ ]
674
+
675
+ # Configure device request handlers
676
+ device_requests = [
677
+ DeviceRequestConfig(DEVICE_LIST_REQUEST_FILE, DEVICE_LIST_RESPONSE_FILE, handle_device_list_request),
678
+ DeviceRequestConfig(DEVICE_STATUS_REQUEST_FILE, DEVICE_STATUS_RESPONSE_FILE, handle_device_status_request),
679
+ DeviceRequestConfig(DEVICE_LEASE_REQUEST_FILE, DEVICE_LEASE_RESPONSE_FILE, handle_device_lease_request),
680
+ DeviceRequestConfig(DEVICE_RELEASE_REQUEST_FILE, DEVICE_RELEASE_RESPONSE_FILE, handle_device_release_request),
681
+ DeviceRequestConfig(DEVICE_PREEMPT_REQUEST_FILE, DEVICE_PREEMPT_RESPONSE_FILE, handle_device_preempt_request),
682
+ ]
297
683
 
298
684
  logging.info(f"Daemon started with PID {daemon_pid}")
299
685
  context.status_manager.update_status(DaemonState.IDLE, "Daemon ready")
300
686
 
301
687
  last_activity = time.time()
302
- last_orphan_check = time.time()
303
- last_cancel_cleanup = time.time()
304
- last_stale_lock_check = time.time()
305
- last_dead_client_check = time.time()
688
+ daemon_empty_since: float | None = None
689
+
690
+ # Define periodic task callbacks
691
+ def cleanup_orphans() -> None:
692
+ orphaned_clients = process_tracker.cleanup_orphaned_processes()
693
+ if orphaned_clients:
694
+ logging.info(f"Cleaned up orphaned processes for {len(orphaned_clients)} dead clients: {orphaned_clients}")
695
+
696
+ def cleanup_cancel_signals() -> None:
697
+ cleanup_stale_cancel_signals()
698
+
699
+ def cleanup_dead_clients() -> None:
700
+ dead_clients = context.client_manager.cleanup_dead_clients()
701
+ if dead_clients:
702
+ logging.info(f"Cleaned up {len(dead_clients)} dead clients: {dead_clients}")
703
+
704
+ def cleanup_stale_locks() -> None:
705
+ stale_locks = context.lock_manager.get_stale_locks()
706
+ stale_count = len(stale_locks["port_locks"]) + len(stale_locks["project_locks"])
707
+ if stale_count > 0:
708
+ logging.warning(f"Found {stale_count} stale locks, force-releasing...")
709
+ released = context.lock_manager.force_release_stale_locks()
710
+ logging.info(f"Force-released {released} stale locks")
711
+ context.lock_manager.cleanup_unused_locks()
712
+
713
+ def process_connections() -> None:
714
+ process_connection_files(connection_registry, DAEMON_DIR)
715
+ cleaned = connection_registry.cleanup_stale_connections()
716
+ if cleaned > 0:
717
+ logging.info(f"Cleaned up {cleaned} stale connections")
718
+
719
+ # Configure periodic tasks
720
+ periodic_tasks = [
721
+ PeriodicTask("orphan_cleanup", ORPHAN_CHECK_INTERVAL, cleanup_orphans),
722
+ PeriodicTask("cancel_signal_cleanup", 60, cleanup_cancel_signals),
723
+ PeriodicTask("dead_client_cleanup", DEAD_CLIENT_CHECK_INTERVAL, cleanup_dead_clients),
724
+ PeriodicTask("stale_lock_cleanup", STALE_LOCK_CHECK_INTERVAL, cleanup_stale_locks),
725
+ PeriodicTask("connection_processing", 2, process_connections),
726
+ ]
306
727
 
307
728
  logging.info("Entering main daemon loop...")
308
729
  iteration_count = 0
309
730
 
310
- # Locks for atomic request consumption
311
- build_request_lock = threading.Lock()
312
- deploy_request_lock = threading.Lock()
313
- install_deps_request_lock = threading.Lock()
314
- monitor_request_lock = threading.Lock()
315
-
316
731
  while True:
317
732
  try:
318
733
  iteration_count += 1
@@ -330,29 +745,26 @@ def run_daemon_loop() -> None:
330
745
  logging.info(f"Idle timeout reached ({idle_time:.1f}s / {IDLE_TIMEOUT}s), shutting down")
331
746
  cleanup_and_exit(context)
332
747
 
333
- # Periodically check for and cleanup orphaned processes
334
- if time.time() - last_orphan_check >= ORPHAN_CHECK_INTERVAL:
335
- try:
336
- orphaned_clients = process_tracker.cleanup_orphaned_processes()
337
- if orphaned_clients:
338
- logging.info(f"Cleaned up orphaned processes for {len(orphaned_clients)} dead clients: {orphaned_clients}")
339
- last_orphan_check = time.time()
340
- except KeyboardInterrupt:
341
- _thread.interrupt_main()
342
- raise
343
- except Exception as e:
344
- logging.error(f"Error during orphan cleanup: {e}", exc_info=True)
345
-
346
- # Periodically cleanup stale cancel signals (every 60 seconds)
347
- if time.time() - last_cancel_cleanup >= 60:
348
- try:
349
- cleanup_stale_cancel_signals()
350
- last_cancel_cleanup = time.time()
351
- except KeyboardInterrupt:
352
- _thread.interrupt_main()
353
- raise
354
- except Exception as e:
355
- logging.error(f"Error during cancel signal cleanup: {e}", exc_info=True)
748
+ # Self-eviction check: if daemon has 0 clients AND 0 ops for SELF_EVICTION_TIMEOUT, shutdown
749
+ client_count = len(connection_registry.connections)
750
+ operation_running = context.status_manager.get_operation_in_progress()
751
+ daemon_is_empty = client_count == 0 and not operation_running
752
+
753
+ if daemon_is_empty:
754
+ if daemon_empty_since is None:
755
+ daemon_empty_since = time.time()
756
+ logging.debug("Daemon is now empty (0 clients, 0 ops), starting eviction timer")
757
+ elif time.time() - daemon_empty_since >= SELF_EVICTION_TIMEOUT:
758
+ logging.info(f"Self-eviction triggered: daemon empty for {time.time() - daemon_empty_since:.1f}s, shutting down")
759
+ cleanup_and_exit(context)
760
+ elif daemon_empty_since is not None:
761
+ logging.debug(f"Daemon is no longer empty (clients={client_count}, op_running={operation_running})")
762
+ daemon_empty_since = None
763
+
764
+ # Run periodic tasks
765
+ for task in periodic_tasks:
766
+ if task.should_run():
767
+ task.run()
356
768
 
357
769
  # Check for manual stale lock clear signal
358
770
  clear_locks_signal = DAEMON_DIR / "clear_stale_locks.signal"
@@ -374,124 +786,30 @@ def run_daemon_loop() -> None:
374
786
  except Exception as e:
375
787
  logging.error(f"Error handling clear locks signal: {e}", exc_info=True)
376
788
 
377
- # Periodically check for and cleanup dead clients (every 10 seconds)
378
- if time.time() - last_dead_client_check >= DEAD_CLIENT_CHECK_INTERVAL:
379
- try:
380
- dead_clients = context.client_manager.cleanup_dead_clients()
381
- if dead_clients:
382
- logging.info(f"Cleaned up {len(dead_clients)} dead clients: {dead_clients}")
383
- last_dead_client_check = time.time()
384
- except KeyboardInterrupt:
385
- _thread.interrupt_main()
386
- raise
387
- except Exception as e:
388
- logging.error(f"Error during dead client cleanup: {e}", exc_info=True)
389
-
390
- # Periodically check for and cleanup stale locks (every 60 seconds)
391
- if time.time() - last_stale_lock_check >= STALE_LOCK_CHECK_INTERVAL:
392
- try:
393
- # Check for stale locks (held beyond timeout)
394
- stale_locks = context.lock_manager.get_stale_locks()
395
- stale_count = len(stale_locks["port_locks"]) + len(stale_locks["project_locks"])
396
- if stale_count > 0:
397
- logging.warning(f"Found {stale_count} stale locks, force-releasing...")
398
- released = context.lock_manager.force_release_stale_locks()
399
- logging.info(f"Force-released {released} stale locks")
400
-
401
- # Also clean up unused lock entries (memory cleanup)
402
- context.lock_manager.cleanup_unused_locks()
403
- last_stale_lock_check = time.time()
404
- except KeyboardInterrupt:
405
- _thread.interrupt_main()
406
- raise
407
- except Exception as e:
408
- logging.error(f"Error during stale lock cleanup: {e}", exc_info=True)
409
-
410
- # Check for build requests (with lock for atomic consumption)
411
- with build_request_lock:
412
- build_request = read_request_file(BUILD_REQUEST_FILE, BuildRequest)
413
- if build_request:
414
- # Clear request file IMMEDIATELY (atomic consumption)
415
- clear_request_file(BUILD_REQUEST_FILE)
416
-
417
- if build_request:
418
- last_activity = time.time()
419
- logging.info(f"Received build request: {build_request}")
420
-
421
- # Mark operation in progress
422
- context.status_manager.set_operation_in_progress(True)
423
-
424
- # Process request
425
- build_processor.process_request(build_request, context)
426
-
427
- # Mark operation complete
428
- context.status_manager.set_operation_in_progress(False)
429
-
430
- # Check for deploy requests (with lock for atomic consumption)
431
- with deploy_request_lock:
432
- deploy_request = read_request_file(DEPLOY_REQUEST_FILE, DeployRequest)
433
- if deploy_request:
434
- # Clear request file IMMEDIATELY (atomic consumption)
435
- clear_request_file(DEPLOY_REQUEST_FILE)
789
+ # Process operation requests (build, deploy, monitor, install_deps)
790
+ for config in operation_requests:
791
+ if process_operation_request(config, context):
792
+ last_activity = time.time()
436
793
 
437
- if deploy_request:
438
- last_activity = time.time()
439
- logging.info(f"Received deploy request: {deploy_request}")
440
-
441
- # Mark operation in progress
442
- context.status_manager.set_operation_in_progress(True)
443
-
444
- # Process request
445
- deploy_processor.process_request(deploy_request, context)
446
-
447
- # Mark operation complete
448
- context.status_manager.set_operation_in_progress(False)
449
-
450
- # Check for monitor requests (with lock for atomic consumption)
451
- with monitor_request_lock:
452
- monitor_request = read_request_file(MONITOR_REQUEST_FILE, MonitorRequest)
453
- if monitor_request:
454
- # Clear request file IMMEDIATELY (atomic consumption)
455
- clear_request_file(MONITOR_REQUEST_FILE)
456
-
457
- if monitor_request:
458
- last_activity = time.time()
459
- logging.info(f"Received monitor request: {monitor_request}")
460
-
461
- # Mark operation in progress
462
- context.status_manager.set_operation_in_progress(True)
463
-
464
- # Process request
465
- monitor_processor.process_request(monitor_request, context)
466
-
467
- # Mark operation complete
468
- context.status_manager.set_operation_in_progress(False)
469
-
470
- # Check for install dependencies requests (with lock for atomic consumption)
471
- with install_deps_request_lock:
472
- install_deps_request = read_request_file(INSTALL_DEPS_REQUEST_FILE, InstallDependenciesRequest)
473
- if install_deps_request:
474
- # Clear request file IMMEDIATELY (atomic consumption)
475
- clear_request_file(INSTALL_DEPS_REQUEST_FILE)
476
-
477
- if install_deps_request:
478
- last_activity = time.time()
479
- logging.info(f"Received install dependencies request: {install_deps_request}")
480
-
481
- # Mark operation in progress
482
- context.status_manager.set_operation_in_progress(True)
483
-
484
- # Process request
485
- install_deps_processor.process_request(install_deps_request, context)
486
-
487
- # Mark operation complete
488
- context.status_manager.set_operation_in_progress(False)
794
+ # Process device management requests
795
+ for config in device_requests:
796
+ with config.lock:
797
+ handle_device_request(config, context)
489
798
 
490
799
  # Sleep briefly to avoid busy-wait
491
800
  time.sleep(0.5)
492
801
 
493
802
  except KeyboardInterrupt:
494
- logging.warning("Daemon interrupted by user")
803
+ # Check if operation is in progress - refuse to exit if so
804
+ if context.status_manager.get_operation_in_progress():
805
+ logging.warning("Received KeyboardInterrupt during active operation. Refusing to exit.")
806
+ print(
807
+ f"\n⚠️ KeyboardInterrupt during operation\n⚠️ Cannot shutdown while operation is active\n⚠️ Use 'kill -9 {os.getpid()}' to force termination\n",
808
+ flush=True,
809
+ )
810
+ # Continue the main loop instead of exiting
811
+ continue
812
+ logging.warning("Daemon interrupted by user (no operation in progress)")
495
813
  _thread.interrupt_main()
496
814
  cleanup_and_exit(context)
497
815
  except Exception as e:
@@ -500,14 +818,36 @@ def run_daemon_loop() -> None:
500
818
  time.sleep(1)
501
819
 
502
820
 
821
+ def parse_spawner_pid() -> int | None:
822
+ """Parse --spawned-by argument from command line.
823
+
824
+ Returns:
825
+ The PID of the client that spawned this daemon, or None if not provided.
826
+ """
827
+ for arg in sys.argv:
828
+ if arg.startswith("--spawned-by="):
829
+ try:
830
+ return int(arg.split("=", 1)[1])
831
+ except (ValueError, IndexError):
832
+ return None
833
+ return None
834
+
835
+
503
836
  def main() -> int:
504
837
  """Main entry point for daemon."""
505
838
  # Parse command-line arguments
506
839
  foreground = "--foreground" in sys.argv
840
+ spawner_pid = parse_spawner_pid()
507
841
 
508
842
  # Setup logging
509
843
  setup_logging(foreground=foreground)
510
844
 
845
+ # Log spawner information immediately after logging setup
846
+ if spawner_pid is not None:
847
+ logging.info(f"Daemon spawned by client PID {spawner_pid}")
848
+ else:
849
+ logging.info("Daemon started without spawner info (manual start or legacy client)")
850
+
511
851
  # Ensure daemon directory exists
512
852
  DAEMON_DIR.mkdir(parents=True, exist_ok=True)
513
853
 
@@ -545,19 +885,32 @@ def main() -> int:
545
885
 
546
886
  # Simple daemonization for cross-platform compatibility
547
887
  try:
548
- # Fork to background
888
+ # Fork to background (Unix/Linux/macOS)
549
889
  if hasattr(os, "fork") and os.fork() > 0: # type: ignore[attr-defined]
550
890
  # Parent process exits
551
891
  return 0
552
892
  except (OSError, AttributeError):
553
- # Fork not supported (Windows) - run in background as subprocess
554
- logging.info("Fork not supported, using subprocess")
893
+ # Fork not supported (Windows) - run in background as detached subprocess
894
+ logging.info("Fork not supported (Windows), using detached subprocess")
895
+ # Build command with spawner info if available
896
+ cmd = [sys.executable, __file__, "--foreground"]
897
+ if spawner_pid is not None:
898
+ cmd.append(f"--spawned-by={spawner_pid}")
899
+
900
+ # On Windows, use proper detachment flags:
901
+ # - CREATE_NEW_PROCESS_GROUP: Isolates daemon from parent's Ctrl-C signals
902
+ # - DETACHED_PROCESS: Daemon survives parent termination, no console inherited
903
+ creationflags = 0
904
+ if sys.platform == "win32":
905
+ creationflags = subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.DETACHED_PROCESS
906
+
555
907
  subprocess.Popen(
556
- [sys.executable, __file__, "--foreground"],
908
+ cmd,
557
909
  stdout=subprocess.DEVNULL,
558
910
  stderr=subprocess.DEVNULL,
559
911
  stdin=subprocess.DEVNULL,
560
912
  cwd=str(DAEMON_DIR),
913
+ creationflags=creationflags,
561
914
  )
562
915
  return 0
563
916