@smilintux/skcapstone 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/.github/workflows/publish.yml +8 -1
  2. package/docs/CUSTOM_AGENT.md +184 -0
  3. package/docs/GETTING_STARTED.md +3 -0
  4. package/launchd/com.skcapstone.daemon.plist +52 -0
  5. package/launchd/com.skcapstone.memory-compress.plist +45 -0
  6. package/launchd/com.skcapstone.skcomm-heartbeat.plist +33 -0
  7. package/launchd/com.skcapstone.skcomm-queue-drain.plist +34 -0
  8. package/launchd/install-launchd.sh +156 -0
  9. package/package.json +1 -1
  10. package/pyproject.toml +1 -1
  11. package/scripts/archive-sessions.sh +88 -0
  12. package/scripts/install.sh +39 -8
  13. package/scripts/notion-api.py +259 -0
  14. package/scripts/nvidia-proxy.mjs +878 -0
  15. package/scripts/proxy-monitor.sh +89 -0
  16. package/scripts/refresh-anthropic-token.sh +94 -0
  17. package/scripts/skgateway.mjs +856 -0
  18. package/scripts/telegram-catchup-all.sh +136 -0
  19. package/scripts/watch-anthropic-token.sh +117 -0
  20. package/src/skcapstone/__init__.py +1 -1
  21. package/src/skcapstone/_cli_monolith.py +4 -4
  22. package/src/skcapstone/api.py +36 -35
  23. package/src/skcapstone/auction.py +8 -8
  24. package/src/skcapstone/blueprint_registry.py +2 -2
  25. package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
  26. package/src/skcapstone/brain_first.py +238 -0
  27. package/src/skcapstone/chat.py +4 -4
  28. package/src/skcapstone/cli/__init__.py +2 -0
  29. package/src/skcapstone/cli/agents_spawner.py +5 -2
  30. package/src/skcapstone/cli/chat.py +5 -2
  31. package/src/skcapstone/cli/consciousness.py +5 -2
  32. package/src/skcapstone/cli/daemon.py +116 -41
  33. package/src/skcapstone/cli/itil.py +434 -0
  34. package/src/skcapstone/cli/memory.py +4 -4
  35. package/src/skcapstone/cli/skills_cmd.py +2 -2
  36. package/src/skcapstone/cli/soul.py +5 -2
  37. package/src/skcapstone/cli/status.py +11 -8
  38. package/src/skcapstone/cli/upgrade_cmd.py +7 -4
  39. package/src/skcapstone/cli/watch_cmd.py +9 -6
  40. package/src/skcapstone/config_validator.py +7 -4
  41. package/src/skcapstone/consciousness_config.py +27 -0
  42. package/src/skcapstone/consciousness_loop.py +20 -18
  43. package/src/skcapstone/coordination.py +6 -2
  44. package/src/skcapstone/daemon.py +51 -42
  45. package/src/skcapstone/dashboard.py +8 -8
  46. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +42 -0
  47. package/src/skcapstone/doctor.py +5 -2
  48. package/src/skcapstone/dreaming.py +1440 -0
  49. package/src/skcapstone/emotion_tracker.py +2 -2
  50. package/src/skcapstone/export.py +2 -2
  51. package/src/skcapstone/fuse_mount.py +21 -13
  52. package/src/skcapstone/heartbeat.py +33 -29
  53. package/src/skcapstone/itil.py +1104 -0
  54. package/src/skcapstone/launchd.py +426 -0
  55. package/src/skcapstone/mcp_server.py +306 -4
  56. package/src/skcapstone/mcp_tools/__init__.py +4 -0
  57. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  58. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  59. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  60. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  61. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  62. package/src/skcapstone/mcp_tools/did_tools.py +9 -6
  63. package/src/skcapstone/mcp_tools/gtd_tools.py +1 -1
  64. package/src/skcapstone/mcp_tools/itil_tools.py +657 -0
  65. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  66. package/src/skcapstone/mcp_tools/soul_tools.py +6 -2
  67. package/src/skcapstone/mdns_discovery.py +2 -2
  68. package/src/skcapstone/metrics.py +8 -8
  69. package/src/skcapstone/migrate_memories.py +2 -2
  70. package/src/skcapstone/models.py +14 -0
  71. package/src/skcapstone/onboard.py +137 -14
  72. package/src/skcapstone/peer_directory.py +2 -2
  73. package/src/skcapstone/providers/docker.py +2 -2
  74. package/src/skcapstone/scheduled_tasks.py +107 -0
  75. package/src/skcapstone/service_health.py +83 -4
  76. package/src/skcapstone/sync_watcher.py +2 -2
  77. package/src/skcapstone/systemd.py +17 -0
@@ -2,10 +2,14 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import logging
6
+
5
7
  from mcp.types import TextContent, Tool
6
8
 
7
9
  from ._helpers import _error_response, _home, _json_response, _text_response
8
10
 
11
+ logger = logging.getLogger(__name__)
12
+
9
13
  TOOLS: list[Tool] = [
10
14
  Tool(
11
15
  name="soul_list",
@@ -250,8 +254,8 @@ async def _handle_soul_list(args: dict) -> list[TextContent]:
250
254
  "source": "installed",
251
255
  "active": name == state.active_soul,
252
256
  })
253
- except Exception:
254
- pass
257
+ except Exception as exc:
258
+ logger.warning("Failed to list installed soul blueprints: %s", exc)
255
259
 
256
260
  # 2) Blueprints repo
257
261
  blueprints_repo = Path.home() / "clawd" / "soul-blueprints" / "blueprints"
@@ -238,8 +238,8 @@ class MDNSDiscovery:
238
238
  agent_name,
239
239
  )
240
240
  return
241
- except Exception:
242
- pass
241
+ except Exception as exc:
242
+ logger.warning("Failed to read existing mDNS heartbeat for %s: %s", agent_name, exc)
243
243
 
244
244
  heartbeat = {
245
245
  "agent_name": agent_name,
@@ -358,8 +358,8 @@ class MetricsCollector:
358
358
  1 for t in transports.values()
359
359
  if isinstance(t, dict) and t.get("enabled", True)
360
360
  )
361
- except Exception:
362
- pass
361
+ except Exception as exc:
362
+ logger.warning("Failed to parse skcomm transport config: %s", exc)
363
363
 
364
364
  report.transport = TransportMetrics(
365
365
  available=True,
@@ -474,8 +474,8 @@ class MetricsCollector:
474
474
  if state_path.exists():
475
475
  try:
476
476
  state = json.loads(state_path.read_text(encoding="utf-8"))
477
- except Exception:
478
- pass
477
+ except Exception as exc:
478
+ logger.warning("Failed to read sync_state.json: %s", exc)
479
479
 
480
480
  report.sync = SyncMetrics(
481
481
  available=True,
@@ -510,8 +510,8 @@ class MetricsCollector:
510
510
  try:
511
511
  subs = json.loads(subs_file.read_text(encoding="utf-8"))
512
512
  sub_count = len(subs)
513
- except Exception:
514
- pass
513
+ except Exception as exc:
514
+ logger.warning("Failed to read pubsub subscriptions.json: %s", exc)
515
515
 
516
516
  report.pubsub = PubSubMetrics(
517
517
  available=True,
@@ -546,8 +546,8 @@ class MetricsCollector:
546
546
  try:
547
547
  rot_data = json.loads(rot_log.read_text(encoding="utf-8"))
548
548
  rotations = len(rot_data)
549
- except Exception:
550
- pass
549
+ except Exception as exc:
550
+ logger.warning("Failed to read KMS rotation log: %s", exc)
551
551
 
552
552
  report.kms = KmsMetrics(
553
553
  available=True,
@@ -100,8 +100,8 @@ def migrate(
100
100
  try:
101
101
  existing = store.list_memories(limit=10000)
102
102
  existing_ids = {m.id for m in existing}
103
- except Exception:
104
- pass
103
+ except Exception as exc:
104
+ logger.warning("Failed to load existing memory IDs for deduplication: %s", exc)
105
105
 
106
106
  for entry in entries:
107
107
  if entry.memory_id in existing_ids:
@@ -238,6 +238,19 @@ class SyncConfig(BaseModel):
238
238
  git_remote: Optional[str] = None
239
239
 
240
240
 
241
+ class BrainFirstConfig(BaseModel):
242
+ """Configuration for the brain-first protocol.
243
+
244
+ When enabled, agents consult memory before acting on tasks
245
+ to surface prior knowledge and avoid redundant work.
246
+ """
247
+
248
+ enabled: bool = True
249
+ max_results: int = 5
250
+ min_importance: float = 0.3
251
+ auto_inject: bool = False
252
+
253
+
241
254
  class AgentConfig(BaseModel):
242
255
  """Persistent configuration for the agent runtime."""
243
256
 
@@ -249,6 +262,7 @@ class AgentConfig(BaseModel):
249
262
  trust_home: Path = Path("~/.cloud9")
250
263
  default_connector: Optional[str] = None
251
264
  sync: SyncConfig = Field(default_factory=SyncConfig)
265
+ brain_first: BrainFirstConfig = Field(default_factory=BrainFirstConfig)
252
266
  capabilities: list[str] = Field(
253
267
  default_factory=lambda: ["consciousness", "code", "chat", "memory"]
254
268
  )
@@ -21,6 +21,7 @@ Steps:
21
21
  from __future__ import annotations
22
22
 
23
23
  import json
24
+ import logging
24
25
  import sys
25
26
  import time
26
27
  from datetime import datetime, timezone
@@ -28,6 +29,8 @@ from pathlib import Path
28
29
  from typing import Optional
29
30
 
30
31
  import click
32
+
33
+ logger = logging.getLogger(__name__)
31
34
  from rich.console import Console
32
35
  from rich.panel import Panel
33
36
  from rich.prompt import Confirm, Prompt
@@ -503,8 +506,8 @@ def _step_ollama_models(prereqs: dict) -> bool:
503
506
  if DEFAULT_MODEL in (r.stdout or ""):
504
507
  click.echo(click.style(" ✓ ", fg="green") + f"{DEFAULT_MODEL} already present")
505
508
  return True
506
- except Exception:
507
- pass
509
+ except Exception as exc:
510
+ logger.debug("Failed to check ollama model list: %s", exc)
508
511
 
509
512
  if not click.confirm(f" Pull default model ({DEFAULT_MODEL}, ~2 GB)?", default=True):
510
513
  click.echo(click.style(" ↷ ", fg="bright_black") + f"Skipped — pull later: ollama pull {DEFAULT_MODEL}")
@@ -586,22 +589,40 @@ def _step_config_files(home_path: Path) -> tuple:
586
589
  return consciousness_ok, profiles_ok
587
590
 
588
591
 
589
- def _step_systemd_service() -> bool:
590
- """Install systemd user service for auto-start (optional).
592
+ def _step_autostart_service(agent_name: str = "sovereign") -> bool:
593
+ """Install auto-start service (systemd on Linux, launchd on macOS).
594
+
595
+ Prompts the user to choose which services to install and uses
596
+ the agent name from onboarding for environment variables.
597
+
598
+ Args:
599
+ agent_name: The agent name chosen during onboarding.
591
600
 
592
601
  Returns:
593
602
  True if service was installed.
594
603
  """
595
604
  import platform
596
605
 
597
- if platform.system() != "Linux":
598
- click.echo(click.style(" ↷ ", fg="bright_black") + "Systemd only available on Linux — skipped")
606
+ system = platform.system()
607
+
608
+ if system == "Linux":
609
+ return _step_systemd_service_linux()
610
+ elif system == "Darwin":
611
+ return _step_launchd_service_macos(agent_name)
612
+ else:
613
+ click.echo(
614
+ click.style(" ↷ ", fg="bright_black")
615
+ + f"Auto-start not supported on {system} — skipped"
616
+ )
599
617
  return False
600
618
 
619
+
620
+ def _step_systemd_service_linux() -> bool:
621
+ """Install systemd user service (Linux only)."""
601
622
  if not click.confirm(" Install systemd user service for auto-start at login?", default=False):
602
623
  click.echo(
603
624
  click.style(" ↷ ", fg="bright_black")
604
- + "Skipped — run 'skcapstone systemd install' to enable later"
625
+ + "Skipped — run 'skcapstone daemon install' to enable later"
605
626
  )
606
627
  return False
607
628
 
@@ -622,13 +643,109 @@ def _step_systemd_service() -> bool:
622
643
  return True
623
644
  else:
624
645
  click.echo(click.style(" ✗ ", fg="red") + "Service install failed")
625
- click.echo(click.style(" ", fg="bright_black") + "Run manually: skcapstone systemd install")
646
+ click.echo(click.style(" ", fg="bright_black") + "Run manually: skcapstone daemon install")
626
647
  return False
627
648
  except Exception as exc:
628
649
  click.echo(click.style(" ⚠ ", fg="yellow") + f"Systemd: {exc}")
629
650
  return False
630
651
 
631
652
 
653
+ def _step_launchd_service_macos(agent_name: str) -> bool:
654
+ """Install launchd user agents (macOS only).
655
+
656
+ Shows available services, lets the user choose, and installs
657
+ plist files to ~/Library/LaunchAgents/.
658
+
659
+ Args:
660
+ agent_name: Agent name for SKCAPSTONE_AGENT env var.
661
+
662
+ Returns:
663
+ True if at least one service was installed.
664
+ """
665
+ try:
666
+ from .launchd import install_service, list_available_services
667
+ except ImportError as exc:
668
+ click.echo(click.style(" ⚠ ", fg="yellow") + f"launchd module not available: {exc}")
669
+ return False
670
+
671
+ click.echo(f" Agent name: [cyan]{agent_name}[/] (used in SKCAPSTONE_AGENT)")
672
+ click.echo()
673
+
674
+ # Show available services
675
+ available = list_available_services(agent_name)
676
+ core_services = [s for s in available if s["available"] and not s["suffix"].startswith("sk")]
677
+ optional_services = [s for s in available if s["available"] and s["suffix"].startswith("sk")]
678
+
679
+ click.echo(" Available services:")
680
+ all_available = [s for s in available if s["available"]]
681
+ for i, svc in enumerate(all_available, 1):
682
+ click.echo(f" {i}. {svc['description']} ({svc['label']})")
683
+ click.echo()
684
+
685
+ if not click.confirm(" Install launchd services for auto-start at login?", default=True):
686
+ click.echo(
687
+ click.style(" ↷ ", fg="bright_black")
688
+ + "Skipped — run 'skcapstone daemon install' to enable later"
689
+ )
690
+ return False
691
+
692
+ # Ask: all or pick?
693
+ install_all = click.confirm(" Install all available services?", default=True)
694
+
695
+ selected_suffixes: list[str] = []
696
+ if install_all:
697
+ selected_suffixes = [s["suffix"] for s in all_available]
698
+ else:
699
+ click.echo(" Enter service numbers (comma-separated), or 'none' to skip:")
700
+ raw = click.prompt(" Services", default="1")
701
+ if raw.strip().lower() == "none":
702
+ click.echo(click.style(" ↷ ", fg="bright_black") + "Skipped")
703
+ return False
704
+ try:
705
+ indices = [int(x.strip()) - 1 for x in raw.split(",")]
706
+ selected_suffixes = [
707
+ all_available[i]["suffix"]
708
+ for i in indices
709
+ if 0 <= i < len(all_available)
710
+ ]
711
+ except (ValueError, IndexError):
712
+ click.echo(click.style(" ⚠ ", fg="yellow") + "Invalid selection — installing core services only")
713
+ selected_suffixes = [s["suffix"] for s in all_available if not s["suffix"].startswith("sk")]
714
+
715
+ if not selected_suffixes:
716
+ click.echo(click.style(" ↷ ", fg="bright_black") + "No services selected")
717
+ return False
718
+
719
+ # Ask about immediate start
720
+ start_now = click.confirm(" Start services now?", default=False)
721
+
722
+ try:
723
+ result = install_service(
724
+ agent_name=agent_name,
725
+ services=selected_suffixes,
726
+ start=start_now,
727
+ )
728
+
729
+ if result.get("installed"):
730
+ for svc in result.get("services", []):
731
+ status = "[green]loaded[/]" if svc.get("loaded") else "[dim]installed[/]"
732
+ click.echo(click.style(" ✓ ", fg="green") + f"{svc['label']} — {status}")
733
+
734
+ click.echo()
735
+ click.echo(click.style(" ", fg="bright_black") + "Manage services:")
736
+ click.echo(click.style(" ", fg="bright_black") + " launchctl list | grep skcapstone")
737
+ click.echo(click.style(" ", fg="bright_black") + " launchctl start com.skcapstone.daemon")
738
+ click.echo(click.style(" ", fg="bright_black") + " skcapstone daemon uninstall")
739
+ return True
740
+ else:
741
+ click.echo(click.style(" ✗ ", fg="red") + "No services were installed")
742
+ return False
743
+
744
+ except Exception as exc:
745
+ click.echo(click.style(" ⚠ ", fg="yellow") + f"launchd install: {exc}")
746
+ return False
747
+
748
+
632
749
  def _step_doctor_check(home_path: Path) -> "object":
633
750
  """Run doctor diagnostics and print results.
634
751
 
@@ -851,10 +968,10 @@ def run_onboard(home: Optional[str] = None) -> None:
851
968
  open_task_count = _step_board(home_path, name)
852
969
 
853
970
  # -----------------------------------------------------------------------
854
- # Step 13: Systemd Service (optional)
971
+ # Step 13: Auto-Start Service (systemd on Linux, launchd on macOS)
855
972
  # -----------------------------------------------------------------------
856
- _step_header(13, "Systemd Service")
857
- systemd_ok = _step_systemd_service()
973
+ _step_header(13, "Auto-Start Service")
974
+ service_ok = _step_autostart_service(agent_name=agent_slug)
858
975
 
859
976
  # -----------------------------------------------------------------------
860
977
  # Post-wizard: Doctor Diagnostics
@@ -877,8 +994,8 @@ def run_onboard(home: Optional[str] = None) -> None:
877
994
  soul = load_soul()
878
995
  if soul and soul.boot_message:
879
996
  boot_message = soul.boot_message
880
- except Exception:
881
- pass
997
+ except Exception as exc:
998
+ logger.debug("Failed to load soul boot message, using default: %s", exc)
882
999
 
883
1000
  # -----------------------------------------------------------------------
884
1001
  # Summary table
@@ -911,7 +1028,13 @@ def run_onboard(home: Optional[str] = None) -> None:
911
1028
  summary.add_row("Heartbeat", "[green]ACTIVE[/]" if hb_ok else "[yellow]FAILED[/]", f"{agent_slug}.json" if hb_ok else "see above")
912
1029
  summary.add_row("Crush AI", "[green]READY[/]" if crush_ok else "[yellow]CONFIG ONLY[/]", "~/.config/crush/crush.json")
913
1030
  summary.add_row("Board", "[green]ACTIVE[/]", f"{open_task_count} open tasks")
914
- summary.add_row("Systemd", "[green]INSTALLED[/]" if systemd_ok else "[dim]OPTIONAL[/]", "skcapstone.service" if systemd_ok else "skcapstone systemd install")
1031
+ import platform as _plat
1032
+ _svc_type = "launchd" if _plat.system() == "Darwin" else "systemd"
1033
+ summary.add_row(
1034
+ "Auto-Start",
1035
+ "[green]INSTALLED[/]" if service_ok else "[dim]OPTIONAL[/]",
1036
+ f"{_svc_type} services" if service_ok else f"skcapstone daemon install",
1037
+ )
915
1038
  doctor_status = "[green]ALL PASSED[/]" if doctor_report.all_passed else f"[yellow]{doctor_report.failed_count} failed[/]"
916
1039
  summary.add_row("Doctor", doctor_status, f"{doctor_report.passed_count}/{doctor_report.total_count} checks")
917
1040
  summary.add_row(
@@ -250,8 +250,8 @@ class PeerDirectory:
250
250
  ts = data.get("timestamp", "")
251
251
  if ts:
252
252
  self._entries[agent_name].last_seen = ts
253
- except Exception:
254
- pass
253
+ except Exception as exc:
254
+ logger.warning("Failed to update last_seen from heartbeat for %s: %s", agent_name, exc)
255
255
  continue
256
256
 
257
257
  try:
@@ -307,8 +307,8 @@ class DockerProvider(ProviderBackend):
307
307
  old = client.containers.get(container_name)
308
308
  logger.warning("Removing stale container: %s", container_name)
309
309
  old.remove(force=True)
310
- except Exception:
311
- pass
310
+ except Exception as exc:
311
+ logger.debug("No stale container to remove for %s (expected if first run): %s", container_name, exc)
312
312
 
313
313
  # Ensure named volume for agent state persistence
314
314
  try:
@@ -9,6 +9,7 @@ Built-in recurring tasks:
9
9
  - backend_reprobe — every 5 minutes
10
10
  - memory_promotion_sweep — every hour
11
11
  - profile_freshness_check — every 24 hours
12
+ - dreaming_reflection — every 15 minutes
12
13
 
13
14
  Usage:
14
15
  scheduler = build_scheduler(home, stop_event, consciousness_loop, beacon)
@@ -338,6 +339,85 @@ def make_profile_freshness_task(home: Path, max_age_days: int = 7) -> Callable[[
338
339
  return _run
339
340
 
340
341
 
342
+ def make_dreaming_task(
343
+ home: Path, consciousness_loop: object = None
344
+ ) -> Callable[[], None]:
345
+ """Return a callback that runs the dreaming engine every 15 minutes.
346
+
347
+ Instantiates DreamingEngine lazily (so import errors are deferred until
348
+ first run). The engine itself checks idle state and cooldown internally.
349
+
350
+ Args:
351
+ home: Agent home directory.
352
+ consciousness_loop: ConsciousnessLoop instance for idle detection.
353
+ """
354
+
355
+ def _run() -> None:
356
+ from .consciousness_config import load_dreaming_config
357
+ from .dreaming import DreamingEngine
358
+
359
+ config = load_dreaming_config(home)
360
+ if config is None or not config.enabled:
361
+ return
362
+ engine = DreamingEngine(
363
+ home=home, config=config, consciousness_loop=consciousness_loop
364
+ )
365
+ result = engine.dream()
366
+ if result and result.memories_created:
367
+ logger.info(
368
+ "Dreaming: %d memories created from reflection",
369
+ len(result.memories_created),
370
+ )
371
+ elif result and result.skipped_reason:
372
+ logger.debug("Dreaming skipped: %s", result.skipped_reason)
373
+
374
+ return _run
375
+
376
+
377
+ def make_itil_auto_close_task(home: Path) -> Callable[[], None]:
378
+ """Return a callback that auto-closes resolved incidents after 24h stable.
379
+
380
+ Args:
381
+ home: Shared root directory.
382
+ """
383
+
384
+ def _run() -> None:
385
+ from .itil import ITILManager
386
+
387
+ mgr = ITILManager(home)
388
+ closed = mgr.auto_close_resolved(stable_hours=24)
389
+ if closed:
390
+ logger.info("ITIL auto-close: %d incident(s) closed: %s", len(closed), closed)
391
+ else:
392
+ logger.debug("ITIL auto-close: no incidents to close")
393
+
394
+ return _run
395
+
396
+
397
+ def make_itil_escalation_task(home: Path) -> Callable[[], None]:
398
+ """Return a callback that checks SLA breaches on open incidents.
399
+
400
+ Args:
401
+ home: Shared root directory.
402
+ """
403
+
404
+ def _run() -> None:
405
+ from .itil import ITILManager
406
+
407
+ mgr = ITILManager(home)
408
+ breaches = mgr.check_sla_breaches()
409
+ if breaches:
410
+ for b in breaches:
411
+ logger.warning(
412
+ "ITIL SLA breach: %s (%s) unacknowledged for %d min (limit: %d min)",
413
+ b["id"], b["severity"], b["elapsed_minutes"], b["sla_minutes"],
414
+ )
415
+ else:
416
+ logger.debug("ITIL escalation check: no SLA breaches")
417
+
418
+ return _run
419
+
420
+
341
421
  # ---------------------------------------------------------------------------
342
422
  # Convenience builder
343
423
  # ---------------------------------------------------------------------------
@@ -369,6 +449,8 @@ def build_scheduler(
369
449
  +--------------------------+------------+
370
450
  | profile_freshness_check | 24 hours |
371
451
  +--------------------------+------------+
452
+ | dreaming_reflection | 15 min |
453
+ +--------------------------+------------+
372
454
 
373
455
  Args:
374
456
  home: Agent home directory.
@@ -424,6 +506,13 @@ def build_scheduler(
424
506
  callback=make_profile_freshness_task(home),
425
507
  )
426
508
 
509
+ # Dreaming — idle-time self-reflection via NVIDIA NIM
510
+ scheduler.register(
511
+ name="dreaming_reflection",
512
+ interval_seconds=900, # 15 minutes
513
+ callback=make_dreaming_task(home, consciousness_loop),
514
+ )
515
+
427
516
  # Service health check — pings Qdrant, FalkorDB, Syncthing, daemons
428
517
  try:
429
518
  from .service_health import make_service_health_task
@@ -436,4 +525,22 @@ def build_scheduler(
436
525
  except ImportError:
437
526
  logger.debug("service_health not available — service_health_check task skipped")
438
527
 
528
+ # ITIL escalation check — SLA breach detection every 5 minutes
529
+ try:
530
+ from . import SHARED_ROOT
531
+
532
+ shared = Path(SHARED_ROOT).expanduser()
533
+ scheduler.register(
534
+ name="itil_escalation_check",
535
+ interval_seconds=300, # 5 minutes
536
+ callback=make_itil_escalation_task(shared),
537
+ )
538
+ scheduler.register(
539
+ name="itil_auto_close",
540
+ interval_seconds=1800, # 30 minutes
541
+ callback=make_itil_auto_close_task(shared),
542
+ )
543
+ except Exception:
544
+ logger.debug("ITIL scheduled tasks not available — skipped")
545
+
439
546
  return scheduler
@@ -76,8 +76,8 @@ def _http_check(
76
76
  try:
77
77
  body = json.loads(resp.read().decode("utf-8"))
78
78
  result["version"] = body.get(version_key)
79
- except Exception:
80
- pass
79
+ except Exception as exc:
80
+ logger.warning("Failed to parse version from service health response: %s", exc)
81
81
  except urllib.error.HTTPError as exc:
82
82
  latency = (time.monotonic() - t0) * 1000
83
83
  result["latency_ms"] = round(latency, 1)
@@ -197,16 +197,90 @@ def check_all_services() -> list[dict[str, Any]]:
197
197
  # ---------------------------------------------------------------------------
198
198
 
199
199
 
200
+ def _create_incident_for_down_service(service_result: dict[str, Any]) -> None:
201
+ """Auto-create an ITIL incident for a down service (with dedup).
202
+
203
+ Only creates a new incident if there is no existing open incident
204
+ for the same service. Uses best-effort: failures are logged but
205
+ never block the health check.
206
+ """
207
+ try:
208
+ from . import SHARED_ROOT
209
+ from .itil import ITILManager
210
+
211
+ svc_name = service_result["name"]
212
+ mgr = ITILManager(os.path.expanduser(SHARED_ROOT))
213
+
214
+ # Dedup: skip if there's already an open incident for this service
215
+ existing = mgr.find_open_incident_for_service(svc_name)
216
+ if existing:
217
+ logger.debug(
218
+ "Skipping incident creation for %s — open incident %s exists",
219
+ svc_name, existing.id,
220
+ )
221
+ return
222
+
223
+ error_info = service_result.get("error") or "unreachable"
224
+ mgr.create_incident(
225
+ title=f"{svc_name} down",
226
+ severity="sev3",
227
+ source="service_health",
228
+ affected_services=[svc_name],
229
+ impact=f"Service unreachable: {error_info}",
230
+ managed_by="lumina",
231
+ created_by="service_health",
232
+ tags=["auto-detected", "service-health"],
233
+ )
234
+ logger.info("Auto-created incident for down service: %s", svc_name)
235
+ except Exception as exc:
236
+ logger.debug("Failed to create incident for %s: %s", service_result.get("name"), exc)
237
+
238
+
239
+ def _auto_resolve_recovered_service(service_result: dict[str, Any]) -> None:
240
+ """Auto-resolve sev4 incidents when a service recovers."""
241
+ try:
242
+ from . import SHARED_ROOT
243
+ from .itil import ITILManager
244
+
245
+ svc_name = service_result["name"]
246
+ mgr = ITILManager(os.path.expanduser(SHARED_ROOT))
247
+ existing = mgr.find_open_incident_for_service(svc_name)
248
+ if existing is None:
249
+ return
250
+
251
+ if existing.severity.value == "sev4":
252
+ mgr.update_incident(
253
+ existing.id, "service_health",
254
+ new_status="resolved",
255
+ note=f"Service {svc_name} recovered automatically",
256
+ resolution_summary="Auto-resolved: service came back up",
257
+ )
258
+ logger.info("Auto-resolved sev4 incident %s for recovered service %s",
259
+ existing.id, svc_name)
260
+ else:
261
+ mgr.update_incident(
262
+ existing.id, "service_health",
263
+ note=f"Service {svc_name} appears to be back up",
264
+ )
265
+ except Exception as exc:
266
+ logger.debug("Failed to auto-resolve incident for %s: %s",
267
+ service_result.get("name"), exc)
268
+
269
+
200
270
  def make_service_health_task() -> callable:
201
271
  """Return a zero-arg callback suitable for TaskScheduler.register().
202
272
 
203
273
  Runs check_all_services() and logs results. Down services are logged
204
- at WARNING level; all-up is logged at DEBUG level.
274
+ at WARNING level; all-up is logged at DEBUG level. Auto-creates ITIL
275
+ incidents for down services and auto-resolves sev4 incidents for
276
+ recovered services.
205
277
  """
206
278
 
207
279
  def _run() -> None:
208
280
  results = check_all_services()
209
281
  down = [r for r in results if r["status"] == "down"]
282
+ up = [r for r in results if r["status"] == "up"]
283
+
210
284
  if down:
211
285
  names = ", ".join(r["name"] for r in down)
212
286
  logger.warning(
@@ -216,8 +290,9 @@ def make_service_health_task() -> callable:
216
290
  logger.warning(
217
291
  " %s (%s): %s", r["name"], r["url"], r["error"] or "unreachable"
218
292
  )
293
+ _create_incident_for_down_service(r)
219
294
  else:
220
- up_count = sum(1 for r in results if r["status"] == "up")
295
+ up_count = len(up)
221
296
  logger.debug(
222
297
  "Service health: %d/%d up, %d unknown",
223
298
  up_count,
@@ -225,4 +300,8 @@ def make_service_health_task() -> callable:
225
300
  len(results) - up_count,
226
301
  )
227
302
 
303
+ # Check for recovered services
304
+ for r in up:
305
+ _auto_resolve_recovered_service(r)
306
+
228
307
  return _run
@@ -574,8 +574,8 @@ class SyncWatcher:
574
574
  try:
575
575
  self._observer.stop()
576
576
  self._observer.join(timeout=5)
577
- except Exception:
578
- pass
577
+ except Exception as exc:
578
+ logger.warning("Error stopping SyncWatcher observer: %s", exc)
579
579
  self._observer = None
580
580
  logger.info("SyncWatcher stopped.")
581
581