@smilintux/skcapstone 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +8 -1
- package/docs/CUSTOM_AGENT.md +184 -0
- package/docs/GETTING_STARTED.md +3 -0
- package/launchd/com.skcapstone.daemon.plist +52 -0
- package/launchd/com.skcapstone.memory-compress.plist +45 -0
- package/launchd/com.skcapstone.skcomm-heartbeat.plist +33 -0
- package/launchd/com.skcapstone.skcomm-queue-drain.plist +34 -0
- package/launchd/install-launchd.sh +156 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/archive-sessions.sh +88 -0
- package/scripts/install.sh +39 -8
- package/scripts/notion-api.py +259 -0
- package/scripts/nvidia-proxy.mjs +856 -0
- package/scripts/proxy-monitor.sh +89 -0
- package/scripts/skgateway.mjs +856 -0
- package/scripts/telegram-catchup-all.sh +136 -0
- package/src/skcapstone/__init__.py +1 -1
- package/src/skcapstone/blueprint_registry.py +78 -0
- package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
- package/src/skcapstone/cli/__init__.py +2 -0
- package/src/skcapstone/cli/daemon.py +116 -41
- package/src/skcapstone/cli/itil.py +434 -0
- package/src/skcapstone/cli/skills_cmd.py +90 -26
- package/src/skcapstone/cli/soul.py +47 -24
- package/src/skcapstone/consciousness_config.py +27 -0
- package/src/skcapstone/coordination.py +1 -0
- package/src/skcapstone/daemon.py +47 -20
- package/src/skcapstone/dreaming.py +761 -0
- package/src/skcapstone/fuse_mount.py +21 -13
- package/src/skcapstone/heartbeat.py +33 -29
- package/src/skcapstone/itil.py +1104 -0
- package/src/skcapstone/launchd.py +426 -0
- package/src/skcapstone/mcp_server.py +258 -0
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/gtd_tools.py +1 -1
- package/src/skcapstone/mcp_tools/itil_tools.py +657 -0
- package/src/skcapstone/mcp_tools/notification_tools.py +12 -11
- package/src/skcapstone/notifications.py +40 -27
- package/src/skcapstone/onboard.py +130 -10
- package/src/skcapstone/scheduled_tasks.py +107 -0
- package/src/skcapstone/service_health.py +81 -2
- package/src/skcapstone/soul.py +19 -0
- package/src/skcapstone/systemd.py +17 -0
|
@@ -19,6 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import datetime
|
|
21
21
|
import logging
|
|
22
|
+
import os
|
|
22
23
|
import platform
|
|
23
24
|
import subprocess
|
|
24
25
|
import threading
|
|
@@ -43,53 +44,65 @@ _TERMINAL_CMDS: list[list[str]] = [
|
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
def _store_notification_memory(title: str, body: str, urgency: str) -> None:
    """Record a dispatched notification as a JSON file under skcomm/notifications/.

    The record is transport bookkeeping rather than a persistent memory, so
    it is written to ``{AGENT_HOME}/agents/{agent}/skcomm/notifications/``
    and not into the memory/ tree that skmemory indexes.

    Nothing is written when the agent home directory does not exist. Any
    failure is logged at debug level and swallowed.

    Args:
        title: Notification title, stored verbatim.
        body: Notification body, stored verbatim.
        urgency: Urgency label, stored verbatim.
    """
    try:
        import json as _json
        import uuid

        from . import AGENT_HOME

        agent_home = Path(AGENT_HOME).expanduser()
        if agent_home.exists():
            # The agent is taken from the environment; "lumina" is the fallback.
            agent = os.environ.get("SKCAPSTONE_AGENT", "lumina")
            log_dir = agent_home.joinpath("agents", agent, "skcomm", "notifications")
            log_dir.mkdir(parents=True, exist_ok=True)

            stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            record_id = uuid.uuid4().hex[:12]
            record = {
                "id": record_id,
                "type": "notification-sent",
                "title": title,
                "body": body,
                "urgency": urgency,
                "timestamp": stamp,
            }
            # One file per record, named after the record id.
            target = log_dir / f"{record_id}.json"
            target.write_text(_json.dumps(record, indent=2), encoding="utf-8")
    except Exception as exc:
        logger.debug("Failed to store notification log: %s", exc)
|
|
68
79
|
|
|
69
80
|
|
|
70
81
|
def _store_click_event(action: str, detail: str) -> None:
    """Log a notification click event to the skcomm/notifications/ directory.

    Writes one JSON file per event to
    ``{AGENT_HOME}/agents/{agent}/skcomm/notifications/``. Nothing is written
    when the agent home directory does not exist. Any failure is logged at
    debug level and swallowed so a logging problem never breaks click handling.

    Args:
        action: Name of the action that was clicked, stored verbatim.
        detail: Extra detail about the click, stored verbatim.
    """
    try:
        import json as _json
        import uuid

        from . import AGENT_HOME

        home = Path(AGENT_HOME).expanduser()
        if not home.exists():
            return

        # The agent is taken from the environment; "lumina" is the fallback.
        agent_name = os.environ.get("SKCAPSTONE_AGENT", "lumina")
        notif_dir = home / "agents" / agent_name / "skcomm" / "notifications"
        notif_dir.mkdir(parents=True, exist_ok=True)

        ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        entry = {
            "id": uuid.uuid4().hex[:12],
            "type": "click-event",
            "action": action,
            "detail": detail,
            "timestamp": ts,
        }
        path = notif_dir / f"{entry['id']}.json"
        path.write_text(_json.dumps(entry, indent=2), encoding="utf-8")
        logger.debug("Stored notification click event: %s → %s", action, detail)
    except Exception as exc:
        # The event goes to the notification log, not the memory store, so the
        # message names the log (same wording as the notification-sent logger).
        logger.debug("Failed to store click event log: %s", exc)
|
|
@@ -586,22 +586,40 @@ def _step_config_files(home_path: Path) -> tuple:
|
|
|
586
586
|
return consciousness_ok, profiles_ok
|
|
587
587
|
|
|
588
588
|
|
|
589
|
-
def
|
|
590
|
-
"""Install systemd
|
|
589
|
+
def _step_autostart_service(agent_name: str = "sovereign") -> bool:
    """Run the auto-start installation step for the current platform.

    Dispatches on ``platform.system()``: Linux goes to the systemd step,
    macOS (Darwin) goes to the launchd step, and any other system prints a
    "skipped" notice.

    Args:
        agent_name: The agent name chosen during onboarding. It is only
            forwarded to the macOS launchd step.

    Returns:
        True if service was installed.
    """
    import platform

    os_name = platform.system()

    if os_name == "Linux":
        return _step_systemd_service_linux()
    if os_name == "Darwin":
        return _step_launchd_service_macos(agent_name)

    # Neither systemd nor launchd applies here: tell the user and move on.
    skip_marker = click.style(" ↷ ", fg="bright_black")
    click.echo(skip_marker + f"Auto-start not supported on {os_name} — skipped")
    return False
|
|
600
615
|
|
|
616
|
+
|
|
617
|
+
def _step_systemd_service_linux() -> bool:
|
|
618
|
+
"""Install systemd user service (Linux only)."""
|
|
601
619
|
if not click.confirm(" Install systemd user service for auto-start at login?", default=False):
|
|
602
620
|
click.echo(
|
|
603
621
|
click.style(" ↷ ", fg="bright_black")
|
|
604
|
-
+ "Skipped — run 'skcapstone
|
|
622
|
+
+ "Skipped — run 'skcapstone daemon install' to enable later"
|
|
605
623
|
)
|
|
606
624
|
return False
|
|
607
625
|
|
|
@@ -622,13 +640,109 @@ def _step_systemd_service() -> bool:
|
|
|
622
640
|
return True
|
|
623
641
|
else:
|
|
624
642
|
click.echo(click.style(" ✗ ", fg="red") + "Service install failed")
|
|
625
|
-
click.echo(click.style(" ", fg="bright_black") + "Run manually: skcapstone
|
|
643
|
+
click.echo(click.style(" ", fg="bright_black") + "Run manually: skcapstone daemon install")
|
|
626
644
|
return False
|
|
627
645
|
except Exception as exc:
|
|
628
646
|
click.echo(click.style(" ⚠ ", fg="yellow") + f"Systemd: {exc}")
|
|
629
647
|
return False
|
|
630
648
|
|
|
631
649
|
|
|
650
|
+
def _step_launchd_service_macos(agent_name: str) -> bool:
    """Install launchd user agents (macOS only).

    Shows available services, lets the user choose, and installs
    plist files to ~/Library/LaunchAgents/.

    Args:
        agent_name: Agent name for SKCAPSTONE_AGENT env var.

    Returns:
        True if at least one service was installed.
    """
    try:
        from .launchd import install_service, list_available_services
    except ImportError as exc:
        click.echo(click.style(" ⚠ ", fg="yellow") + f"launchd module not available: {exc}")
        return False

    # click.echo() prints Rich-style markup such as "[cyan]...[/]" literally,
    # so colours are applied with click.style() instead.
    click.echo(
        " Agent name: "
        + click.style(agent_name, fg="cyan")
        + " (used in SKCAPSTONE_AGENT)"
    )
    click.echo()

    # Only services reported as available can be offered to the user.
    all_available = [s for s in list_available_services(agent_name) if s["available"]]
    if not all_available:
        # Nothing to install: skip the prompts rather than asking about an empty list.
        click.echo(click.style(" ↷ ", fg="bright_black") + "No launchd services available — skipped")
        return False

    click.echo(" Available services:")
    for i, svc in enumerate(all_available, 1):
        click.echo(f" {i}. {svc['description']} ({svc['label']})")
    click.echo()

    if not click.confirm(" Install launchd services for auto-start at login?", default=True):
        click.echo(
            click.style(" ↷ ", fg="bright_black")
            + "Skipped — run 'skcapstone daemon install' to enable later"
        )
        return False

    # Ask: all or pick?
    selected_suffixes: list[str] = []
    if click.confirm(" Install all available services?", default=True):
        selected_suffixes = [s["suffix"] for s in all_available]
    else:
        click.echo(" Enter service numbers (comma-separated), or 'none' to skip:")
        raw = click.prompt(" Services", default="1")
        if raw.strip().lower() == "none":
            click.echo(click.style(" ↷ ", fg="bright_black") + "Skipped")
            return False
        try:
            indices = [int(x.strip()) - 1 for x in raw.split(",")]
        except ValueError:
            # A non-numeric entry: fall back to services whose suffix does not
            # start with "sk" (treated as the core set).
            click.echo(click.style(" ⚠ ", fg="yellow") + "Invalid selection — installing core services only")
            selected_suffixes = [s["suffix"] for s in all_available if not s["suffix"].startswith("sk")]
        else:
            # Out-of-range numbers are ignored; dict.fromkeys drops repeats
            # while keeping the order the user typed.
            selected_suffixes = list(
                dict.fromkeys(
                    all_available[i]["suffix"]
                    for i in indices
                    if 0 <= i < len(all_available)
                )
            )

    if not selected_suffixes:
        click.echo(click.style(" ↷ ", fg="bright_black") + "No services selected")
        return False

    # Ask about immediate start
    start_now = click.confirm(" Start services now?", default=False)

    try:
        result = install_service(
            agent_name=agent_name,
            services=selected_suffixes,
            start=start_now,
        )

        if not result.get("installed"):
            click.echo(click.style(" ✗ ", fg="red") + "No services were installed")
            return False

        for svc in result.get("services", []):
            if svc.get("loaded"):
                status = click.style("loaded", fg="green")
            else:
                status = click.style("installed", dim=True)
            click.echo(click.style(" ✓ ", fg="green") + f"{svc['label']} — {status}")

        click.echo()
        hint = click.style(" ", fg="bright_black")
        click.echo(hint + "Manage services:")
        click.echo(hint + " launchctl list | grep skcapstone")
        click.echo(hint + " launchctl start com.skcapstone.daemon")
        click.echo(hint + " skcapstone daemon uninstall")
        return True

    except Exception as exc:
        # Best-effort onboarding step: report the problem and carry on.
        click.echo(click.style(" ⚠ ", fg="yellow") + f"launchd install: {exc}")
        return False
|
|
744
|
+
|
|
745
|
+
|
|
632
746
|
def _step_doctor_check(home_path: Path) -> "object":
|
|
633
747
|
"""Run doctor diagnostics and print results.
|
|
634
748
|
|
|
@@ -851,10 +965,10 @@ def run_onboard(home: Optional[str] = None) -> None:
|
|
|
851
965
|
open_task_count = _step_board(home_path, name)
|
|
852
966
|
|
|
853
967
|
# -----------------------------------------------------------------------
|
|
854
|
-
# Step 13:
|
|
968
|
+
# Step 13: Auto-Start Service (systemd on Linux, launchd on macOS)
|
|
855
969
|
# -----------------------------------------------------------------------
|
|
856
|
-
_step_header(13, "
|
|
857
|
-
|
|
970
|
+
_step_header(13, "Auto-Start Service")
|
|
971
|
+
service_ok = _step_autostart_service(agent_name=agent_slug)
|
|
858
972
|
|
|
859
973
|
# -----------------------------------------------------------------------
|
|
860
974
|
# Post-wizard: Doctor Diagnostics
|
|
@@ -911,7 +1025,13 @@ def run_onboard(home: Optional[str] = None) -> None:
|
|
|
911
1025
|
summary.add_row("Heartbeat", "[green]ACTIVE[/]" if hb_ok else "[yellow]FAILED[/]", f"{agent_slug}.json" if hb_ok else "see above")
|
|
912
1026
|
summary.add_row("Crush AI", "[green]READY[/]" if crush_ok else "[yellow]CONFIG ONLY[/]", "~/.config/crush/crush.json")
|
|
913
1027
|
summary.add_row("Board", "[green]ACTIVE[/]", f"{open_task_count} open tasks")
|
|
914
|
-
|
|
1028
|
+
import platform as _plat
|
|
1029
|
+
_svc_type = "launchd" if _plat.system() == "Darwin" else "systemd"
|
|
1030
|
+
summary.add_row(
|
|
1031
|
+
"Auto-Start",
|
|
1032
|
+
"[green]INSTALLED[/]" if service_ok else "[dim]OPTIONAL[/]",
|
|
1033
|
+
f"{_svc_type} services" if service_ok else f"skcapstone daemon install",
|
|
1034
|
+
)
|
|
915
1035
|
doctor_status = "[green]ALL PASSED[/]" if doctor_report.all_passed else f"[yellow]{doctor_report.failed_count} failed[/]"
|
|
916
1036
|
summary.add_row("Doctor", doctor_status, f"{doctor_report.passed_count}/{doctor_report.total_count} checks")
|
|
917
1037
|
summary.add_row(
|
|
@@ -9,6 +9,7 @@ Built-in recurring tasks:
|
|
|
9
9
|
- backend_reprobe — every 5 minutes
|
|
10
10
|
- memory_promotion_sweep — every hour
|
|
11
11
|
- profile_freshness_check — every 24 hours
|
|
12
|
+
- dreaming_reflection — every 15 minutes
|
|
12
13
|
|
|
13
14
|
Usage:
|
|
14
15
|
scheduler = build_scheduler(home, stop_event, consciousness_loop, beacon)
|
|
@@ -338,6 +339,85 @@ def make_profile_freshness_task(home: Path, max_age_days: int = 7) -> Callable[[
|
|
|
338
339
|
return _run
|
|
339
340
|
|
|
340
341
|
|
|
342
|
+
def make_dreaming_task(
    home: Path, consciousness_loop: object = None
) -> Callable[[], None]:
    """Build the scheduler callback that triggers one dreaming pass.

    The dreaming modules are imported inside the callback, so an import
    failure surfaces when the task runs rather than when it is built. Each
    run reloads the dreaming config and does nothing when the config is
    missing or disabled.

    Args:
        home: Agent home directory.
        consciousness_loop: ConsciousnessLoop instance handed to the engine
            (the original notes it is used for idle detection).

    Returns:
        A zero-argument callable that runs one ``DreamingEngine.dream()``
        call and logs the outcome.
    """

    def _run() -> None:
        from .consciousness_config import load_dreaming_config
        from .dreaming import DreamingEngine

        dream_cfg = load_dreaming_config(home)
        if dream_cfg is None or not dream_cfg.enabled:
            return

        outcome = DreamingEngine(
            home=home, config=dream_cfg, consciousness_loop=consciousness_loop
        ).dream()

        # A falsy result carries nothing to report.
        if not outcome:
            return

        if outcome.memories_created:
            logger.info(
                "Dreaming: %d memories created from reflection",
                len(outcome.memories_created),
            )
        elif outcome.skipped_reason:
            logger.debug("Dreaming skipped: %s", outcome.skipped_reason)

    return _run
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def make_itil_auto_close_task(home: Path) -> Callable[[], None]:
    """Build the scheduler callback that auto-closes resolved incidents.

    Each run asks ``ITILManager.auto_close_resolved`` to close incidents
    using a 24 hour stability window and logs what was closed.

    Args:
        home: Shared root directory.

    Returns:
        A zero-argument callable performing one auto-close sweep.
    """

    def _run() -> None:
        from .itil import ITILManager

        closed_ids = ITILManager(home).auto_close_resolved(stable_hours=24)
        if not closed_ids:
            logger.debug("ITIL auto-close: no incidents to close")
            return

        logger.info(
            "ITIL auto-close: %d incident(s) closed: %s", len(closed_ids), closed_ids
        )

    return _run
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def make_itil_escalation_task(home: Path) -> Callable[[], None]:
    """Build the scheduler callback that reports SLA breaches on open incidents.

    Each run calls ``ITILManager.check_sla_breaches`` and emits one warning
    per breach, or a single debug line when there are none.

    Args:
        home: Shared root directory.

    Returns:
        A zero-argument callable performing one SLA breach check.
    """

    def _run() -> None:
        from .itil import ITILManager

        breached = ITILManager(home).check_sla_breaches()
        if not breached:
            logger.debug("ITIL escalation check: no SLA breaches")
            return

        for item in breached:
            logger.warning(
                "ITIL SLA breach: %s (%s) unacknowledged for %d min (limit: %d min)",
                item["id"],
                item["severity"],
                item["elapsed_minutes"],
                item["sla_minutes"],
            )

    return _run
|
|
419
|
+
|
|
420
|
+
|
|
341
421
|
# ---------------------------------------------------------------------------
|
|
342
422
|
# Convenience builder
|
|
343
423
|
# ---------------------------------------------------------------------------
|
|
@@ -369,6 +449,8 @@ def build_scheduler(
|
|
|
369
449
|
+--------------------------+------------+
|
|
370
450
|
| profile_freshness_check | 24 hours |
|
|
371
451
|
+--------------------------+------------+
|
|
452
|
+
| dreaming_reflection | 15 min |
|
|
453
|
+
+--------------------------+------------+
|
|
372
454
|
|
|
373
455
|
Args:
|
|
374
456
|
home: Agent home directory.
|
|
@@ -424,6 +506,13 @@ def build_scheduler(
|
|
|
424
506
|
callback=make_profile_freshness_task(home),
|
|
425
507
|
)
|
|
426
508
|
|
|
509
|
+
# Dreaming — idle-time self-reflection via NVIDIA NIM
|
|
510
|
+
scheduler.register(
|
|
511
|
+
name="dreaming_reflection",
|
|
512
|
+
interval_seconds=900, # 15 minutes
|
|
513
|
+
callback=make_dreaming_task(home, consciousness_loop),
|
|
514
|
+
)
|
|
515
|
+
|
|
427
516
|
# Service health check — pings Qdrant, FalkorDB, Syncthing, daemons
|
|
428
517
|
try:
|
|
429
518
|
from .service_health import make_service_health_task
|
|
@@ -436,4 +525,22 @@ def build_scheduler(
|
|
|
436
525
|
except ImportError:
|
|
437
526
|
logger.debug("service_health not available — service_health_check task skipped")
|
|
438
527
|
|
|
528
|
+
# ITIL escalation check — SLA breach detection every 5 minutes
|
|
529
|
+
try:
|
|
530
|
+
from . import SHARED_ROOT
|
|
531
|
+
|
|
532
|
+
shared = Path(SHARED_ROOT).expanduser()
|
|
533
|
+
scheduler.register(
|
|
534
|
+
name="itil_escalation_check",
|
|
535
|
+
interval_seconds=300, # 5 minutes
|
|
536
|
+
callback=make_itil_escalation_task(shared),
|
|
537
|
+
)
|
|
538
|
+
scheduler.register(
|
|
539
|
+
name="itil_auto_close",
|
|
540
|
+
interval_seconds=1800, # 30 minutes
|
|
541
|
+
callback=make_itil_auto_close_task(shared),
|
|
542
|
+
)
|
|
543
|
+
except Exception:
|
|
544
|
+
logger.debug("ITIL scheduled tasks not available — skipped")
|
|
545
|
+
|
|
439
546
|
return scheduler
|
|
@@ -197,16 +197,90 @@ def check_all_services() -> list[dict[str, Any]]:
|
|
|
197
197
|
# ---------------------------------------------------------------------------
|
|
198
198
|
|
|
199
199
|
|
|
200
|
+
def _create_incident_for_down_service(service_result: dict[str, Any]) -> None:
    """Auto-create an ITIL incident for a down service (with dedup).

    Only creates a new incident if there is no existing open incident
    for the same service. Uses best-effort: failures are logged but
    never block the health check.

    Args:
        service_result: Health-check result for one service. ``"name"`` is
            required; ``"error"`` is optional and defaults to "unreachable"
            in the incident impact text.
    """
    try:
        # Imported locally so this helper does not depend on which stdlib
        # modules the surrounding module happens to import.
        from pathlib import Path

        from . import SHARED_ROOT
        from .itil import ITILManager

        svc_name = service_result["name"]
        # Hand ITILManager a Path, as the scheduler's ITIL tasks do.
        mgr = ITILManager(Path(SHARED_ROOT).expanduser())

        # Dedup: skip if there's already an open incident for this service
        existing = mgr.find_open_incident_for_service(svc_name)
        if existing:
            logger.debug(
                "Skipping incident creation for %s — open incident %s exists",
                svc_name, existing.id,
            )
            return

        error_info = service_result.get("error") or "unreachable"
        mgr.create_incident(
            title=f"{svc_name} down",
            severity="sev3",
            source="service_health",
            affected_services=[svc_name],
            impact=f"Service unreachable: {error_info}",
            managed_by="lumina",
            created_by="service_health",
            tags=["auto-detected", "service-health"],
        )
        logger.info("Auto-created incident for down service: %s", svc_name)
    except Exception as exc:
        # Deliberately broad: incident bookkeeping must never break health checks.
        logger.debug("Failed to create incident for %s: %s", service_result.get("name"), exc)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _auto_resolve_recovered_service(service_result: dict[str, Any]) -> None:
    """Auto-resolve sev4 incidents when a service recovers.

    Looks up the open incident for the service, if any. A sev4 incident is
    moved to "resolved"; an incident of any other severity only receives a
    note saying the service appears to be back up. Best-effort: failures
    are logged at debug level and never raised.

    Args:
        service_result: Health-check result for one service; ``"name"`` is
            required.
    """
    try:
        # Imported locally so this helper does not depend on which stdlib
        # modules the surrounding module happens to import.
        from pathlib import Path

        from . import SHARED_ROOT
        from .itil import ITILManager

        svc_name = service_result["name"]
        # Hand ITILManager a Path, as the scheduler's ITIL tasks do.
        mgr = ITILManager(Path(SHARED_ROOT).expanduser())
        existing = mgr.find_open_incident_for_service(svc_name)
        if existing is None:
            return

        if existing.severity.value == "sev4":
            mgr.update_incident(
                existing.id, "service_health",
                new_status="resolved",
                note=f"Service {svc_name} recovered automatically",
                resolution_summary="Auto-resolved: service came back up",
            )
            logger.info("Auto-resolved sev4 incident %s for recovered service %s",
                        existing.id, svc_name)
        else:
            # NOTE(review): this branch adds a note on every call for as long
            # as the incident stays open and the service is up. If the health
            # task calls this each cycle, the same note will pile up — confirm
            # whether update_incident de-duplicates notes.
            mgr.update_incident(
                existing.id, "service_health",
                note=f"Service {svc_name} appears to be back up",
            )
    except Exception as exc:
        # Deliberately broad: incident bookkeeping must never break health checks.
        logger.debug("Failed to auto-resolve incident for %s: %s",
                     service_result.get("name"), exc)
|
|
268
|
+
|
|
269
|
+
|
|
200
270
|
def make_service_health_task() -> callable:
|
|
201
271
|
"""Return a zero-arg callback suitable for TaskScheduler.register().
|
|
202
272
|
|
|
203
273
|
Runs check_all_services() and logs results. Down services are logged
|
|
204
|
-
at WARNING level; all-up is logged at DEBUG level.
|
|
274
|
+
at WARNING level; all-up is logged at DEBUG level. Auto-creates ITIL
|
|
275
|
+
incidents for down services and auto-resolves sev4 incidents for
|
|
276
|
+
recovered services.
|
|
205
277
|
"""
|
|
206
278
|
|
|
207
279
|
def _run() -> None:
|
|
208
280
|
results = check_all_services()
|
|
209
281
|
down = [r for r in results if r["status"] == "down"]
|
|
282
|
+
up = [r for r in results if r["status"] == "up"]
|
|
283
|
+
|
|
210
284
|
if down:
|
|
211
285
|
names = ", ".join(r["name"] for r in down)
|
|
212
286
|
logger.warning(
|
|
@@ -216,8 +290,9 @@ def make_service_health_task() -> callable:
|
|
|
216
290
|
logger.warning(
|
|
217
291
|
" %s (%s): %s", r["name"], r["url"], r["error"] or "unreachable"
|
|
218
292
|
)
|
|
293
|
+
_create_incident_for_down_service(r)
|
|
219
294
|
else:
|
|
220
|
-
up_count =
|
|
295
|
+
up_count = len(up)
|
|
221
296
|
logger.debug(
|
|
222
297
|
"Service health: %d/%d up, %d unknown",
|
|
223
298
|
up_count,
|
|
@@ -225,4 +300,8 @@ def make_service_health_task() -> callable:
|
|
|
225
300
|
len(results) - up_count,
|
|
226
301
|
)
|
|
227
302
|
|
|
303
|
+
# Check for recovered services
|
|
304
|
+
for r in up:
|
|
305
|
+
_auto_resolve_recovered_service(r)
|
|
306
|
+
|
|
228
307
|
return _run
|
package/src/skcapstone/soul.py
CHANGED
|
@@ -838,6 +838,25 @@ class SoulManager:
|
|
|
838
838
|
"source": "repo",
|
|
839
839
|
"description": desc[:80] if desc else "",
|
|
840
840
|
}
|
|
841
|
+
else:
|
|
842
|
+
# 2b) Local repo not cloned — fall back to GitHub API
|
|
843
|
+
try:
|
|
844
|
+
from .blueprint_registry import _fetch_github_blueprints
|
|
845
|
+
|
|
846
|
+
github_results = _fetch_github_blueprints()
|
|
847
|
+
if github_results:
|
|
848
|
+
for bp in github_results:
|
|
849
|
+
slug = bp["name"]
|
|
850
|
+
if slug not in seen:
|
|
851
|
+
seen[slug] = {
|
|
852
|
+
"name": slug,
|
|
853
|
+
"display_name": bp.get("display_name", slug),
|
|
854
|
+
"category": bp.get("category", ""),
|
|
855
|
+
"source": "github",
|
|
856
|
+
"description": "",
|
|
857
|
+
}
|
|
858
|
+
except Exception:
|
|
859
|
+
pass # offline — show only installed souls
|
|
841
860
|
|
|
842
861
|
# Sort by category, then name
|
|
843
862
|
return sorted(seen.values(), key=lambda d: (d["category"], d["name"]))
|
|
@@ -16,6 +16,7 @@ Usage:
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
18
|
import logging
|
|
19
|
+
import platform
|
|
19
20
|
import shutil
|
|
20
21
|
import subprocess
|
|
21
22
|
from dataclasses import dataclass, field
|
|
@@ -24,6 +25,14 @@ from typing import Optional
|
|
|
24
25
|
|
|
25
26
|
logger = logging.getLogger("skcapstone.systemd")
|
|
26
27
|
|
|
28
|
+
|
|
29
|
+
def _require_linux() -> None:
|
|
30
|
+
"""Raise RuntimeError if not running on Linux."""
|
|
31
|
+
if platform.system() != "Linux":
|
|
32
|
+
raise RuntimeError(
|
|
33
|
+
"systemd is only available on Linux. Use launchd on macOS."
|
|
34
|
+
)
|
|
35
|
+
|
|
27
36
|
SERVICE_NAME = "skcapstone.service"
|
|
28
37
|
SOCKET_NAME = "skcapstone-api.socket"
|
|
29
38
|
HEARTBEAT_SERVICE = "skcomm-heartbeat.service"
|
|
@@ -93,7 +102,11 @@ def _systemctl(*args: str) -> subprocess.CompletedProcess:
|
|
|
93
102
|
|
|
94
103
|
Returns:
|
|
95
104
|
CompletedProcess result.
|
|
105
|
+
|
|
106
|
+
Raises:
|
|
107
|
+
RuntimeError: If not running on Linux.
|
|
96
108
|
"""
|
|
109
|
+
_require_linux()
|
|
97
110
|
return _run(["systemctl", "--user", *args])
|
|
98
111
|
|
|
99
112
|
|
|
@@ -127,6 +140,7 @@ def install_service(
|
|
|
127
140
|
Returns:
|
|
128
141
|
dict: Result with 'installed', 'enabled', 'started' bools.
|
|
129
142
|
"""
|
|
143
|
+
_require_linux()
|
|
130
144
|
target = unit_dir or SYSTEMD_USER_DIR
|
|
131
145
|
source = source_dir or BUNDLED_DIR
|
|
132
146
|
|
|
@@ -184,6 +198,7 @@ def uninstall_service(unit_dir: Optional[Path] = None) -> dict:
|
|
|
184
198
|
Returns:
|
|
185
199
|
dict: Result with 'stopped', 'disabled', 'removed' bools.
|
|
186
200
|
"""
|
|
201
|
+
_require_linux()
|
|
187
202
|
target = unit_dir or SYSTEMD_USER_DIR
|
|
188
203
|
result = {"stopped": False, "disabled": False, "removed": False}
|
|
189
204
|
|
|
@@ -214,6 +229,7 @@ def service_status() -> ServiceStatus:
|
|
|
214
229
|
Returns:
|
|
215
230
|
ServiceStatus: Detailed status information.
|
|
216
231
|
"""
|
|
232
|
+
_require_linux()
|
|
217
233
|
status = ServiceStatus()
|
|
218
234
|
|
|
219
235
|
unit_path = SYSTEMD_USER_DIR / SERVICE_NAME
|
|
@@ -264,6 +280,7 @@ def service_logs(lines: int = 50, follow: bool = False) -> str:
|
|
|
264
280
|
Returns:
|
|
265
281
|
str: Log output or the follow command.
|
|
266
282
|
"""
|
|
283
|
+
_require_linux()
|
|
267
284
|
if follow:
|
|
268
285
|
return f"journalctl --user -u {SERVICE_NAME} -f"
|
|
269
286
|
|