@smilintux/skcapstone 0.2.6 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/docs/CUSTOM_AGENT.md +184 -0
- package/docs/GETTING_STARTED.md +3 -0
- package/openclaw-plugin/src/index.ts +75 -4
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/archive-sessions.sh +72 -0
- package/scripts/install.ps1 +2 -1
- package/scripts/install.sh +2 -1
- package/scripts/nvidia-proxy.mjs +727 -0
- package/scripts/telegram-catchup-all.sh +136 -0
- package/src/skcapstone/__init__.py +70 -1
- package/src/skcapstone/agent_card.py +4 -1
- package/src/skcapstone/blueprint_registry.py +78 -0
- package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
- package/src/skcapstone/cli/__init__.py +2 -0
- package/src/skcapstone/cli/_common.py +5 -5
- package/src/skcapstone/cli/card.py +36 -5
- package/src/skcapstone/cli/config_cmd.py +53 -1
- package/src/skcapstone/cli/itil.py +434 -0
- package/src/skcapstone/cli/peer.py +3 -1
- package/src/skcapstone/cli/peers_dir.py +3 -1
- package/src/skcapstone/cli/preflight_cmd.py +4 -0
- package/src/skcapstone/cli/skills_cmd.py +120 -24
- package/src/skcapstone/cli/soul.py +47 -24
- package/src/skcapstone/cli/status.py +17 -11
- package/src/skcapstone/cli/usage_cmd.py +7 -2
- package/src/skcapstone/consciousness_config.py +27 -0
- package/src/skcapstone/coordination.py +1 -0
- package/src/skcapstone/daemon.py +28 -9
- package/src/skcapstone/defaults/lumina/manifest.json +1 -1
- package/src/skcapstone/doctor.py +115 -0
- package/src/skcapstone/dreaming.py +761 -0
- package/src/skcapstone/itil.py +1104 -0
- package/src/skcapstone/mcp_server.py +258 -0
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/gtd_tools.py +1 -1
- package/src/skcapstone/mcp_tools/itil_tools.py +657 -0
- package/src/skcapstone/mcp_tools/notification_tools.py +12 -11
- package/src/skcapstone/notifications.py +40 -27
- package/src/skcapstone/onboard.py +46 -0
- package/src/skcapstone/pillars/sync.py +11 -4
- package/src/skcapstone/register.py +8 -0
- package/src/skcapstone/scheduled_tasks.py +107 -0
- package/src/skcapstone/service_health.py +81 -2
- package/src/skcapstone/soul.py +19 -0
- package/systemd/skcapstone.service +5 -6
|
@@ -19,6 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import datetime
|
|
21
21
|
import logging
|
|
22
|
+
import os
|
|
22
23
|
import platform
|
|
23
24
|
import subprocess
|
|
24
25
|
import threading
|
|
@@ -43,53 +44,65 @@ _TERMINAL_CMDS: list[list[str]] = [
|
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
def _store_notification_memory(title: str, body: str, urgency: str) -> None:
|
|
46
|
-
"""
|
|
47
|
+
"""Log a notification dispatch to the skcomm/notifications/ directory.
|
|
48
|
+
|
|
49
|
+
These are transport bookkeeping, not persistent memories, so they
|
|
50
|
+
go to ``~/.skcapstone/agents/{agent}/skcomm/notifications/`` instead
|
|
51
|
+
of polluting the memory/ tree that skmemory indexes.
|
|
52
|
+
"""
|
|
47
53
|
try:
|
|
54
|
+
import json as _json
|
|
55
|
+
import uuid
|
|
48
56
|
from . import AGENT_HOME
|
|
49
|
-
from .memory_engine import store as mem_store
|
|
50
|
-
from .models import MemoryLayer
|
|
51
57
|
|
|
52
58
|
home = Path(AGENT_HOME).expanduser()
|
|
53
59
|
if not home.exists():
|
|
54
60
|
return
|
|
55
61
|
|
|
62
|
+
agent_name = os.environ.get("SKCAPSTONE_AGENT", "lumina")
|
|
63
|
+
notif_dir = home / "agents" / agent_name / "skcomm" / "notifications"
|
|
64
|
+
notif_dir.mkdir(parents=True, exist_ok=True)
|
|
65
|
+
|
|
56
66
|
ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
67
|
+
entry = {
|
|
68
|
+
"id": uuid.uuid4().hex[:12],
|
|
69
|
+
"type": "notification-sent",
|
|
70
|
+
"title": title,
|
|
71
|
+
"body": body,
|
|
72
|
+
"urgency": urgency,
|
|
73
|
+
"timestamp": ts,
|
|
74
|
+
}
|
|
75
|
+
path = notif_dir / f"{entry['id']}.json"
|
|
76
|
+
path.write_text(_json.dumps(entry, indent=2), encoding="utf-8")
|
|
66
77
|
except Exception as exc:
|
|
67
|
-
logger.debug("Failed to store notification
|
|
78
|
+
logger.debug("Failed to store notification log: %s", exc)
|
|
68
79
|
|
|
69
80
|
|
|
70
81
|
def _store_click_event(action: str, detail: str) -> None:
|
|
71
|
-
"""
|
|
82
|
+
"""Log a notification click event to the skcomm/notifications/ directory."""
|
|
72
83
|
try:
|
|
84
|
+
import json as _json
|
|
85
|
+
import uuid
|
|
73
86
|
from . import AGENT_HOME
|
|
74
|
-
from .memory_engine import store as mem_store
|
|
75
|
-
from .models import MemoryLayer
|
|
76
87
|
|
|
77
88
|
home = Path(AGENT_HOME).expanduser()
|
|
78
89
|
if not home.exists():
|
|
79
90
|
return
|
|
80
91
|
|
|
92
|
+
agent_name = os.environ.get("SKCAPSTONE_AGENT", "lumina")
|
|
93
|
+
notif_dir = home / "agents" / agent_name / "skcomm" / "notifications"
|
|
94
|
+
notif_dir.mkdir(parents=True, exist_ok=True)
|
|
95
|
+
|
|
81
96
|
ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
layer=MemoryLayer.SHORT_TERM,
|
|
92
|
-
)
|
|
97
|
+
entry = {
|
|
98
|
+
"id": uuid.uuid4().hex[:12],
|
|
99
|
+
"type": "click-event",
|
|
100
|
+
"action": action,
|
|
101
|
+
"detail": detail,
|
|
102
|
+
"timestamp": ts,
|
|
103
|
+
}
|
|
104
|
+
path = notif_dir / f"{entry['id']}.json"
|
|
105
|
+
path.write_text(_json.dumps(entry, indent=2), encoding="utf-8")
|
|
93
106
|
logger.debug("Stored notification click event: %s → %s", action, detail)
|
|
94
107
|
except Exception as exc:
|
|
95
108
|
logger.debug("Failed to store click event in memory: %s", exc)
|
|
@@ -109,6 +109,52 @@ def _step_identity(home_path: Path, name: str, email: str | None) -> tuple[str,
|
|
|
109
109
|
|
|
110
110
|
(home_path / "skills").mkdir(parents=True, exist_ok=True)
|
|
111
111
|
|
|
112
|
+
# Create full skeleton so all commands work from day one
|
|
113
|
+
agent_slug = name.lower().replace(" ", "-")
|
|
114
|
+
agent_dir = home_path / "agents" / agent_slug
|
|
115
|
+
|
|
116
|
+
skeleton_dirs = [
|
|
117
|
+
# Shared root directories
|
|
118
|
+
home_path / "heartbeats",
|
|
119
|
+
home_path / "peers",
|
|
120
|
+
home_path / "coordination" / "tasks",
|
|
121
|
+
home_path / "coordination" / "agents",
|
|
122
|
+
home_path / "logs",
|
|
123
|
+
home_path / "comms" / "inbox",
|
|
124
|
+
home_path / "comms" / "outbox",
|
|
125
|
+
home_path / "comms" / "archive",
|
|
126
|
+
home_path / "archive",
|
|
127
|
+
home_path / "deployments",
|
|
128
|
+
home_path / "docs",
|
|
129
|
+
home_path / "metrics",
|
|
130
|
+
# Per-agent directories
|
|
131
|
+
agent_dir / "memory" / "short-term",
|
|
132
|
+
agent_dir / "memory" / "mid-term",
|
|
133
|
+
agent_dir / "memory" / "long-term",
|
|
134
|
+
agent_dir / "soul" / "installed",
|
|
135
|
+
agent_dir / "wallet",
|
|
136
|
+
agent_dir / "seeds",
|
|
137
|
+
agent_dir / "identity",
|
|
138
|
+
agent_dir / "config",
|
|
139
|
+
agent_dir / "logs",
|
|
140
|
+
agent_dir / "security",
|
|
141
|
+
agent_dir / "cloud9",
|
|
142
|
+
agent_dir / "trust" / "febs",
|
|
143
|
+
agent_dir / "sync" / "outbox",
|
|
144
|
+
agent_dir / "sync" / "inbox",
|
|
145
|
+
agent_dir / "sync" / "archive",
|
|
146
|
+
agent_dir / "reflections",
|
|
147
|
+
agent_dir / "improvements",
|
|
148
|
+
agent_dir / "scripts",
|
|
149
|
+
agent_dir / "cron",
|
|
150
|
+
agent_dir / "archive",
|
|
151
|
+
agent_dir / "comms" / "inbox",
|
|
152
|
+
agent_dir / "comms" / "outbox",
|
|
153
|
+
agent_dir / "comms" / "archive",
|
|
154
|
+
]
|
|
155
|
+
for d in skeleton_dirs:
|
|
156
|
+
d.mkdir(parents=True, exist_ok=True)
|
|
157
|
+
|
|
112
158
|
manifest = {
|
|
113
159
|
"name": name,
|
|
114
160
|
"version": __version__,
|
|
@@ -72,7 +72,7 @@ def initialize_sync(home: Path, config: Optional[SyncConfig] = None) -> SyncStat
|
|
|
72
72
|
else:
|
|
73
73
|
state.status = PillarStatus.DEGRADED
|
|
74
74
|
|
|
75
|
-
state.seed_count = _count_seeds(sync_dir)
|
|
75
|
+
state.seed_count = _count_seeds(sync_dir, home=home)
|
|
76
76
|
return state
|
|
77
77
|
|
|
78
78
|
|
|
@@ -396,7 +396,7 @@ def discover_sync(home: Path) -> SyncState:
|
|
|
396
396
|
state = SyncState(
|
|
397
397
|
transport=transport,
|
|
398
398
|
sync_path=sync_dir,
|
|
399
|
-
seed_count=_count_seeds(sync_dir),
|
|
399
|
+
seed_count=_count_seeds(sync_dir, home=home),
|
|
400
400
|
status=PillarStatus.ACTIVE,
|
|
401
401
|
)
|
|
402
402
|
|
|
@@ -483,8 +483,8 @@ def _get_hostname() -> str:
|
|
|
483
483
|
return socket.gethostname()
|
|
484
484
|
|
|
485
485
|
|
|
486
|
-
def _count_seeds(sync_dir: Path) -> int:
|
|
487
|
-
"""Count seed files across
|
|
486
|
+
def _count_seeds(sync_dir: Path, home: Optional[Path] = None) -> int:
|
|
487
|
+
"""Count seed files across sync subdirs and the agent seeds directory."""
|
|
488
488
|
count = 0
|
|
489
489
|
for subdir in ("outbox", "inbox", "archive"):
|
|
490
490
|
d = sync_dir / subdir
|
|
@@ -492,6 +492,13 @@ def _count_seeds(sync_dir: Path) -> int:
|
|
|
492
492
|
count += sum(
|
|
493
493
|
1 for f in d.iterdir() if f.name.endswith(SEED_EXTENSION) or f.suffix == ".gpg"
|
|
494
494
|
)
|
|
495
|
+
# Also count seeds in the agent's seeds/ directory
|
|
496
|
+
if home is not None:
|
|
497
|
+
seeds_dir = home / "seeds"
|
|
498
|
+
if seeds_dir.is_dir():
|
|
499
|
+
count += sum(
|
|
500
|
+
1 for f in seeds_dir.iterdir() if f.name.endswith(SEED_EXTENSION)
|
|
501
|
+
)
|
|
495
502
|
return count
|
|
496
503
|
|
|
497
504
|
|
|
@@ -97,6 +97,13 @@ def _build_package_registry(workspace: Optional[Path] = None) -> list[dict]:
|
|
|
97
97
|
"mcp_env": None,
|
|
98
98
|
"openclaw_plugin_path": workspace / "pillar-repos" / "sksecurity" / "openclaw-plugin" / "src" / "index.ts",
|
|
99
99
|
},
|
|
100
|
+
{
|
|
101
|
+
"name": "skseed",
|
|
102
|
+
"mcp_cmd": None,
|
|
103
|
+
"mcp_args": None,
|
|
104
|
+
"mcp_env": None,
|
|
105
|
+
"openclaw_plugin_path": workspace / "pillar-repos" / "skseed" / "openclaw-plugin" / "src" / "index.ts",
|
|
106
|
+
},
|
|
100
107
|
{
|
|
101
108
|
"name": "skgit",
|
|
102
109
|
"mcp_cmd": "node",
|
|
@@ -119,6 +126,7 @@ _PILLAR_DIR_MAP: dict[str, Optional[str]] = {
|
|
|
119
126
|
"capauth": "capauth",
|
|
120
127
|
"cloud9": "cloud9-python",
|
|
121
128
|
"sksecurity": "sksecurity",
|
|
129
|
+
"skseed": "skseed",
|
|
122
130
|
"skgit": None, # skill dir only, no pillar repo
|
|
123
131
|
}
|
|
124
132
|
|
|
@@ -9,6 +9,7 @@ Built-in recurring tasks:
|
|
|
9
9
|
- backend_reprobe — every 5 minutes
|
|
10
10
|
- memory_promotion_sweep — every hour
|
|
11
11
|
- profile_freshness_check — every 24 hours
|
|
12
|
+
- dreaming_reflection — every 15 minutes
|
|
12
13
|
|
|
13
14
|
Usage:
|
|
14
15
|
scheduler = build_scheduler(home, stop_event, consciousness_loop, beacon)
|
|
@@ -338,6 +339,85 @@ def make_profile_freshness_task(home: Path, max_age_days: int = 7) -> Callable[[
|
|
|
338
339
|
return _run
|
|
339
340
|
|
|
340
341
|
|
|
342
|
+
def make_dreaming_task(
|
|
343
|
+
home: Path, consciousness_loop: object = None
|
|
344
|
+
) -> Callable[[], None]:
|
|
345
|
+
"""Return a callback that runs the dreaming engine every 15 minutes.
|
|
346
|
+
|
|
347
|
+
Instantiates DreamingEngine lazily (so import errors are deferred until
|
|
348
|
+
first run). The engine itself checks idle state and cooldown internally.
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
home: Agent home directory.
|
|
352
|
+
consciousness_loop: ConsciousnessLoop instance for idle detection.
|
|
353
|
+
"""
|
|
354
|
+
|
|
355
|
+
def _run() -> None:
|
|
356
|
+
from .consciousness_config import load_dreaming_config
|
|
357
|
+
from .dreaming import DreamingEngine
|
|
358
|
+
|
|
359
|
+
config = load_dreaming_config(home)
|
|
360
|
+
if config is None or not config.enabled:
|
|
361
|
+
return
|
|
362
|
+
engine = DreamingEngine(
|
|
363
|
+
home=home, config=config, consciousness_loop=consciousness_loop
|
|
364
|
+
)
|
|
365
|
+
result = engine.dream()
|
|
366
|
+
if result and result.memories_created:
|
|
367
|
+
logger.info(
|
|
368
|
+
"Dreaming: %d memories created from reflection",
|
|
369
|
+
len(result.memories_created),
|
|
370
|
+
)
|
|
371
|
+
elif result and result.skipped_reason:
|
|
372
|
+
logger.debug("Dreaming skipped: %s", result.skipped_reason)
|
|
373
|
+
|
|
374
|
+
return _run
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def make_itil_auto_close_task(home: Path) -> Callable[[], None]:
|
|
378
|
+
"""Return a callback that auto-closes resolved incidents after 24h stable.
|
|
379
|
+
|
|
380
|
+
Args:
|
|
381
|
+
home: Shared root directory.
|
|
382
|
+
"""
|
|
383
|
+
|
|
384
|
+
def _run() -> None:
|
|
385
|
+
from .itil import ITILManager
|
|
386
|
+
|
|
387
|
+
mgr = ITILManager(home)
|
|
388
|
+
closed = mgr.auto_close_resolved(stable_hours=24)
|
|
389
|
+
if closed:
|
|
390
|
+
logger.info("ITIL auto-close: %d incident(s) closed: %s", len(closed), closed)
|
|
391
|
+
else:
|
|
392
|
+
logger.debug("ITIL auto-close: no incidents to close")
|
|
393
|
+
|
|
394
|
+
return _run
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def make_itil_escalation_task(home: Path) -> Callable[[], None]:
|
|
398
|
+
"""Return a callback that checks SLA breaches on open incidents.
|
|
399
|
+
|
|
400
|
+
Args:
|
|
401
|
+
home: Shared root directory.
|
|
402
|
+
"""
|
|
403
|
+
|
|
404
|
+
def _run() -> None:
|
|
405
|
+
from .itil import ITILManager
|
|
406
|
+
|
|
407
|
+
mgr = ITILManager(home)
|
|
408
|
+
breaches = mgr.check_sla_breaches()
|
|
409
|
+
if breaches:
|
|
410
|
+
for b in breaches:
|
|
411
|
+
logger.warning(
|
|
412
|
+
"ITIL SLA breach: %s (%s) unacknowledged for %d min (limit: %d min)",
|
|
413
|
+
b["id"], b["severity"], b["elapsed_minutes"], b["sla_minutes"],
|
|
414
|
+
)
|
|
415
|
+
else:
|
|
416
|
+
logger.debug("ITIL escalation check: no SLA breaches")
|
|
417
|
+
|
|
418
|
+
return _run
|
|
419
|
+
|
|
420
|
+
|
|
341
421
|
# ---------------------------------------------------------------------------
|
|
342
422
|
# Convenience builder
|
|
343
423
|
# ---------------------------------------------------------------------------
|
|
@@ -369,6 +449,8 @@ def build_scheduler(
|
|
|
369
449
|
+--------------------------+------------+
|
|
370
450
|
| profile_freshness_check | 24 hours |
|
|
371
451
|
+--------------------------+------------+
|
|
452
|
+
| dreaming_reflection | 15 min |
|
|
453
|
+
+--------------------------+------------+
|
|
372
454
|
|
|
373
455
|
Args:
|
|
374
456
|
home: Agent home directory.
|
|
@@ -424,6 +506,13 @@ def build_scheduler(
|
|
|
424
506
|
callback=make_profile_freshness_task(home),
|
|
425
507
|
)
|
|
426
508
|
|
|
509
|
+
# Dreaming — idle-time self-reflection via NVIDIA NIM
|
|
510
|
+
scheduler.register(
|
|
511
|
+
name="dreaming_reflection",
|
|
512
|
+
interval_seconds=900, # 15 minutes
|
|
513
|
+
callback=make_dreaming_task(home, consciousness_loop),
|
|
514
|
+
)
|
|
515
|
+
|
|
427
516
|
# Service health check — pings Qdrant, FalkorDB, Syncthing, daemons
|
|
428
517
|
try:
|
|
429
518
|
from .service_health import make_service_health_task
|
|
@@ -436,4 +525,22 @@ def build_scheduler(
|
|
|
436
525
|
except ImportError:
|
|
437
526
|
logger.debug("service_health not available — service_health_check task skipped")
|
|
438
527
|
|
|
528
|
+
# ITIL escalation check — SLA breach detection every 5 minutes
|
|
529
|
+
try:
|
|
530
|
+
from . import SHARED_ROOT
|
|
531
|
+
|
|
532
|
+
shared = Path(SHARED_ROOT).expanduser()
|
|
533
|
+
scheduler.register(
|
|
534
|
+
name="itil_escalation_check",
|
|
535
|
+
interval_seconds=300, # 5 minutes
|
|
536
|
+
callback=make_itil_escalation_task(shared),
|
|
537
|
+
)
|
|
538
|
+
scheduler.register(
|
|
539
|
+
name="itil_auto_close",
|
|
540
|
+
interval_seconds=1800, # 30 minutes
|
|
541
|
+
callback=make_itil_auto_close_task(shared),
|
|
542
|
+
)
|
|
543
|
+
except Exception:
|
|
544
|
+
logger.debug("ITIL scheduled tasks not available — skipped")
|
|
545
|
+
|
|
439
546
|
return scheduler
|
|
@@ -197,16 +197,90 @@ def check_all_services() -> list[dict[str, Any]]:
|
|
|
197
197
|
# ---------------------------------------------------------------------------
|
|
198
198
|
|
|
199
199
|
|
|
200
|
+
def _create_incident_for_down_service(service_result: dict[str, Any]) -> None:
|
|
201
|
+
"""Auto-create an ITIL incident for a down service (with dedup).
|
|
202
|
+
|
|
203
|
+
Only creates a new incident if there is no existing open incident
|
|
204
|
+
for the same service. Uses best-effort: failures are logged but
|
|
205
|
+
never block the health check.
|
|
206
|
+
"""
|
|
207
|
+
try:
|
|
208
|
+
from . import SHARED_ROOT
|
|
209
|
+
from .itil import ITILManager
|
|
210
|
+
|
|
211
|
+
svc_name = service_result["name"]
|
|
212
|
+
mgr = ITILManager(os.path.expanduser(SHARED_ROOT))
|
|
213
|
+
|
|
214
|
+
# Dedup: skip if there's already an open incident for this service
|
|
215
|
+
existing = mgr.find_open_incident_for_service(svc_name)
|
|
216
|
+
if existing:
|
|
217
|
+
logger.debug(
|
|
218
|
+
"Skipping incident creation for %s — open incident %s exists",
|
|
219
|
+
svc_name, existing.id,
|
|
220
|
+
)
|
|
221
|
+
return
|
|
222
|
+
|
|
223
|
+
error_info = service_result.get("error") or "unreachable"
|
|
224
|
+
mgr.create_incident(
|
|
225
|
+
title=f"{svc_name} down",
|
|
226
|
+
severity="sev3",
|
|
227
|
+
source="service_health",
|
|
228
|
+
affected_services=[svc_name],
|
|
229
|
+
impact=f"Service unreachable: {error_info}",
|
|
230
|
+
managed_by="lumina",
|
|
231
|
+
created_by="service_health",
|
|
232
|
+
tags=["auto-detected", "service-health"],
|
|
233
|
+
)
|
|
234
|
+
logger.info("Auto-created incident for down service: %s", svc_name)
|
|
235
|
+
except Exception as exc:
|
|
236
|
+
logger.debug("Failed to create incident for %s: %s", service_result.get("name"), exc)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _auto_resolve_recovered_service(service_result: dict[str, Any]) -> None:
|
|
240
|
+
"""Auto-resolve sev4 incidents when a service recovers."""
|
|
241
|
+
try:
|
|
242
|
+
from . import SHARED_ROOT
|
|
243
|
+
from .itil import ITILManager
|
|
244
|
+
|
|
245
|
+
svc_name = service_result["name"]
|
|
246
|
+
mgr = ITILManager(os.path.expanduser(SHARED_ROOT))
|
|
247
|
+
existing = mgr.find_open_incident_for_service(svc_name)
|
|
248
|
+
if existing is None:
|
|
249
|
+
return
|
|
250
|
+
|
|
251
|
+
if existing.severity.value == "sev4":
|
|
252
|
+
mgr.update_incident(
|
|
253
|
+
existing.id, "service_health",
|
|
254
|
+
new_status="resolved",
|
|
255
|
+
note=f"Service {svc_name} recovered automatically",
|
|
256
|
+
resolution_summary="Auto-resolved: service came back up",
|
|
257
|
+
)
|
|
258
|
+
logger.info("Auto-resolved sev4 incident %s for recovered service %s",
|
|
259
|
+
existing.id, svc_name)
|
|
260
|
+
else:
|
|
261
|
+
mgr.update_incident(
|
|
262
|
+
existing.id, "service_health",
|
|
263
|
+
note=f"Service {svc_name} appears to be back up",
|
|
264
|
+
)
|
|
265
|
+
except Exception as exc:
|
|
266
|
+
logger.debug("Failed to auto-resolve incident for %s: %s",
|
|
267
|
+
service_result.get("name"), exc)
|
|
268
|
+
|
|
269
|
+
|
|
200
270
|
def make_service_health_task() -> callable:
|
|
201
271
|
"""Return a zero-arg callback suitable for TaskScheduler.register().
|
|
202
272
|
|
|
203
273
|
Runs check_all_services() and logs results. Down services are logged
|
|
204
|
-
at WARNING level; all-up is logged at DEBUG level.
|
|
274
|
+
at WARNING level; all-up is logged at DEBUG level. Auto-creates ITIL
|
|
275
|
+
incidents for down services and auto-resolves sev4 incidents for
|
|
276
|
+
recovered services.
|
|
205
277
|
"""
|
|
206
278
|
|
|
207
279
|
def _run() -> None:
|
|
208
280
|
results = check_all_services()
|
|
209
281
|
down = [r for r in results if r["status"] == "down"]
|
|
282
|
+
up = [r for r in results if r["status"] == "up"]
|
|
283
|
+
|
|
210
284
|
if down:
|
|
211
285
|
names = ", ".join(r["name"] for r in down)
|
|
212
286
|
logger.warning(
|
|
@@ -216,8 +290,9 @@ def make_service_health_task() -> callable:
|
|
|
216
290
|
logger.warning(
|
|
217
291
|
" %s (%s): %s", r["name"], r["url"], r["error"] or "unreachable"
|
|
218
292
|
)
|
|
293
|
+
_create_incident_for_down_service(r)
|
|
219
294
|
else:
|
|
220
|
-
up_count =
|
|
295
|
+
up_count = len(up)
|
|
221
296
|
logger.debug(
|
|
222
297
|
"Service health: %d/%d up, %d unknown",
|
|
223
298
|
up_count,
|
|
@@ -225,4 +300,8 @@ def make_service_health_task() -> callable:
|
|
|
225
300
|
len(results) - up_count,
|
|
226
301
|
)
|
|
227
302
|
|
|
303
|
+
# Check for recovered services
|
|
304
|
+
for r in up:
|
|
305
|
+
_auto_resolve_recovered_service(r)
|
|
306
|
+
|
|
228
307
|
return _run
|
package/src/skcapstone/soul.py
CHANGED
|
@@ -838,6 +838,25 @@ class SoulManager:
|
|
|
838
838
|
"source": "repo",
|
|
839
839
|
"description": desc[:80] if desc else "",
|
|
840
840
|
}
|
|
841
|
+
else:
|
|
842
|
+
# 2b) Local repo not cloned — fall back to GitHub API
|
|
843
|
+
try:
|
|
844
|
+
from .blueprint_registry import _fetch_github_blueprints
|
|
845
|
+
|
|
846
|
+
github_results = _fetch_github_blueprints()
|
|
847
|
+
if github_results:
|
|
848
|
+
for bp in github_results:
|
|
849
|
+
slug = bp["name"]
|
|
850
|
+
if slug not in seen:
|
|
851
|
+
seen[slug] = {
|
|
852
|
+
"name": slug,
|
|
853
|
+
"display_name": bp.get("display_name", slug),
|
|
854
|
+
"category": bp.get("category", ""),
|
|
855
|
+
"source": "github",
|
|
856
|
+
"description": "",
|
|
857
|
+
}
|
|
858
|
+
except Exception:
|
|
859
|
+
pass # offline — show only installed souls
|
|
841
860
|
|
|
842
861
|
# Sort by category, then name
|
|
843
862
|
return sorted(seen.values(), key=lambda d: (d["category"], d["name"]))
|
|
@@ -5,19 +5,18 @@ After=network-online.target ollama.service syncthing.service
|
|
|
5
5
|
Wants=network-online.target
|
|
6
6
|
|
|
7
7
|
[Service]
|
|
8
|
-
Type=
|
|
9
|
-
ExecStart
|
|
10
|
-
ExecStop
|
|
8
|
+
Type=simple
|
|
9
|
+
ExecStart=%h/.skenv/bin/skcapstone daemon start --foreground
|
|
10
|
+
ExecStop=%h/.skenv/bin/skcapstone daemon stop
|
|
11
11
|
ExecReload=/bin/kill -HUP $MAINPID
|
|
12
12
|
Restart=on-failure
|
|
13
13
|
RestartSec=10
|
|
14
|
-
# Watchdog: daemon must call sd_notify("WATCHDOG=1") at least every 5 minutes
|
|
15
|
-
WatchdogSec=300
|
|
16
14
|
# Cap memory to prevent OOM from large model loading
|
|
17
15
|
MemoryMax=4G
|
|
18
16
|
# Keep Ollama models warm for 5 minutes between requests
|
|
19
17
|
Environment=PYTHONUNBUFFERED=1
|
|
20
18
|
Environment=OLLAMA_KEEP_ALIVE=5m
|
|
19
|
+
Environment=SKCAPSTONE_AGENT=lumina
|
|
21
20
|
# Journal logging
|
|
22
21
|
StandardOutput=journal
|
|
23
22
|
StandardError=journal
|
|
@@ -27,7 +26,7 @@ SyslogIdentifier=skcapstone
|
|
|
27
26
|
NoNewPrivileges=true
|
|
28
27
|
ProtectSystem=strict
|
|
29
28
|
ProtectHome=read-only
|
|
30
|
-
ReadWritePaths=%h/.skcapstone %h/.
|
|
29
|
+
ReadWritePaths=%h/.skcapstone %h/.skenv %h/.capauth %h/.cloud9 %h/.skcomm %h/.skchat
|
|
31
30
|
PrivateTmp=true
|
|
32
31
|
ProtectKernelTunables=true
|
|
33
32
|
ProtectControlGroups=true
|