@smilintux/skcapstone 0.4.6 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +8 -1
- package/docs/CUSTOM_AGENT.md +184 -0
- package/docs/GETTING_STARTED.md +3 -0
- package/launchd/com.skcapstone.daemon.plist +52 -0
- package/launchd/com.skcapstone.memory-compress.plist +45 -0
- package/launchd/com.skcapstone.skcomm-heartbeat.plist +33 -0
- package/launchd/com.skcapstone.skcomm-queue-drain.plist +34 -0
- package/launchd/install-launchd.sh +156 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/archive-sessions.sh +88 -0
- package/scripts/install.sh +39 -8
- package/scripts/notion-api.py +259 -0
- package/scripts/nvidia-proxy.mjs +878 -0
- package/scripts/proxy-monitor.sh +89 -0
- package/scripts/refresh-anthropic-token.sh +94 -0
- package/scripts/skgateway.mjs +856 -0
- package/scripts/telegram-catchup-all.sh +136 -0
- package/scripts/watch-anthropic-token.sh +117 -0
- package/src/skcapstone/__init__.py +1 -1
- package/src/skcapstone/_cli_monolith.py +4 -4
- package/src/skcapstone/api.py +36 -35
- package/src/skcapstone/auction.py +8 -8
- package/src/skcapstone/blueprint_registry.py +2 -2
- package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
- package/src/skcapstone/brain_first.py +238 -0
- package/src/skcapstone/chat.py +4 -4
- package/src/skcapstone/cli/__init__.py +2 -0
- package/src/skcapstone/cli/agents_spawner.py +5 -2
- package/src/skcapstone/cli/chat.py +5 -2
- package/src/skcapstone/cli/consciousness.py +5 -2
- package/src/skcapstone/cli/daemon.py +116 -41
- package/src/skcapstone/cli/itil.py +434 -0
- package/src/skcapstone/cli/memory.py +4 -4
- package/src/skcapstone/cli/skills_cmd.py +2 -2
- package/src/skcapstone/cli/soul.py +5 -2
- package/src/skcapstone/cli/status.py +11 -8
- package/src/skcapstone/cli/upgrade_cmd.py +7 -4
- package/src/skcapstone/cli/watch_cmd.py +9 -6
- package/src/skcapstone/config_validator.py +7 -4
- package/src/skcapstone/consciousness_config.py +27 -0
- package/src/skcapstone/consciousness_loop.py +20 -18
- package/src/skcapstone/coordination.py +6 -2
- package/src/skcapstone/daemon.py +51 -42
- package/src/skcapstone/dashboard.py +8 -8
- package/src/skcapstone/defaults/lumina/config/claude-hooks.md +42 -0
- package/src/skcapstone/doctor.py +5 -2
- package/src/skcapstone/dreaming.py +1440 -0
- package/src/skcapstone/emotion_tracker.py +2 -2
- package/src/skcapstone/export.py +2 -2
- package/src/skcapstone/fuse_mount.py +21 -13
- package/src/skcapstone/heartbeat.py +33 -29
- package/src/skcapstone/itil.py +1104 -0
- package/src/skcapstone/launchd.py +426 -0
- package/src/skcapstone/mcp_server.py +306 -4
- package/src/skcapstone/mcp_tools/__init__.py +4 -0
- package/src/skcapstone/mcp_tools/_helpers.py +2 -2
- package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
- package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
- package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
- package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
- package/src/skcapstone/mcp_tools/did_tools.py +9 -6
- package/src/skcapstone/mcp_tools/gtd_tools.py +1 -1
- package/src/skcapstone/mcp_tools/itil_tools.py +657 -0
- package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
- package/src/skcapstone/mcp_tools/soul_tools.py +6 -2
- package/src/skcapstone/mdns_discovery.py +2 -2
- package/src/skcapstone/metrics.py +8 -8
- package/src/skcapstone/migrate_memories.py +2 -2
- package/src/skcapstone/models.py +14 -0
- package/src/skcapstone/onboard.py +137 -14
- package/src/skcapstone/peer_directory.py +2 -2
- package/src/skcapstone/providers/docker.py +2 -2
- package/src/skcapstone/scheduled_tasks.py +107 -0
- package/src/skcapstone/service_health.py +83 -4
- package/src/skcapstone/sync_watcher.py +2 -2
- package/src/skcapstone/systemd.py +17 -0
|
@@ -2,10 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import logging
|
|
6
|
+
|
|
5
7
|
from mcp.types import TextContent, Tool
|
|
6
8
|
|
|
7
9
|
from ._helpers import _error_response, _home, _json_response, _text_response
|
|
8
10
|
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
9
13
|
TOOLS: list[Tool] = [
|
|
10
14
|
Tool(
|
|
11
15
|
name="soul_list",
|
|
@@ -250,8 +254,8 @@ async def _handle_soul_list(args: dict) -> list[TextContent]:
|
|
|
250
254
|
"source": "installed",
|
|
251
255
|
"active": name == state.active_soul,
|
|
252
256
|
})
|
|
253
|
-
except Exception:
|
|
254
|
-
|
|
257
|
+
except Exception as exc:
|
|
258
|
+
logger.warning("Failed to list installed soul blueprints: %s", exc)
|
|
255
259
|
|
|
256
260
|
# 2) Blueprints repo
|
|
257
261
|
blueprints_repo = Path.home() / "clawd" / "soul-blueprints" / "blueprints"
|
|
@@ -238,8 +238,8 @@ class MDNSDiscovery:
|
|
|
238
238
|
agent_name,
|
|
239
239
|
)
|
|
240
240
|
return
|
|
241
|
-
except Exception:
|
|
242
|
-
|
|
241
|
+
except Exception as exc:
|
|
242
|
+
logger.warning("Failed to read existing mDNS heartbeat for %s: %s", agent_name, exc)
|
|
243
243
|
|
|
244
244
|
heartbeat = {
|
|
245
245
|
"agent_name": agent_name,
|
|
@@ -358,8 +358,8 @@ class MetricsCollector:
|
|
|
358
358
|
1 for t in transports.values()
|
|
359
359
|
if isinstance(t, dict) and t.get("enabled", True)
|
|
360
360
|
)
|
|
361
|
-
except Exception:
|
|
362
|
-
|
|
361
|
+
except Exception as exc:
|
|
362
|
+
logger.warning("Failed to parse skcomm transport config: %s", exc)
|
|
363
363
|
|
|
364
364
|
report.transport = TransportMetrics(
|
|
365
365
|
available=True,
|
|
@@ -474,8 +474,8 @@ class MetricsCollector:
|
|
|
474
474
|
if state_path.exists():
|
|
475
475
|
try:
|
|
476
476
|
state = json.loads(state_path.read_text(encoding="utf-8"))
|
|
477
|
-
except Exception:
|
|
478
|
-
|
|
477
|
+
except Exception as exc:
|
|
478
|
+
logger.warning("Failed to read sync_state.json: %s", exc)
|
|
479
479
|
|
|
480
480
|
report.sync = SyncMetrics(
|
|
481
481
|
available=True,
|
|
@@ -510,8 +510,8 @@ class MetricsCollector:
|
|
|
510
510
|
try:
|
|
511
511
|
subs = json.loads(subs_file.read_text(encoding="utf-8"))
|
|
512
512
|
sub_count = len(subs)
|
|
513
|
-
except Exception:
|
|
514
|
-
|
|
513
|
+
except Exception as exc:
|
|
514
|
+
logger.warning("Failed to read pubsub subscriptions.json: %s", exc)
|
|
515
515
|
|
|
516
516
|
report.pubsub = PubSubMetrics(
|
|
517
517
|
available=True,
|
|
@@ -546,8 +546,8 @@ class MetricsCollector:
|
|
|
546
546
|
try:
|
|
547
547
|
rot_data = json.loads(rot_log.read_text(encoding="utf-8"))
|
|
548
548
|
rotations = len(rot_data)
|
|
549
|
-
except Exception:
|
|
550
|
-
|
|
549
|
+
except Exception as exc:
|
|
550
|
+
logger.warning("Failed to read KMS rotation log: %s", exc)
|
|
551
551
|
|
|
552
552
|
report.kms = KmsMetrics(
|
|
553
553
|
available=True,
|
|
@@ -100,8 +100,8 @@ def migrate(
|
|
|
100
100
|
try:
|
|
101
101
|
existing = store.list_memories(limit=10000)
|
|
102
102
|
existing_ids = {m.id for m in existing}
|
|
103
|
-
except Exception:
|
|
104
|
-
|
|
103
|
+
except Exception as exc:
|
|
104
|
+
logger.warning("Failed to load existing memory IDs for deduplication: %s", exc)
|
|
105
105
|
|
|
106
106
|
for entry in entries:
|
|
107
107
|
if entry.memory_id in existing_ids:
|
package/src/skcapstone/models.py
CHANGED
|
@@ -238,6 +238,19 @@ class SyncConfig(BaseModel):
|
|
|
238
238
|
git_remote: Optional[str] = None
|
|
239
239
|
|
|
240
240
|
|
|
241
|
+
class BrainFirstConfig(BaseModel):
    """Settings for the brain-first protocol.

    With the protocol switched on, an agent looks through its memory
    before acting on a task, so that prior knowledge is surfaced and
    redundant work is avoided.
    """

    # Master switch; the protocol is on unless explicitly disabled.
    enabled: bool = True
    # NOTE(review): presumably the cap on memories surfaced per lookup —
    # confirm against the code that consumes this config.
    max_results: int = 5
    # NOTE(review): presumably the lowest importance score a memory may
    # have and still be surfaced — confirm against the consumer.
    min_importance: float = 0.3
    # Off by default. NOTE(review): presumably controls whether surfaced
    # memories are injected automatically — confirm against the consumer.
    auto_inject: bool = False
|
|
252
|
+
|
|
253
|
+
|
|
241
254
|
class AgentConfig(BaseModel):
|
|
242
255
|
"""Persistent configuration for the agent runtime."""
|
|
243
256
|
|
|
@@ -249,6 +262,7 @@ class AgentConfig(BaseModel):
|
|
|
249
262
|
trust_home: Path = Path("~/.cloud9")
|
|
250
263
|
default_connector: Optional[str] = None
|
|
251
264
|
sync: SyncConfig = Field(default_factory=SyncConfig)
|
|
265
|
+
brain_first: BrainFirstConfig = Field(default_factory=BrainFirstConfig)
|
|
252
266
|
capabilities: list[str] = Field(
|
|
253
267
|
default_factory=lambda: ["consciousness", "code", "chat", "memory"]
|
|
254
268
|
)
|
|
@@ -21,6 +21,7 @@ Steps:
|
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
23
23
|
import json
|
|
24
|
+
import logging
|
|
24
25
|
import sys
|
|
25
26
|
import time
|
|
26
27
|
from datetime import datetime, timezone
|
|
@@ -28,6 +29,8 @@ from pathlib import Path
|
|
|
28
29
|
from typing import Optional
|
|
29
30
|
|
|
30
31
|
import click
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
31
34
|
from rich.console import Console
|
|
32
35
|
from rich.panel import Panel
|
|
33
36
|
from rich.prompt import Confirm, Prompt
|
|
@@ -503,8 +506,8 @@ def _step_ollama_models(prereqs: dict) -> bool:
|
|
|
503
506
|
if DEFAULT_MODEL in (r.stdout or ""):
|
|
504
507
|
click.echo(click.style(" ✓ ", fg="green") + f"{DEFAULT_MODEL} already present")
|
|
505
508
|
return True
|
|
506
|
-
except Exception:
|
|
507
|
-
|
|
509
|
+
except Exception as exc:
|
|
510
|
+
logger.debug("Failed to check ollama model list: %s", exc)
|
|
508
511
|
|
|
509
512
|
if not click.confirm(f" Pull default model ({DEFAULT_MODEL}, ~2 GB)?", default=True):
|
|
510
513
|
click.echo(click.style(" ↷ ", fg="bright_black") + f"Skipped — pull later: ollama pull {DEFAULT_MODEL}")
|
|
@@ -586,22 +589,40 @@ def _step_config_files(home_path: Path) -> tuple:
|
|
|
586
589
|
return consciousness_ok, profiles_ok
|
|
587
590
|
|
|
588
591
|
|
|
589
|
-
def
|
|
590
|
-
"""Install systemd
|
|
592
|
+
def _step_autostart_service(agent_name: str = "sovereign") -> bool:
    """Run the platform-specific auto-start installer step.

    Linux is handed to the systemd step and macOS to the launchd step;
    on any other platform a "skipped" notice is printed instead.

    Args:
        agent_name: The agent name chosen during onboarding. Only the
            macOS (launchd) step receives it.

    Returns:
        Whatever the delegated step returns, or False when the platform
        is not supported.
    """
    import platform

    os_name = platform.system()

    # Each supported platform delegates to its own installer step.
    if os_name == "Linux":
        return _step_systemd_service_linux()
    if os_name == "Darwin":
        return _step_launchd_service_macos(agent_name)

    skip_notice = f"Auto-start not supported on {os_name} — skipped"
    click.echo(click.style(" ↷ ", fg="bright_black") + skip_notice)
    return False
|
|
600
618
|
|
|
619
|
+
|
|
620
|
+
def _step_systemd_service_linux() -> bool:
|
|
621
|
+
"""Install systemd user service (Linux only)."""
|
|
601
622
|
if not click.confirm(" Install systemd user service for auto-start at login?", default=False):
|
|
602
623
|
click.echo(
|
|
603
624
|
click.style(" ↷ ", fg="bright_black")
|
|
604
|
-
+ "Skipped — run 'skcapstone
|
|
625
|
+
+ "Skipped — run 'skcapstone daemon install' to enable later"
|
|
605
626
|
)
|
|
606
627
|
return False
|
|
607
628
|
|
|
@@ -622,13 +643,109 @@ def _step_systemd_service() -> bool:
|
|
|
622
643
|
return True
|
|
623
644
|
else:
|
|
624
645
|
click.echo(click.style(" ✗ ", fg="red") + "Service install failed")
|
|
625
|
-
click.echo(click.style(" ", fg="bright_black") + "Run manually: skcapstone
|
|
646
|
+
click.echo(click.style(" ", fg="bright_black") + "Run manually: skcapstone daemon install")
|
|
626
647
|
return False
|
|
627
648
|
except Exception as exc:
|
|
628
649
|
click.echo(click.style(" ⚠ ", fg="yellow") + f"Systemd: {exc}")
|
|
629
650
|
return False
|
|
630
651
|
|
|
631
652
|
|
|
653
|
+
def _step_launchd_service_macos(agent_name: str) -> bool:
    """Install launchd user agents (macOS only).

    Lists the services that ``list_available_services`` reports as
    available, lets the user install all of them or pick a subset by
    number, and hands the selection to ``install_service``.

    NOTE(review): the plist files are written by ``install_service``; the
    original docstring names ~/Library/LaunchAgents/ as the target —
    confirm in the launchd module.

    Args:
        agent_name: Agent name for the SKCAPSTONE_AGENT env var.

    Returns:
        True if ``install_service`` reports that services were installed;
        False if the launchd module is missing, nothing is available, the
        user skipped, nothing was selected, or installation failed.
    """
    try:
        from .launchd import install_service, list_available_services
    except ImportError as exc:
        click.echo(click.style(" ⚠ ", fg="yellow") + f"launchd module not available: {exc}")
        return False

    # click.echo() prints Rich-style "[cyan]...[/]" markup literally, so
    # colour is applied with click.style() instead.
    click.echo(
        " Agent name: "
        + click.style(agent_name, fg="cyan")
        + " (used in SKCAPSTONE_AGENT)"
    )
    click.echo()

    all_available = [s for s in list_available_services(agent_name) if s["available"]]
    if not all_available:
        # Nothing to offer: skip the prompts that could only end in
        # "No services selected".
        click.echo(
            click.style(" ↷ ", fg="bright_black")
            + "No launchd services available — skipped"
        )
        return False

    click.echo(" Available services:")
    for i, svc in enumerate(all_available, 1):
        click.echo(f" {i}. {svc['description']} ({svc['label']})")
    click.echo()

    if not click.confirm(" Install launchd services for auto-start at login?", default=True):
        click.echo(
            click.style(" ↷ ", fg="bright_black")
            + "Skipped — run 'skcapstone daemon install' to enable later"
        )
        return False

    selected_suffixes: list[str] = []
    if click.confirm(" Install all available services?", default=True):
        selected_suffixes = [s["suffix"] for s in all_available]
    else:
        click.echo(" Enter service numbers (comma-separated), or 'none' to skip:")
        raw = click.prompt(" Services", default="1")
        if raw.strip().lower() == "none":
            click.echo(click.style(" ↷ ", fg="bright_black") + "Skipped")
            return False
        try:
            # Empty tokens (e.g. a trailing comma) are ignored rather than
            # invalidating the whole selection.
            indices = [int(tok) - 1 for tok in raw.split(",") if tok.strip()]
        except ValueError:
            click.echo(
                click.style(" ⚠ ", fg="yellow")
                + "Invalid selection — installing core services only"
            )
            # "Core" means every available service whose suffix does not
            # start with "sk".
            selected_suffixes = [
                s["suffix"] for s in all_available if not s["suffix"].startswith("sk")
            ]
        else:
            # Out-of-range numbers are dropped; dict.fromkeys removes
            # repeated picks while keeping the order the user typed.
            selected_suffixes = list(
                dict.fromkeys(
                    all_available[i]["suffix"]
                    for i in indices
                    if 0 <= i < len(all_available)
                )
            )

    if not selected_suffixes:
        click.echo(click.style(" ↷ ", fg="bright_black") + "No services selected")
        return False

    start_now = click.confirm(" Start services now?", default=False)

    try:
        result = install_service(
            agent_name=agent_name,
            services=selected_suffixes,
            start=start_now,
        )

        if not result.get("installed"):
            click.echo(click.style(" ✗ ", fg="red") + "No services were installed")
            return False

        for svc in result.get("services", []):
            if svc.get("loaded"):
                status = click.style("loaded", fg="green")
            else:
                status = click.style("installed", dim=True)
            click.echo(click.style(" ✓ ", fg="green") + f"{svc['label']} — {status}")

        click.echo()
        click.echo(click.style(" ", fg="bright_black") + "Manage services:")
        click.echo(click.style(" ", fg="bright_black") + " launchctl list | grep skcapstone")
        click.echo(click.style(" ", fg="bright_black") + " launchctl start com.skcapstone.daemon")
        click.echo(click.style(" ", fg="bright_black") + " skcapstone daemon uninstall")
        return True

    except Exception as exc:
        # Onboarding must continue even if launchd installation fails.
        click.echo(click.style(" ⚠ ", fg="yellow") + f"launchd install: {exc}")
        return False
|
|
747
|
+
|
|
748
|
+
|
|
632
749
|
def _step_doctor_check(home_path: Path) -> "object":
|
|
633
750
|
"""Run doctor diagnostics and print results.
|
|
634
751
|
|
|
@@ -851,10 +968,10 @@ def run_onboard(home: Optional[str] = None) -> None:
|
|
|
851
968
|
open_task_count = _step_board(home_path, name)
|
|
852
969
|
|
|
853
970
|
# -----------------------------------------------------------------------
|
|
854
|
-
# Step 13:
|
|
971
|
+
# Step 13: Auto-Start Service (systemd on Linux, launchd on macOS)
|
|
855
972
|
# -----------------------------------------------------------------------
|
|
856
|
-
_step_header(13, "
|
|
857
|
-
|
|
973
|
+
_step_header(13, "Auto-Start Service")
|
|
974
|
+
service_ok = _step_autostart_service(agent_name=agent_slug)
|
|
858
975
|
|
|
859
976
|
# -----------------------------------------------------------------------
|
|
860
977
|
# Post-wizard: Doctor Diagnostics
|
|
@@ -877,8 +994,8 @@ def run_onboard(home: Optional[str] = None) -> None:
|
|
|
877
994
|
soul = load_soul()
|
|
878
995
|
if soul and soul.boot_message:
|
|
879
996
|
boot_message = soul.boot_message
|
|
880
|
-
except Exception:
|
|
881
|
-
|
|
997
|
+
except Exception as exc:
|
|
998
|
+
logger.debug("Failed to load soul boot message, using default: %s", exc)
|
|
882
999
|
|
|
883
1000
|
# -----------------------------------------------------------------------
|
|
884
1001
|
# Summary table
|
|
@@ -911,7 +1028,13 @@ def run_onboard(home: Optional[str] = None) -> None:
|
|
|
911
1028
|
summary.add_row("Heartbeat", "[green]ACTIVE[/]" if hb_ok else "[yellow]FAILED[/]", f"{agent_slug}.json" if hb_ok else "see above")
|
|
912
1029
|
summary.add_row("Crush AI", "[green]READY[/]" if crush_ok else "[yellow]CONFIG ONLY[/]", "~/.config/crush/crush.json")
|
|
913
1030
|
summary.add_row("Board", "[green]ACTIVE[/]", f"{open_task_count} open tasks")
|
|
914
|
-
|
|
1031
|
+
import platform as _plat
|
|
1032
|
+
_svc_type = "launchd" if _plat.system() == "Darwin" else "systemd"
|
|
1033
|
+
summary.add_row(
|
|
1034
|
+
"Auto-Start",
|
|
1035
|
+
"[green]INSTALLED[/]" if service_ok else "[dim]OPTIONAL[/]",
|
|
1036
|
+
f"{_svc_type} services" if service_ok else f"skcapstone daemon install",
|
|
1037
|
+
)
|
|
915
1038
|
doctor_status = "[green]ALL PASSED[/]" if doctor_report.all_passed else f"[yellow]{doctor_report.failed_count} failed[/]"
|
|
916
1039
|
summary.add_row("Doctor", doctor_status, f"{doctor_report.passed_count}/{doctor_report.total_count} checks")
|
|
917
1040
|
summary.add_row(
|
|
@@ -250,8 +250,8 @@ class PeerDirectory:
|
|
|
250
250
|
ts = data.get("timestamp", "")
|
|
251
251
|
if ts:
|
|
252
252
|
self._entries[agent_name].last_seen = ts
|
|
253
|
-
except Exception:
|
|
254
|
-
|
|
253
|
+
except Exception as exc:
|
|
254
|
+
logger.warning("Failed to update last_seen from heartbeat for %s: %s", agent_name, exc)
|
|
255
255
|
continue
|
|
256
256
|
|
|
257
257
|
try:
|
|
@@ -307,8 +307,8 @@ class DockerProvider(ProviderBackend):
|
|
|
307
307
|
old = client.containers.get(container_name)
|
|
308
308
|
logger.warning("Removing stale container: %s", container_name)
|
|
309
309
|
old.remove(force=True)
|
|
310
|
-
except Exception:
|
|
311
|
-
|
|
310
|
+
except Exception as exc:
|
|
311
|
+
logger.debug("No stale container to remove for %s (expected if first run): %s", container_name, exc)
|
|
312
312
|
|
|
313
313
|
# Ensure named volume for agent state persistence
|
|
314
314
|
try:
|
|
@@ -9,6 +9,7 @@ Built-in recurring tasks:
|
|
|
9
9
|
- backend_reprobe — every 5 minutes
|
|
10
10
|
- memory_promotion_sweep — every hour
|
|
11
11
|
- profile_freshness_check — every 24 hours
|
|
12
|
+
- dreaming_reflection — every 15 minutes
|
|
12
13
|
|
|
13
14
|
Usage:
|
|
14
15
|
scheduler = build_scheduler(home, stop_event, consciousness_loop, beacon)
|
|
@@ -338,6 +339,85 @@ def make_profile_freshness_task(home: Path, max_age_days: int = 7) -> Callable[[
|
|
|
338
339
|
return _run
|
|
339
340
|
|
|
340
341
|
|
|
342
|
+
def make_dreaming_task(
    home: Path, consciousness_loop: object = None
) -> Callable[[], None]:
    """Build the scheduler callback that triggers a dreaming pass.

    The dreaming modules are imported inside the callback, so import
    errors surface on the first run rather than when the task is built.
    A new ``DreamingEngine`` is constructed on every run.

    NOTE(review): idle-state and cooldown checks are expected to live
    inside ``DreamingEngine.dream()``; nothing in this callback enforces
    them.

    Args:
        home: Agent home directory.
        consciousness_loop: ConsciousnessLoop instance, passed through to
            the engine (used there for idle detection).

    Returns:
        A zero-argument callback for the task scheduler.
    """

    def _run() -> None:
        from .consciousness_config import load_dreaming_config
        from .dreaming import DreamingEngine

        dream_cfg = load_dreaming_config(home)
        # Nothing to do when dreaming is unconfigured or switched off.
        if dream_cfg is None or not dream_cfg.enabled:
            return

        outcome = DreamingEngine(
            home=home, config=dream_cfg, consciousness_loop=consciousness_loop
        ).dream()
        if not outcome:
            return

        if outcome.memories_created:
            logger.info(
                "Dreaming: %d memories created from reflection",
                len(outcome.memories_created),
            )
        elif outcome.skipped_reason:
            logger.debug("Dreaming skipped: %s", outcome.skipped_reason)

    return _run
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def make_itil_auto_close_task(home: Path, stable_hours: int = 24) -> Callable[[], None]:
    """Return a callback that auto-closes resolved incidents once stable.

    The ITIL module is imported inside the callback, so import errors
    surface when the task runs rather than when it is built.

    Args:
        home: Shared root directory handed to ``ITILManager``.
        stable_hours: Value passed to ``auto_close_resolved`` as its
            ``stable_hours`` argument. Defaults to 24, the value that was
            previously hard-coded.

    Returns:
        A zero-argument callback for the task scheduler.
    """

    def _run() -> None:
        from .itil import ITILManager

        mgr = ITILManager(home)
        closed = mgr.auto_close_resolved(stable_hours=stable_hours)
        if closed:
            logger.info("ITIL auto-close: %d incident(s) closed: %s", len(closed), closed)
        else:
            logger.debug("ITIL auto-close: no incidents to close")

    return _run
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def make_itil_escalation_task(home: Path) -> Callable[[], None]:
    """Build the scheduler callback that reports SLA breaches.

    Each run asks ``ITILManager.check_sla_breaches()`` for breaches and
    logs one WARNING per breach, or a single DEBUG line when there are
    none.

    Args:
        home: Shared root directory handed to ``ITILManager``.

    Returns:
        A zero-argument callback for the task scheduler.
    """

    def _run() -> None:
        from .itil import ITILManager

        sla_breaches = ITILManager(home).check_sla_breaches()
        if not sla_breaches:
            logger.debug("ITIL escalation check: no SLA breaches")
            return

        for breach in sla_breaches:
            logger.warning(
                "ITIL SLA breach: %s (%s) unacknowledged for %d min (limit: %d min)",
                breach["id"],
                breach["severity"],
                breach["elapsed_minutes"],
                breach["sla_minutes"],
            )

    return _run
|
|
419
|
+
|
|
420
|
+
|
|
341
421
|
# ---------------------------------------------------------------------------
|
|
342
422
|
# Convenience builder
|
|
343
423
|
# ---------------------------------------------------------------------------
|
|
@@ -369,6 +449,8 @@ def build_scheduler(
|
|
|
369
449
|
+--------------------------+------------+
|
|
370
450
|
| profile_freshness_check | 24 hours |
|
|
371
451
|
+--------------------------+------------+
|
|
452
|
+
| dreaming_reflection | 15 min |
|
|
453
|
+
+--------------------------+------------+
|
|
372
454
|
|
|
373
455
|
Args:
|
|
374
456
|
home: Agent home directory.
|
|
@@ -424,6 +506,13 @@ def build_scheduler(
|
|
|
424
506
|
callback=make_profile_freshness_task(home),
|
|
425
507
|
)
|
|
426
508
|
|
|
509
|
+
# Dreaming — idle-time self-reflection via NVIDIA NIM
|
|
510
|
+
scheduler.register(
|
|
511
|
+
name="dreaming_reflection",
|
|
512
|
+
interval_seconds=900, # 15 minutes
|
|
513
|
+
callback=make_dreaming_task(home, consciousness_loop),
|
|
514
|
+
)
|
|
515
|
+
|
|
427
516
|
# Service health check — pings Qdrant, FalkorDB, Syncthing, daemons
|
|
428
517
|
try:
|
|
429
518
|
from .service_health import make_service_health_task
|
|
@@ -436,4 +525,22 @@ def build_scheduler(
|
|
|
436
525
|
except ImportError:
|
|
437
526
|
logger.debug("service_health not available — service_health_check task skipped")
|
|
438
527
|
|
|
528
|
+
# ITIL escalation check — SLA breach detection every 5 minutes
|
|
529
|
+
try:
|
|
530
|
+
from . import SHARED_ROOT
|
|
531
|
+
|
|
532
|
+
shared = Path(SHARED_ROOT).expanduser()
|
|
533
|
+
scheduler.register(
|
|
534
|
+
name="itil_escalation_check",
|
|
535
|
+
interval_seconds=300, # 5 minutes
|
|
536
|
+
callback=make_itil_escalation_task(shared),
|
|
537
|
+
)
|
|
538
|
+
scheduler.register(
|
|
539
|
+
name="itil_auto_close",
|
|
540
|
+
interval_seconds=1800, # 30 minutes
|
|
541
|
+
callback=make_itil_auto_close_task(shared),
|
|
542
|
+
)
|
|
543
|
+
except Exception:
|
|
544
|
+
logger.debug("ITIL scheduled tasks not available — skipped")
|
|
545
|
+
|
|
439
546
|
return scheduler
|
|
@@ -76,8 +76,8 @@ def _http_check(
|
|
|
76
76
|
try:
|
|
77
77
|
body = json.loads(resp.read().decode("utf-8"))
|
|
78
78
|
result["version"] = body.get(version_key)
|
|
79
|
-
except Exception:
|
|
80
|
-
|
|
79
|
+
except Exception as exc:
|
|
80
|
+
logger.warning("Failed to parse version from service health response: %s", exc)
|
|
81
81
|
except urllib.error.HTTPError as exc:
|
|
82
82
|
latency = (time.monotonic() - t0) * 1000
|
|
83
83
|
result["latency_ms"] = round(latency, 1)
|
|
@@ -197,16 +197,90 @@ def check_all_services() -> list[dict[str, Any]]:
|
|
|
197
197
|
# ---------------------------------------------------------------------------
|
|
198
198
|
|
|
199
199
|
|
|
200
|
+
def _create_incident_for_down_service(service_result: dict[str, Any]) -> None:
    """Auto-create an ITIL incident for a down service (with dedup).

    A new sev3 incident is created only when
    ``find_open_incident_for_service`` returns nothing for the service.
    Best-effort: any failure is logged at WARNING and swallowed so it can
    never block the health check.

    Args:
        service_result: One health-check result. ``"name"`` identifies the
            service; ``"error"`` (optional) is copied into the incident's
            impact text.
    """
    try:
        # Imported here so the block does not rely on module-level imports.
        from pathlib import Path

        from . import SHARED_ROOT
        from .itil import ITILManager

        svc_name = service_result["name"]
        # A Path is passed, matching how the scheduler builds ITILManager.
        mgr = ITILManager(Path(SHARED_ROOT).expanduser())

        # Dedup: skip if there's already an open incident for this service
        existing = mgr.find_open_incident_for_service(svc_name)
        if existing:
            logger.debug(
                "Skipping incident creation for %s — open incident %s exists",
                svc_name, existing.id,
            )
            return

        error_info = service_result.get("error") or "unreachable"
        mgr.create_incident(
            title=f"{svc_name} down",
            severity="sev3",
            source="service_health",
            affected_services=[svc_name],
            impact=f"Service unreachable: {error_info}",
            managed_by="lumina",
            created_by="service_health",
            tags=["auto-detected", "service-health"],
        )
        logger.info("Auto-created incident for down service: %s", svc_name)
    except Exception as exc:
        # WARNING rather than DEBUG: a down service with no incident
        # should be visible in normal logs.
        logger.warning(
            "Failed to create incident for %s: %s", service_result.get("name"), exc
        )
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _auto_resolve_recovered_service(service_result: dict[str, Any]) -> None:
    """Update the open ITIL incident, if any, for a service that is up again.

    A sev4 incident is moved to ``resolved``; an incident of any other
    severity only receives a note that the service appears to be back.
    Best-effort: any failure is logged at WARNING and swallowed so it can
    never block the health check.

    Args:
        service_result: One health-check result; ``"name"`` identifies the
            service.
    """
    try:
        # Imported here so the block does not rely on module-level imports.
        from pathlib import Path

        from . import SHARED_ROOT
        from .itil import ITILManager

        svc_name = service_result["name"]
        # A Path is passed, matching how the scheduler builds ITILManager.
        mgr = ITILManager(Path(SHARED_ROOT).expanduser())
        existing = mgr.find_open_incident_for_service(svc_name)
        if existing is None:
            return

        if existing.severity.value == "sev4":
            mgr.update_incident(
                existing.id, "service_health",
                new_status="resolved",
                note=f"Service {svc_name} recovered automatically",
                resolution_summary="Auto-resolved: service came back up",
            )
            logger.info(
                "Auto-resolved sev4 incident %s for recovered service %s",
                existing.id, svc_name,
            )
        else:
            # NOTE(review): this runs on every health check while the
            # incident stays open and the service is up, so the same note
            # may be appended repeatedly — confirm whether update_incident
            # de-duplicates notes.
            mgr.update_incident(
                existing.id, "service_health",
                note=f"Service {svc_name} appears to be back up",
            )
    except Exception as exc:
        # WARNING rather than DEBUG so a failed auto-resolve is visible in
        # normal logs.
        logger.warning(
            "Failed to auto-resolve incident for %s: %s",
            service_result.get("name"), exc,
        )
|
|
268
|
+
|
|
269
|
+
|
|
200
270
|
def make_service_health_task() -> callable:
|
|
201
271
|
"""Return a zero-arg callback suitable for TaskScheduler.register().
|
|
202
272
|
|
|
203
273
|
Runs check_all_services() and logs results. Down services are logged
|
|
204
|
-
at WARNING level; all-up is logged at DEBUG level.
|
|
274
|
+
at WARNING level; all-up is logged at DEBUG level. Auto-creates ITIL
|
|
275
|
+
incidents for down services and auto-resolves sev4 incidents for
|
|
276
|
+
recovered services.
|
|
205
277
|
"""
|
|
206
278
|
|
|
207
279
|
def _run() -> None:
|
|
208
280
|
results = check_all_services()
|
|
209
281
|
down = [r for r in results if r["status"] == "down"]
|
|
282
|
+
up = [r for r in results if r["status"] == "up"]
|
|
283
|
+
|
|
210
284
|
if down:
|
|
211
285
|
names = ", ".join(r["name"] for r in down)
|
|
212
286
|
logger.warning(
|
|
@@ -216,8 +290,9 @@ def make_service_health_task() -> callable:
|
|
|
216
290
|
logger.warning(
|
|
217
291
|
" %s (%s): %s", r["name"], r["url"], r["error"] or "unreachable"
|
|
218
292
|
)
|
|
293
|
+
_create_incident_for_down_service(r)
|
|
219
294
|
else:
|
|
220
|
-
up_count =
|
|
295
|
+
up_count = len(up)
|
|
221
296
|
logger.debug(
|
|
222
297
|
"Service health: %d/%d up, %d unknown",
|
|
223
298
|
up_count,
|
|
@@ -225,4 +300,8 @@ def make_service_health_task() -> callable:
|
|
|
225
300
|
len(results) - up_count,
|
|
226
301
|
)
|
|
227
302
|
|
|
303
|
+
# Check for recovered services
|
|
304
|
+
for r in up:
|
|
305
|
+
_auto_resolve_recovered_service(r)
|
|
306
|
+
|
|
228
307
|
return _run
|
|
@@ -574,8 +574,8 @@ class SyncWatcher:
|
|
|
574
574
|
try:
|
|
575
575
|
self._observer.stop()
|
|
576
576
|
self._observer.join(timeout=5)
|
|
577
|
-
except Exception:
|
|
578
|
-
|
|
577
|
+
except Exception as exc:
|
|
578
|
+
logger.warning("Error stopping SyncWatcher observer: %s", exc)
|
|
579
579
|
self._observer = None
|
|
580
580
|
logger.info("SyncWatcher stopped.")
|
|
581
581
|
|