npm - @agentikos/omega-os - Versions diffs - 0.19.37 → 0.19.39 - Mend

@agentikos/omega-os 0.19.37 → 0.19.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/omega/Agentik_Engine/omega_engine/prompt_audit.py ADDED Viewed

@@ -0,0 +1,395 @@
+"""Prompt audit — verify the AISB agent prompts are well-formed AND
+reference the contracts they MUST reference.
+Without this, the engine ships agents that read like generic system prompts
+and the Three Laws / LMC protocol / verified-completion (`.done.json`)
+contracts silently drift out of the role files. Orchestration breaks at
+the first dispatch because a worker doesn't know its done-signal contract
+or an oracle doesn't know it must enforce the laws.
+The audit applies per-file (each role's prompt is scored in isolation —
+not after the engine concatenates LMC + shared protocols at spawn time)
+because the file on disk is what an operator reads and edits. If a role's
+isolated file is silent on the Three Laws, the operator can't tell the
+contract is present, and any edit that breaks the loader will silently
+strip the laws too.
+Public API:
+  - ``audit_agent_prompt(path)`` -> AgentPromptReport (score 0..100, checks dict)
+  - ``audit_aisb_suite(omega_home)`` -> SuiteReport (per-agent + averages)
+  - ``orchestration_health(omega_home)`` -> dict of presence + overlap
+Only stdlib (``pathlib``, ``re``, ``dataclasses``, ``typing``). No new deps.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+# ---------------------------------------------------------------------------
+# Data classes — what each audit returns
+# ---------------------------------------------------------------------------
+@dataclass
+class CheckResult:
+    """Outcome of one contract check run against one prompt file.
+    ``passed`` says whether the check succeeded, ``evidence`` optionally
+    carries a short text snippet (or, for the banned-phrase check, the
+    offending phrase), and ``points`` is the score this check contributes.
+    Only ``passed`` is required; the other two default to "nothing found".
+    """
+    passed: bool
+    evidence: Optional[str] = None  # short matched snippet, None if not found
+    points: int = 0                  # points awarded for this check
+@dataclass
+class AgentPromptReport:
+    """Audit result for a single agent prompt .md file.
+    A freshly constructed report has score 0, no checks and no violations;
+    ``audit_agent_prompt`` fills the remaining fields in after reading the
+    file.
+    """
+    agent_id: str                               # file stem, e.g. "oracle"
+    file_path: str                              # str() of the audited path
+    score: int = 0                              # 0..100
+    # Check name (same keys as ``_WEIGHTS``) -> result of that check.
+    checks: dict[str, CheckResult] = field(default_factory=dict)
+    # One label per failed check, or a single "unreadable file" message.
+    violations: list[str] = field(default_factory=list)  # human-readable
+@dataclass
+class SuiteReport:
+    """Audit result for the whole AISB suite.
+    Built by ``audit_aisb_suite``; every field has an "empty suite"
+    default so the report can be constructed with no arguments.
+    """
+    # One report per audited prompt file.
+    per_agent: list[AgentPromptReport] = field(default_factory=list)
+    # Arithmetic mean of the per-agent scores, 0.0 when there are none.
+    average_score: float = 0.0
+    # Agent ids whose score is below the critical threshold.
+    missing_critical: list[str] = field(default_factory=list)   # score < 60
+    # Result of ``_chain_intact`` over ``per_agent``.
+    orchestration_chain_intact: bool = False
+# ---------------------------------------------------------------------------
+# Contract patterns — what every well-formed agent prompt should reference
+# ---------------------------------------------------------------------------
+# Three Laws — case-insensitive search for any of these markers.
+# None of the alternatives is word-anchored, so e.g. "law 1" also matches
+# the start of "law 12".
+_RE_THREE_LAWS = re.compile(
+    r"(three\s+laws|first\s+law|second\s+law|third\s+law|"
+    r"law\s+1|law\s+2|law\s+3)",
+    re.IGNORECASE,
+)
+# LMC protocol — concept or filename.
+# The first alternative accepts a bare, un-anchored "lmc" (the separator
+# and the protocol/gate suffix are both optional), so it also hits "lmc"
+# inside a longer word and already covers the "lmc-protocol.md" filename.
+_RE_LMC = re.compile(
+    r"(lmc[\s\-_]*(protocol|gate)?|lead[\s\-]+manager[\s\-]+checker|"
+    r"lmc-protocol\.md)",
+    re.IGNORECASE,
+)
+# Verified-completion contract — `.done.json`.
+# The second alternative is a superset of the first; the leading-dot form
+# only changes where the reported match starts.
+_RE_DONE_JSON = re.compile(r"\.done\.json|done\.json", re.IGNORECASE)
+# Done-marker tooling — the script that writes the .done.json signal.
+# Separators between the words may be one space, one hyphen, or absent.
+_RE_DONE_MARKER = re.compile(
+    r"(worker[\s\-]?mark[\s\-]?done|oracle[\s\-]?mark[\s\-]?done|"
+    r"mark[\s\-]?done\.sh|write_done\b|done_signal|"
+    r"done[\s\-]?marker)",
+    re.IGNORECASE,
+)
+# Scope / role-specific responsibilities — at least one of these tokens
+# tells us the file declares what the agent owns / is allowed to touch.
+# "scope" is an un-anchored substring (it also matches inside words such
+# as "telescope"); MULTILINE is what lets ``^\s*role\b`` match at the
+# start of any line rather than only at the start of the file.
+_RE_SCOPE = re.compile(
+    r"(scope|files?_owned|owns?\s+R[\s\-]?\d+|responsibilit(y|ies)|"
+    r"^\s*role\b|owned\s+files|files\s+owned|what\s+you\s+own|"
+    r"\bowns\b)",
+    re.IGNORECASE | re.MULTILINE,
+)
+# Fresh context handoff vocabulary
+_RE_FRESH_CONTEXT = re.compile(
+    r"fresh[\s\-]+context|self[\s\-]+contained\s+brief|"
+    r"self[\s\-]+contained\s+context",
+    re.IGNORECASE,
+)
+# Banned phrases — the no-time-panic global rule (rule 46). These are the
+# action-oriented forms that authorize cheating, NOT the descriptive forms
+# (e.g. "lightweight utility agent" describes a haiku tier — fine).
+# We explicitly DO NOT include bare "lightweight" because templates
+# legitimately describe haiku-tier agents as lightweight (architecture
+# descriptor, not a shortcut authorization).
+# Entries are written in lower case because ``_check_no_banned`` compares
+# them against lower-cased prompt text.
+_BANNED_PHRASES = [
+    "streamlined approach",
+    "streamlined version",
+    "skip audit",
+    "skip the audit",
+    "quick version",
+    "to save time",
+    "custom scoring",
+    "too heavyweight",
+    "lightweight audit",
+    "simplified protocol",
+]
+# Per-check point weights (sum = 100). The keys are the check names used
+# in ``AgentPromptReport.checks``.
+_WEIGHTS = {
+    "three_laws":    25,
+    "lmc_protocol":  15,
+    "done_json":     20,
+    "done_marker":   10,
+    "scope":         15,
+    "fresh_context": 10,
+    "no_banned":      5,
+}
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _short_evidence(text: str, match: re.Match) -> str:
+    """Return a one-line excerpt of ``text`` surrounding ``match``.
+    The excerpt spans from 10 characters before the match to 20 characters
+    after it (clamped to the bounds of ``text``), with newlines turned into
+    spaces and outer whitespace trimmed. Anything longer than 80 characters
+    is cut to 77 characters followed by ``...``.
+    """
+    lo = match.start() - 10
+    if lo < 0:
+        lo = 0
+    hi = match.end() + 20
+    if hi > len(text):
+        hi = len(text)
+    window = " ".join(text[lo:hi].split("\n")).strip()
+    return window if len(window) <= 80 else f"{window[:77]}..."
+def _check_pattern(
+    text: str,
+    pattern: re.Pattern,
+    name: str,
+    points: int,
+) -> CheckResult:
+    """Search ``text`` for ``pattern`` and wrap the outcome in a CheckResult.
+    A hit yields a passing result worth ``points`` whose evidence is the
+    excerpt produced by ``_short_evidence``; a miss yields a failing result
+    worth 0 points with no evidence. ``name`` is accepted for call-site
+    readability but is not used here.
+    """
+    hit = pattern.search(text)
+    if hit is None:
+        return CheckResult(passed=False, evidence=None, points=0)
+    excerpt = _short_evidence(text, hit)
+    return CheckResult(passed=True, evidence=excerpt, points=points)
+def _check_no_banned(text: str, points: int) -> CheckResult:
+    """Pass iff no banned phrase appears in ``text``.
+    The phrase list is short and targets action-oriented patterns (e.g.
+    'streamlined approach') rather than descriptive ones (e.g. bare
+    'lightweight') to avoid false positives in legitimate architecture
+    descriptions.
+    Matching is case-insensitive and whitespace-insensitive: the text is
+    lower-cased and every run of whitespace is collapsed to a single space
+    first, so a phrase broken by a hard line wrap or a double space is
+    still detected (the regex-based checks already tolerate this).
+    Returns a failing CheckResult (0 points) whose ``evidence`` is the
+    first banned phrase found, in ``_BANNED_PHRASES`` order, or a passing
+    CheckResult worth ``points`` when none is present.
+    """
+    # str.split() with no argument splits on any whitespace run, so the
+    # join leaves exactly one space between consecutive words.
+    normalized = " ".join(text.lower().split())
+    offender = next(
+        (phrase for phrase in _BANNED_PHRASES if phrase in normalized),
+        None,
+    )
+    if offender is not None:
+        return CheckResult(passed=False, evidence=offender, points=0)
+    return CheckResult(passed=True, evidence=None, points=points)
+def _agent_id_from_path(path: Path) -> str:
+    """Return the agent id for ``path``: its file name minus the final
+    suffix (``Path.stem``)."""
+    stem = path.stem
+    return stem
+# ---------------------------------------------------------------------------
+# Public API — single file
+# ---------------------------------------------------------------------------
+def audit_agent_prompt(path: Path) -> AgentPromptReport:
+    """Audit one agent prompt .md file against the seven contract checks.
+    Points per check (total 100):
+      - Three Laws reference                       25 pts
+      - LMC protocol reference                     15 pts
+      - `.done.json` reference                     20 pts
+      - Done-marker tooling reference              10 pts
+      - Scope / files_owned / responsibilities     15 pts
+      - Fresh context / self-contained brief       10 pts
+      - No banned phrases (rule 46-no-time-panic)   5 pts
+    The returned :class:`AgentPromptReport` holds every check result, the
+    summed score (capped at 100) and one human-readable violation per
+    failed check. When the file cannot be read or decoded as UTF-8 the
+    report keeps score 0, has no checks, and carries a single
+    "unreadable file" violation.
+    """
+    prompt_path = Path(path)
+    report = AgentPromptReport(
+        agent_id=_agent_id_from_path(prompt_path),
+        file_path=str(prompt_path),
+    )
+    try:
+        body = prompt_path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as exc:
+        report.violations.append(f"unreadable file: {exc}")
+        return report
+    # (check name, pattern, violation label) — listed in report order.
+    regex_checks = (
+        ("three_laws", _RE_THREE_LAWS, "Three Laws"),
+        ("lmc_protocol", _RE_LMC, "LMC protocol"),
+        ("done_json", _RE_DONE_JSON, "`.done.json` contract"),
+        ("done_marker", _RE_DONE_MARKER, "done-marker tooling"),
+        ("scope", _RE_SCOPE, "scope/responsibilities"),
+        ("fresh_context", _RE_FRESH_CONTEXT, "fresh context handoff"),
+    )
+    labels = {key: label for key, _, label in regex_checks}
+    labels["no_banned"] = "banned phrase present"
+    results: dict[str, CheckResult] = {
+        key: _check_pattern(body, rx, key, _WEIGHTS[key])
+        for key, rx, _ in regex_checks
+    }
+    # The banned-phrase check is not regex-driven, so it is added last.
+    results["no_banned"] = _check_no_banned(body, _WEIGHTS["no_banned"])
+    report.checks = results
+    report.score = min(100, sum(res.points for res in results.values()))
+    # Human-readable violations for the doctor line summary.
+    for key, outcome in results.items():
+        if outcome.passed:
+            continue
+        message = labels[key]
+        if key == "no_banned" and outcome.evidence:
+            message = f"{message} ({outcome.evidence})"
+        report.violations.append(message)
+    return report
+# ---------------------------------------------------------------------------
+# Public API — full suite
+# ---------------------------------------------------------------------------
+def _aisb_dir(omega_home: Path) -> Path:
+    """Return ``<omega_home>/Agentik_SSOT/agents/aisb`` as a Path."""
+    return Path(omega_home).joinpath("Agentik_SSOT", "agents", "aisb")
+def _iter_aisb_prompts(omega_home: Path) -> list[Path]:
+    """List the ``.md`` files sitting directly inside the aisb directory.
+    Sub-directories are not descended into and the suffix comparison is
+    case-sensitive. The result is sorted; it is empty when the aisb
+    directory does not exist.
+    """
+    root = _aisb_dir(omega_home)
+    if not root.is_dir():
+        return []
+    found = [
+        entry for entry in root.iterdir()
+        if entry.is_file() and entry.suffix == ".md"
+    ]
+    found.sort()
+    return found
+def audit_aisb_suite(omega_home: Path) -> SuiteReport:
+    """Run ``audit_agent_prompt`` over every .md directly under ``aisb/``.
+    The returned :class:`SuiteReport` carries the per-agent reports, their
+    mean score (0.0 for an empty suite), the sorted ids of agents scoring
+    below 60, and the ``_chain_intact`` verdict. An empty ``per_agent``
+    list means no prompts were found; callers should warn in that case
+    (the install step 25-aisb-suite probably was skipped).
+    """
+    reports = []
+    for prompt in _iter_aisb_prompts(Path(omega_home)):
+        reports.append(audit_agent_prompt(prompt))
+    mean = sum(r.score for r in reports) / len(reports) if reports else 0.0
+    below_threshold = [r.agent_id for r in reports if r.score < 60]
+    below_threshold.sort()
+    return SuiteReport(
+        per_agent=reports,
+        average_score=mean,
+        missing_critical=below_threshold,
+        orchestration_chain_intact=_chain_intact(reports),
+    )
+def _chain_intact(reports: list[AgentPromptReport]) -> bool:
+    """Decide whether the AISB → Oracle → Worker chain is intact.
+    That requires a report for each of the three roles AND a passing
+    ``done_json`` check (the shared completion vocabulary) on all three.
+    The master is the report with id ``CLAUDE`` and the oracle the one
+    with id ``oracle``. The worker slot is filled by ``morpheus`` when that
+    report exists and by ``construct`` only otherwise, which follows the
+    alias map in ``prompts.py`` per the original note. A report lacking a
+    ``done_json`` entry counts as failing.
+    """
+    lookup = {rep.agent_id: rep for rep in reports}
+    master = lookup.get("CLAUDE")  # the master prompt of the AISB suite
+    oracle = lookup.get("oracle")
+    worker = lookup.get("morpheus") or lookup.get("construct")
+    if not master or not oracle or not worker:
+        return False
+    absent = CheckResult(passed=False)
+    for link in (master, oracle, worker):
+        if not link.checks.get("done_json", absent).passed:
+            return False
+    return True
+# ---------------------------------------------------------------------------
+# Public API — orchestration health
+# ---------------------------------------------------------------------------
+def orchestration_health(omega_home: Path) -> dict:
+    """Report whether the AISB → Oracle → Worker → Checker chain is wired
+    and how widely the suite shares the verified-completion vocabulary.
+    Keys of the returned dict:
+      - ``aisb_master_present``  — CLAUDE.md exists in aisb/
+      - ``oracle_present``       — oracle.md exists
+      - ``workers_role_present`` — morpheus.md OR construct.md exists
+      - ``checker_present``      — seraph.md OR smith.md exists
+      - ``lmc_protocol_present`` — lmc-protocol.md exists
+      - ``shared_vocab_overlap`` — fraction (0.0..1.0) of prompt files
+        mentioning `.done.json`; 0.0 when there are no prompt files
+    A prompt file that cannot be read still counts in the denominator of
+    the overlap, just never as a match.
+    """
+    home = Path(omega_home)
+    root = _aisb_dir(home)
+    def present(*names: str) -> bool:
+        # True as soon as any of the candidate files exists.
+        return any((root / candidate).is_file() for candidate in names)
+    summary: dict = {
+        "aisb_master_present":  present("CLAUDE.md"),
+        "oracle_present":       present("oracle.md"),
+        "workers_role_present": present("morpheus.md", "construct.md"),
+        "checker_present":      present("seraph.md", "smith.md"),
+        "lmc_protocol_present": present("lmc-protocol.md"),
+    }
+    prompts = _iter_aisb_prompts(home)
+    if not prompts:
+        summary["shared_vocab_overlap"] = 0.0
+        return summary
+    def mentions_done_json(prompt: Path) -> bool:
+        try:
+            content = prompt.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError):
+            return False
+        return _RE_DONE_JSON.search(content) is not None
+    # A high overlap means most agents name the completion contract; a low
+    # one means some agents are silent on the done signal and may finish
+    # tasks without writing the structured result downstream consumers
+    # expect.
+    hits = sum(1 for prompt in prompts if mentions_done_json(prompt))
+    summary["shared_vocab_overlap"] = hits / len(prompts)
+    return summary
+# Public API of this module: the three report dataclasses and the three
+# audit entry points. Underscore-prefixed helpers are left out.
+__all__ = [
+    "CheckResult",
+    "AgentPromptReport",
+    "SuiteReport",
+    "audit_agent_prompt",
+    "audit_aisb_suite",
+    "orchestration_health",
+]

package/omega/Agentik_Engine/omega_engine/tmux.py CHANGED Viewed

@@ -325,6 +325,22 @@ def _spawn_with_shell_then_run(
     return name
+def omega_window_alive(window_name: str) -> bool:
+    """Tell whether the Omega master tmux session has a window called
+    ``window_name``.
+    Used by the TUI chat-list panel to render ● (alive) vs ○ (off) next
+    to AISB-chat / Hermès-chat. Costs a single ``tmux list-windows`` call.
+    Returns False when ``is_alive("Omega")`` is false or when the tmux
+    command exits with a non-zero status; otherwise the name must equal
+    one full line of the listing exactly.
+    """
+    if is_alive("Omega"):
+        rc, out = _tmux("list-windows", "-t", "Omega", "-F", "#W")
+        if rc == 0:
+            return window_name in (out or "").splitlines()
+    return False
 def spawn_chat_in_omega(
     window_name: str,
     *,