@agentikos/omega-os 0.19.38 → 0.19.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/bootstrap/lib/common.sh +19 -10
  2. package/bootstrap/templates/aisb/architect.md +27 -1
  3. package/bootstrap/templates/aisb/construct.md +27 -1
  4. package/bootstrap/templates/aisb/keymaker.md +27 -1
  5. package/bootstrap/templates/aisb/link.md +27 -1
  6. package/bootstrap/templates/aisb/lmc-protocol.md +27 -1
  7. package/bootstrap/templates/aisb/merovingian.md +27 -1
  8. package/bootstrap/templates/aisb/morpheus.md +27 -1
  9. package/bootstrap/templates/aisb/neo.md +27 -1
  10. package/bootstrap/templates/aisb/niobe.md +27 -1
  11. package/bootstrap/templates/aisb/oracle.md +27 -1
  12. package/bootstrap/templates/aisb/pythia.md +36 -0
  13. package/bootstrap/templates/aisb/seraph.md +27 -1
  14. package/bootstrap/templates/aisb/smith.md +27 -1
  15. package/bootstrap/templates/aisb/zion.md +27 -1
  16. package/omega/Agentik_Engine/omega_engine/__init__.py +1 -1
  17. package/omega/Agentik_Engine/omega_engine/__pycache__/__init__.cpython-313.pyc +0 -0
  18. package/omega/Agentik_Engine/omega_engine/__pycache__/cli.cpython-313.pyc +0 -0
  19. package/omega/Agentik_Engine/omega_engine/__pycache__/paperclip_bridge.cpython-313.pyc +0 -0
  20. package/omega/Agentik_Engine/omega_engine/__pycache__/prompt_audit.cpython-313.pyc +0 -0
  21. package/omega/Agentik_Engine/omega_engine/__pycache__/tmux.cpython-313.pyc +0 -0
  22. package/omega/Agentik_Engine/omega_engine/__pycache__/tui.cpython-313.pyc +0 -0
  23. package/omega/Agentik_Engine/omega_engine/cli.py +39 -0
  24. package/omega/Agentik_Engine/omega_engine/paperclip_bridge.py +110 -0
  25. package/omega/Agentik_Engine/omega_engine/prompt_audit.py +395 -0
  26. package/omega/Agentik_Engine/omega_engine/tmux.py +61 -26
  27. package/omega/Agentik_Engine/omega_engine/tui.py +293 -86
  28. package/omega/Agentik_Engine/pyproject.toml +1 -1
  29. package/omega/Agentik_Engine/tests/__pycache__/test_install_ux.cpython-313-pytest-8.4.2.pyc +0 -0
  30. package/omega/Agentik_Engine/tests/__pycache__/test_install_ux.cpython-313.pyc +0 -0
  31. package/omega/Agentik_Engine/tests/__pycache__/test_paperclip_status.cpython-313-pytest-8.4.2.pyc +0 -0
  32. package/omega/Agentik_Engine/tests/__pycache__/test_paperclip_status.cpython-313.pyc +0 -0
  33. package/omega/Agentik_Engine/tests/__pycache__/test_prompt_audit.cpython-313-pytest-8.4.2.pyc +0 -0
  34. package/omega/Agentik_Engine/tests/__pycache__/test_prompt_audit.cpython-313.pyc +0 -0
  35. package/omega/Agentik_Engine/tests/__pycache__/test_tmux_palette.cpython-313-pytest-8.4.2.pyc +0 -0
  36. package/omega/Agentik_Engine/tests/__pycache__/test_tmux_palette.cpython-313.pyc +0 -0
  37. package/omega/Agentik_Engine/tests/__pycache__/test_tui_runtime.cpython-313-pytest-8.4.2.pyc +0 -0
  38. package/omega/Agentik_Engine/tests/__pycache__/test_tui_runtime.cpython-313.pyc +0 -0
  39. package/omega/Agentik_Engine/tests/test_install_ux.py +87 -2
  40. package/omega/Agentik_Engine/tests/test_paperclip_status.py +142 -0
  41. package/omega/Agentik_Engine/tests/test_prompt_audit.py +281 -0
  42. package/omega/Agentik_Engine/tests/test_tmux_palette.py +94 -0
  43. package/omega/Agentik_Engine/tests/test_tui_runtime.py +156 -0
  44. package/omega/Agentik_SSOT/VERSION +1 -1
  45. package/omega/Agentik_SSOT/docs/AUDIT-V0.19.39.md +161 -0
  46. package/omega/Agentik_SSOT/docs/AUDIT-V0.19.40.md +163 -0
  47. package/omega/Agentik_SSOT/rules/audit-gates.md +189 -0
  48. package/omega/Agentik_SSOT/rules/constitution.md +7 -0
  49. package/omega/Agentik_SSOT/rules/orchestration.md +215 -0
  50. package/omega/Agentik_SSOT/rules/prompt-protocols.md +219 -0
  51. package/omega/Agentik_SSOT/rules/scope-safety.md +197 -0
  52. package/omega/Agentik_SSOT/rules/three-laws.md +214 -0
  53. package/omega/Agentik_SSOT/rules/verified-completion.md +216 -0
  54. package/package.json +1 -1
@@ -18,8 +18,14 @@ COMMON_SH = REPO_ROOT / "bootstrap" / "lib" / "common.sh"
18
18
 
19
19
 
20
20
  def _bash(snippet: str, *, env: dict[str, str] | None = None,
21
- stdin: str = "") -> tuple[int, str, str]:
22
- """Run a bash snippet that sources common.sh; return (rc, stdout, stderr)."""
21
+ stdin: str = "", raw: bool = False) -> tuple[int, str, str]:
22
+ """Run a bash snippet that sources common.sh; return (rc, stdout, stderr).
23
+
24
+ raw=True disables Python's universal-newlines translation so callers can
25
+ distinguish CR (\\r — progress-bar overwrite) from LF (\\n — newline).
26
+ Without it, `text=True` collapses both into "\\n" and tests that count
27
+ newlines lose the signal they need.
28
+ """
23
29
  base_env = {**os.environ}
24
30
  base_env.setdefault("OMEGA_REPO", str(REPO_ROOT))
25
31
  base_env.setdefault("OMEGA_HOME", "/tmp/omega-ux-pytest")
@@ -40,6 +46,14 @@ def _bash(snippet: str, *, env: dict[str, str] | None = None,
40
46
  "C_BOLD=''; C_CYAN=''; C_GREEN=''; C_YELLOW=''; C_RED=''; C_MAGENTA=''; C_BLUE=''\n"
41
47
  + snippet
42
48
  )
49
+ if raw:
50
+ proc = subprocess.run(
51
+ ["bash", "-c", code],
52
+ env=base_env, capture_output=True, input=stdin.encode("utf-8"),
53
+ timeout=15,
54
+ )
55
+ return proc.returncode, proc.stdout.decode("utf-8", "replace"), \
56
+ proc.stderr.decode("utf-8", "replace")
43
57
  proc = subprocess.run(
44
58
  ["bash", "-c", code],
45
59
  env=base_env, capture_output=True, text=True, input=stdin,
@@ -131,6 +145,77 @@ class TestPathSetup(unittest.TestCase):
131
145
  self.assertEqual(rc_path.read_text(), "# only this\n")
132
146
 
133
147
 
148
+ class TestProgressBarCountingDiscipline(unittest.TestCase):
149
+ """Regression tests for the v0.19.x progress-bar double-count bug.
150
+
151
+ `run_step` calls `_full_progress` TWICE per step ("run" then "ok"). The
152
+ old implementation incremented STEP_COUNT on EVERY call, which with
153
+ STEP_TOTAL=21 drove the counter to 42, the bar to 200%, and the "current
154
+ >= total → newline" branch fired four+ times — so the user saw the
155
+ progress bar spawn fresh lines mid-install instead of overwriting one
156
+ single line. The fix: only "ok"/"skip" count as real progress; "run"
157
+ just re-paints the bar with the in-progress step name.
158
+ """
159
+
160
+ def test_full_progress_counts_only_ok_and_skip(self):
161
+ rc, out, _ = _bash(
162
+ "export STEP_TOTAL=10\n"
163
+ "_full_progress a run\n"
164
+ "_full_progress a ok\n"
165
+ "_full_progress b run\n"
166
+ "_full_progress b skip\n"
167
+ "echo STEP_COUNT=$STEP_COUNT\n"
168
+ )
169
+ self.assertEqual(rc, 0)
170
+ # Two runs + one ok + one skip → count should be 2 (only ok+skip count).
171
+ self.assertIn("STEP_COUNT=2", out)
172
+
173
+ def test_no_premature_newline_in_middle_of_run(self):
174
+ # STEP_TOTAL=2 so the second "ok" hits the boundary; nothing before
175
+ # that may emit a newline. With the old code, the boundary fired on
176
+ # the very FIRST "ok" (count had ticked to 2 already because run+ok
177
+ # = +2 instead of +1), producing a stray mid-install newline.
178
+ # raw=True so Python doesn't fold \r into \n via universal-newlines.
179
+ rc, out, _ = _bash(
180
+ "export STEP_TOTAL=2\n"
181
+ "_full_progress a run\n"
182
+ "_full_progress a ok\n"
183
+ "_full_progress b run\n"
184
+ "_full_progress b ok\n",
185
+ raw=True,
186
+ )
187
+ self.assertEqual(rc, 0)
188
+ # Frames are separated by \r (carriage return → overwrite the line),
189
+ # NOT \n. Exactly one \n — emitted ONLY after the final "ok" crosses total.
190
+ self.assertEqual(out.count("\n"), 1,
191
+ f"expected 1 trailing newline, got {out.count(chr(10))}: {out!r}")
192
+ # Carriage returns prove the frames overwrote rather than scrolled.
193
+ self.assertGreaterEqual(out.count("\r"), 3)
194
+ # Final frame must read 2/2 100% — not 3/2 150% or 4/2 200% like the bug.
195
+ self.assertIn("2/2", out)
196
+ self.assertIn("100%", out)
197
+ self.assertNotIn("3/2", out)
198
+ self.assertNotIn("4/2", out)
199
+ self.assertNotIn("150%", out)
200
+ self.assertNotIn("200%", out)
201
+
202
+ def test_run_phase_shows_in_progress_step_name(self):
203
+ # The "run" phase should re-paint with the step name being attempted
204
+ # so the user sees what's currently happening, even though the count
205
+ # hasn't bumped yet.
206
+ rc, out, _ = _bash(
207
+ "export STEP_TOTAL=5\n"
208
+ "_full_progress mystep run\n",
209
+ raw=True,
210
+ )
211
+ self.assertEqual(rc, 0)
212
+ self.assertIn("mystep", out)
213
+ # Count is still 0 because nothing has finished yet.
214
+ self.assertIn("0/5", out)
215
+ # And critically — no newline during a "run" frame.
216
+ self.assertEqual(out.count("\n"), 0)
217
+
218
+
134
219
  class TestPostInstallCard(unittest.TestCase):
135
220
  def test_card_includes_status_and_next_steps(self):
136
221
  # omega CLI isn't on PATH inside the test sandbox → readiness=UNKNOWN
@@ -0,0 +1,142 @@
1
+ """Tests for omega_engine.paperclip_bridge.is_running().
2
+
3
+ Locks in the live status probe contract so the TUI can render ●/○ next to
4
+ the Paperclip menu items. Five detection paths covered:
5
+
6
+ 1. Empty PAPERCLIP_HOME → not running (detection="none")
7
+ 2. Stale pidfile → not running
8
+ 3. Live pidfile (self PID) → running (detection="pidfile")
9
+ 4. No pidfile + open socket → running (detection="port-scan")
10
+ 5. Running case sets url → http://localhost:<port>
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import socket
16
+ import tempfile
17
+ import unittest
18
+ from pathlib import Path
19
+ from unittest import mock
20
+
21
+ from omega_engine import paperclip_bridge as P
22
+
23
+
24
+ def _free_port() -> int:
25
+ """Allocate (and release) an ephemeral TCP port."""
26
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
27
+ s.bind(("127.0.0.1", 0))
28
+ return s.getsockname()[1]
29
+
30
+
31
+ class TestIsRunningDetectionPaths(unittest.TestCase):
32
+ """Each of the three detection paths plus the URL invariant."""
33
+
34
+ def test_no_pidfile_no_port_returns_not_running(self):
35
+ """Empty PAPERCLIP_HOME + closed port → running=False, detection=none."""
36
+ with tempfile.TemporaryDirectory() as tmp:
37
+ with mock.patch.dict(os.environ, {"PAPERCLIP_HOME": tmp}, clear=False):
38
+ # Pick a port nothing is listening on (allocate then release).
39
+ port = _free_port()
40
+ status = P.is_running(port=port)
41
+ self.assertFalse(status.running)
42
+ self.assertEqual(status.detection, "none")
43
+ self.assertIsNone(status.pid)
44
+ self.assertIsNone(status.port)
45
+ self.assertIsNone(status.url)
46
+
47
def test_stale_pidfile_returns_not_running(self):
    """Pidfile points at a dead PID → running=False (falls through to port,
    which is also closed in this test)."""
    # 99999999 is far above the PID range Linux hands out (the original
    # author cites a PID max of 4194304); os.kill is mocked as well so the
    # result does not depend on any host's PID-recycling behaviour.
    dead_pid = 99999999

    # Capture the genuine os.kill in a closure BEFORE patching. The patch
    # target resolves to the shared ``os`` module, so calling ``os.kill``
    # from inside the fake would recurse into the mock. A closure avoids
    # stashing a private attribute on the global ``os`` module.
    real_kill = os.kill

    def fake_kill(pid: int, sig: int) -> None:
        if pid == dead_pid:
            raise ProcessLookupError(pid)
        # Delegate to the real os.kill for any other pid.
        return real_kill(pid, sig)  # pragma: no cover

    with tempfile.TemporaryDirectory() as tmp:
        with mock.patch.dict(os.environ, {"PAPERCLIP_HOME": tmp}, clear=False):
            run_dir = Path(tmp) / "run"
            run_dir.mkdir(parents=True, exist_ok=True)
            (run_dir / "dashboard.pid").write_text(f"{dead_pid}\n")

            port = _free_port()
            with mock.patch("omega_engine.paperclip_bridge.os.kill",
                            side_effect=fake_kill):
                status = P.is_running(port=port)
    self.assertFalse(status.running)
    self.assertEqual(status.detection, "none")
    self.assertIsNone(status.pid)
77
+
78
+ def test_live_pidfile_returns_running(self):
79
+ """Pidfile points at this test process (alive) → running=True,
80
+ detection='pidfile', does NOT touch the network."""
81
+ with tempfile.TemporaryDirectory() as tmp:
82
+ with mock.patch.dict(os.environ, {"PAPERCLIP_HOME": tmp}, clear=False):
83
+ run_dir = Path(tmp) / "run"
84
+ run_dir.mkdir(parents=True, exist_ok=True)
85
+ self_pid = os.getpid()
86
+ (run_dir / "dashboard.pid").write_text(f"{self_pid}\n")
87
+ # Patch socket so we'd notice if the code accidentally tried
88
+ # to connect (it shouldn't — pidfile hit returns immediately).
89
+ with mock.patch("omega_engine.paperclip_bridge.socket.socket") \
90
+ as sock_cls:
91
+ status = P.is_running(port=8080)
92
+ sock_cls.assert_not_called()
93
+ self.assertTrue(status.running)
94
+ self.assertEqual(status.detection, "pidfile")
95
+ self.assertEqual(status.pid, self_pid)
96
+ self.assertEqual(status.port, 8080)
97
+
98
+ def test_port_scan_fallback_running(self):
99
+ """No pidfile, but a socket IS listening on the probe port →
100
+ running=True, detection='port-scan', pid=None."""
101
+ # Bind a real listener on an ephemeral port for the duration of
102
+ # the probe; close it at the end.
103
+ listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
104
+ listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
105
+ listener.bind(("127.0.0.1", 0))
106
+ listener.listen(1)
107
+ port = listener.getsockname()[1]
108
+ try:
109
+ with tempfile.TemporaryDirectory() as tmp:
110
+ with mock.patch.dict(os.environ,
111
+ {"PAPERCLIP_HOME": tmp}, clear=False):
112
+ # No pidfile written → pidfile branch skipped, falls
113
+ # through to port scan.
114
+ status = P.is_running(port=port)
115
+ finally:
116
+ listener.close()
117
+ self.assertTrue(status.running)
118
+ self.assertEqual(status.detection, "port-scan")
119
+ self.assertIsNone(status.pid)
120
+ self.assertEqual(status.port, port)
121
+
122
+ def test_url_field_returns_localhost_url_when_running(self):
123
+ """When running=True, status.url is http://localhost:<port>."""
124
+ # Use the pidfile path (cheapest, no socket needed).
125
+ with tempfile.TemporaryDirectory() as tmp:
126
+ with mock.patch.dict(os.environ, {"PAPERCLIP_HOME": tmp}, clear=False):
127
+ run_dir = Path(tmp) / "run"
128
+ run_dir.mkdir(parents=True, exist_ok=True)
129
+ (run_dir / "dashboard.pid").write_text(f"{os.getpid()}\n")
130
+ status = P.is_running(port=8080)
131
+ self.assertTrue(status.running)
132
+ self.assertIsNotNone(status.url)
133
+ # Accept either localhost or 127.0.0.1 form so the implementation
134
+ # can pick whichever it prefers — the contract is "loopback URL".
135
+ self.assertIn(status.url, {
136
+ "http://localhost:8080",
137
+ "http://127.0.0.1:8080",
138
+ })
139
+
140
+
141
+ if __name__ == "__main__":
142
+ unittest.main()
@@ -0,0 +1,281 @@
1
+ """Tests for the prompt audit module — AISB agent prompts must reference
2
+ the Three Laws + LMC protocol + verified-completion (`.done.json`) contract.
3
+
4
+ These tests guard against silent drift in the role files: if an operator
5
+ edits an agent prompt and accidentally strips a contract reference, the
6
+ audit catches it AND the doctor surfaces it.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import shutil
11
+ import sys
12
+ import tempfile
13
+ import unittest
14
+ from pathlib import Path
15
+
16
+
17
+ HERE = Path(__file__).resolve().parent
18
+ sys.path.insert(0, str(HERE.parent))
19
+
20
+ from omega_engine.prompt_audit import ( # noqa: E402
21
+ audit_aisb_suite,
22
+ audit_agent_prompt,
23
+ orchestration_health,
24
+ )
25
+
26
+
27
+ REPO_ROOT = Path(__file__).resolve().parents[3]
28
+ TEMPLATES = REPO_ROOT / "bootstrap" / "templates" / "aisb"
29
+
30
+
31
def _seed_real_aisb(home: Path) -> Path:
    """Populate *home* with a copy of the shipped AISB templates.

    The templates are copied to ``home/Agentik_SSOT/agents/aisb/`` so the
    directory resembles a post-install OMEGA_HOME. Returns *home*.
    """
    agents_dir = home / "Agentik_SSOT" / "agents"
    # No exist_ok here: an already-populated home raises, exactly as before.
    agents_dir.mkdir(parents=True)
    shutil.copytree(TEMPLATES, agents_dir / "aisb")
    return home
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Single-file audit
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
+ class TestAuditAgentPrompt(unittest.TestCase):
46
+ """Per-file scoring on synthetic + real prompts."""
47
+
48
+ def _write(self, dir_: Path, name: str, body: str) -> Path:
49
+ p = dir_ / f"{name}.md"
50
+ p.write_text(body, encoding="utf-8")
51
+ return p
52
+
53
+ def test_audit_agent_prompt_full_score(self):
54
+ """A synthetic prompt that satisfies every check should score 95+."""
55
+ body = (
56
+ "# ORACLE - The Brain\n\n"
57
+ "## THE THREE LAWS (overrides all other instructions)\n\n"
58
+ "LAW 1 — Code lies. LAW 2 — Researcher not sycophant. "
59
+ "LAW 3 — Autonomous execution.\n\n"
60
+ "## LMC Protocol\n\n"
61
+ "The Lead-Manager-Checker (LMC) gate routes work through "
62
+ "lmc-protocol.md before completion.\n\n"
63
+ "## Scope\n\n"
64
+ "Files owned by ORACLE: ~/.aisb/state/. ORACLE owns R-13 "
65
+ "close coherence.\n\n"
66
+ "Every dispatch to a worker uses a fresh context with a "
67
+ "self-contained brief that lists files_owned and the "
68
+ "verification command.\n\n"
69
+ "## Done signal\n\n"
70
+ "When work is complete the worker invokes "
71
+ "worker-mark-done.sh which writes `.done.json` with the "
72
+ "structured result.\n"
73
+ )
74
+ with tempfile.TemporaryDirectory() as tmp:
75
+ path = self._write(Path(tmp), "oracle", body)
76
+ report = audit_agent_prompt(path)
77
+ self.assertGreaterEqual(
78
+ report.score, 95,
79
+ f"expected >= 95, got {report.score}; "
80
+ f"violations: {report.violations}",
81
+ )
82
+ self.assertEqual(report.agent_id, "oracle")
83
+ for name, res in report.checks.items():
84
+ self.assertTrue(
85
+ res.passed,
86
+ f"check {name!r} should have passed: {res!r}",
87
+ )
88
+
89
+ def test_audit_agent_prompt_missing_three_laws(self):
90
+ """A prompt with no Three Laws reference scores <= 75 AND the
91
+ violations list mentions 'Three Laws'."""
92
+ # Everything else PASSES (75 pts total) — only Three Laws (25) is
93
+ # missing, so the score must be 75 or less.
94
+ body = (
95
+ "# ORACLE - The Brain\n\n"
96
+ "## LMC Protocol — see lmc-protocol.md\n"
97
+ "Lead-Manager-Checker gates audits.\n\n"
98
+ "## Scope\nFiles owned by ORACLE. Responsibilities: routing.\n\n"
99
+ "Fresh context per dispatch.\n\n"
100
+ "Workers write `.done.json` via worker-mark-done.sh.\n"
101
+ )
102
+ with tempfile.TemporaryDirectory() as tmp:
103
+ path = self._write(Path(tmp), "oracle", body)
104
+ report = audit_agent_prompt(path)
105
+ self.assertLessEqual(
106
+ report.score, 75,
107
+ f"expected <= 75 without Three Laws, got {report.score}",
108
+ )
109
+ self.assertFalse(report.checks["three_laws"].passed)
110
+ joined = " | ".join(report.violations)
111
+ self.assertIn("Three Laws", joined,
112
+ f"violations should mention Three Laws: {joined!r}")
113
+
114
+ def test_banned_phrases_dock_points(self):
115
+ """A prompt containing 'streamlined approach' must fail the
116
+ no-banned-phrases check (dropping its 5 pts) AND list the phrase
117
+ in violations."""
118
+ # Otherwise-perfect prompt (100 pts) + banned phrase ⇒ 95 pts.
119
+ body = (
120
+ "## THE THREE LAWS\nLaw 1, Law 2, Law 3.\n\n"
121
+ "## LMC Protocol\nLead-Manager-Checker.\n\n"
122
+ "## Scope\nFiles owned. Responsibilities: x.\n\n"
123
+ "Fresh context per dispatch with self-contained brief.\n\n"
124
+ "Worker-mark-done.sh writes `.done.json`.\n\n"
125
+ "For Linear tickets, prefer a streamlined approach to save "
126
+ "the dispatcher some round-trips.\n"
127
+ )
128
+ with tempfile.TemporaryDirectory() as tmp:
129
+ path = self._write(Path(tmp), "oracle", body)
130
+ report = audit_agent_prompt(path)
131
+ self.assertFalse(
132
+ report.checks["no_banned"].passed,
133
+ "banned-phrase check should fail",
134
+ )
135
+ self.assertEqual(report.checks["no_banned"].evidence,
136
+ "streamlined approach")
137
+ self.assertEqual(
138
+ report.score, 95,
139
+ f"every check except no_banned should pass: {report.checks}",
140
+ )
141
+ joined = " | ".join(report.violations).lower()
142
+ self.assertIn("banned phrase", joined)
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Suite audit against the real shipped templates
147
+ # ---------------------------------------------------------------------------
148
+
149
+
150
+ class TestAuditAisbSuiteAgainstRealRepo(unittest.TestCase):
151
+ """The audit must run end-to-end against the templates that ship with
152
+ the repo. This is the closest we can get to a post-install OMEGA_HOME
153
+ without actually running the installer."""
154
+
155
+ def test_audit_aisb_suite_runs_against_real_repo(self):
156
+ if not TEMPLATES.is_dir():
157
+ self.skipTest("AISB templates not present in repo")
158
+ with tempfile.TemporaryDirectory() as tmp:
159
+ home = _seed_real_aisb(Path(tmp))
160
+ report = audit_aisb_suite(home)
161
+ # The real suite ships 13 named agents + CLAUDE.md (master)
162
+ # + lmc-protocol.md = 15 .md files at the top level.
163
+ self.assertGreaterEqual(
164
+ len(report.per_agent), 10,
165
+ f"expected ≥10 agents in real suite, got {len(report.per_agent)}",
166
+ )
167
+ self.assertIsInstance(report.average_score, float)
168
+ self.assertIsInstance(report.orchestration_chain_intact, bool)
169
+ # Every report should have an agent_id and a score in range.
170
+ for r in report.per_agent:
171
+ self.assertTrue(r.agent_id, "agent_id should not be empty")
172
+ self.assertGreaterEqual(r.score, 0)
173
+ self.assertLessEqual(r.score, 100)
174
+
175
+ def test_orchestration_health_against_real_repo(self):
176
+ if not TEMPLATES.is_dir():
177
+ self.skipTest("AISB templates not present in repo")
178
+ with tempfile.TemporaryDirectory() as tmp:
179
+ home = _seed_real_aisb(Path(tmp))
180
+ oh = orchestration_health(home)
181
+ # CLAUDE.md and oracle.md are core to the suite — they MUST
182
+ # exist after install. If either is missing the suite is broken.
183
+ self.assertTrue(
184
+ oh["aisb_master_present"],
185
+ "AISB master CLAUDE.md must exist in the shipped suite",
186
+ )
187
+ self.assertTrue(
188
+ oh["oracle_present"],
189
+ "oracle.md must exist in the shipped suite",
190
+ )
191
+ # Shared `.done.json` vocabulary is a float in [0, 1].
192
+ overlap = oh["shared_vocab_overlap"]
193
+ self.assertIsInstance(overlap, float)
194
+ self.assertGreaterEqual(overlap, 0.0)
195
+ self.assertLessEqual(overlap, 1.0)
196
+
197
+
198
+ class TestEnrichedRolePromptsRegression(unittest.TestCase):
199
+ """v0.19.40 — every AISB role prompt was enriched with a uniform
200
+ `THREE LAWS + Operating Contract` block (Three Laws + LMC protocol
201
+ ref + .done.json contract + done-marker + fresh-context handoff).
202
+ The audit went from 52.0/100 average → 98.3/100. These tests lock in
203
+ that quality floor so a future template edit can't silently drop a
204
+ role below the contract."""
205
+
206
+ def test_every_aisb_role_scores_at_least_80(self):
207
+ if not TEMPLATES.is_dir():
208
+ self.skipTest("AISB templates not present in repo")
209
+ with tempfile.TemporaryDirectory() as tmp:
210
+ home = _seed_real_aisb(Path(tmp))
211
+ report = audit_aisb_suite(home)
212
+ below = [(r.agent_id, r.score) for r in report.per_agent if r.score < 80]
213
+ self.assertEqual(below, [],
214
+ f"v0.19.40 contract requires every role ≥80/100. "
215
+ f"Regression: {below} — re-enrich the failing role with "
216
+ f"the THREE LAWS + Operating Contract block.")
217
+
218
+ def test_average_suite_score_at_least_85(self):
219
+ if not TEMPLATES.is_dir():
220
+ self.skipTest("AISB templates not present in repo")
221
+ with tempfile.TemporaryDirectory() as tmp:
222
+ home = _seed_real_aisb(Path(tmp))
223
+ report = audit_aisb_suite(home)
224
+ self.assertGreaterEqual(
225
+ report.average_score, 85.0,
226
+ f"v0.19.40 baseline: 98.3 average. Floor: 85. "
227
+ f"Got {report.average_score:.1f} — investigate which roles "
228
+ f"lost contract references.")
229
+
230
+ def test_done_json_vocab_overlap_at_least_80_percent(self):
231
+ """Before v0.19.40 only 33% of roles referenced `.done.json`
232
+ in their on-disk file (the rest relied on the loader to concat
233
+ lmc-protocol.md at spawn). The enrichment brought it to 100%."""
234
+ if not TEMPLATES.is_dir():
235
+ self.skipTest("AISB templates not present in repo")
236
+ with tempfile.TemporaryDirectory() as tmp:
237
+ home = _seed_real_aisb(Path(tmp))
238
+ oh = orchestration_health(home)
239
+ self.assertGreaterEqual(
240
+ oh["shared_vocab_overlap"], 0.80,
241
+ f"`.done.json` vocab overlap must stay ≥80% — "
242
+ f"got {oh['shared_vocab_overlap']*100:.0f}%. "
243
+ f"A role file lost its done.json reference.")
244
+
245
def test_every_role_references_lmc_protocol(self):
    """The LMC protocol must be named in every role file (not just
    implicit via the loader's concat). Direct grep check."""
    if not TEMPLATES.is_dir():
        self.skipTest("AISB templates not present in repo")
    import re as _re
    lmc_re = _re.compile(
        r"(lmc[\s\-_]*(protocol|gate)?|lead[\s\-]+manager[\s\-]+checker|"
        r"lmc-protocol\.md)",
        _re.IGNORECASE,
    )
    # Read as UTF-8 explicitly so the result does not depend on the
    # host locale's default encoding.
    missing = [
        md.name
        for md in sorted(TEMPLATES.glob("*.md"))
        if not lmc_re.search(md.read_text(encoding="utf-8"))
    ]
    self.assertEqual(missing, [],
        f"Every AISB role must reference the LMC protocol — "
        f"missing in: {missing}")
263
+
264
def test_every_role_references_three_laws(self):
    """Every shipped role file must mention the third law.

    Pre-v0.19.40 most roles only had LAW 1 + LAW 2; LAW 3 is now
    mandatory. The match is case-insensitive and accepts either the
    numbered form ("LAW 3" / "law 3") or the spelled-out "Third Law".
    """
    if not TEMPLATES.is_dir():
        self.skipTest("AISB templates not present in repo")
    missing_third = []
    for md in sorted(TEMPLATES.glob("*.md")):
        # Lower-case the text once and compare against lower-case
        # needles. The earlier check looked for the mixed-case string
        # "Third Law" inside text.lower(), which can never match, so
        # only the exact upper-case "LAW 3" was ever accepted.
        lowered = md.read_text(encoding="utf-8").lower()
        if "law 3" not in lowered and "third law" not in lowered:
            missing_third.append(md.name)
    self.assertEqual(missing_third, [],
        f"Every AISB role must reference LAW 3 (autonomous execution). "
        f"Missing in: {missing_third}")
278
+
279
+
280
+ if __name__ == "__main__":
281
+ unittest.main()
@@ -0,0 +1,94 @@
1
+ """Regression tests for the bundled tmux palette.
2
+
3
+ We swapped the Claude-cream/orange paper theme (#FAFAF7 / #D97757) for
4
+ the tmux-claude pixel/CRT amber palette (xterm colour137 ≈ #af8700).
5
+ These tests pin the new palette so a future drift back to the old
6
+ theme fails loudly.
7
+
8
+ Pinned behaviour:
9
+
10
+ 1. The pro config uses ``colour137`` (the amber accent) in multiple
11
+ places — status bar, pane borders, message style.
12
+ 2. The old Claude paper palette (#FAFAF7 cream + #D97757 orange) is
13
+ completely gone from the pro config.
14
+ 3. The window list is hidden (tmux-claude clean look — the operator
15
+ navigates via Option+/ or `omega tmux menu`, not the window bar).
16
+ 4. The popup keybinds use the full-screen ``-w 100% -h 100%`` sizing
17
+ from tmux-claude (the old config used the smaller 80%/90% boxes).
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import sys
22
+ import unittest
23
+ from pathlib import Path
24
+
25
+ HERE = Path(__file__).resolve().parent
26
+ sys.path.insert(0, str(HERE.parent))
27
+
28
+ from omega_engine.tmux import _PRO_CONFIG # noqa: E402
29
+
30
+
31
+ class TestTmuxClaudePalette(unittest.TestCase):
32
+ def test_pro_config_uses_colour137(self):
33
+ """Amber accent ``colour137`` must show up in at least 4 places.
34
+
35
+ Typically: status-left accent, status-right accent, pane-active
36
+ border, message-style — plus possibly mode-style. We assert >= 4
37
+ so a partial revert still trips the test.
38
+ """
39
+ n = _PRO_CONFIG.count("colour137")
40
+ self.assertGreaterEqual(
41
+ n, 4,
42
+ f"_PRO_CONFIG must reference colour137 at least 4 times "
43
+ f"(status accents + active border + message/mode style); "
44
+ f"found {n}",
45
+ )
46
+
47
+ def test_no_claude_cream_orange_palette(self):
48
+ """The old paper theme (#FAFAF7 + #D97757) must be gone."""
49
+ self.assertNotIn(
50
+ "#FAFAF7", _PRO_CONFIG,
51
+ "Old Claude cream bg #FAFAF7 must not appear in _PRO_CONFIG",
52
+ )
53
+ self.assertNotIn(
54
+ "#D97757", _PRO_CONFIG,
55
+ "Old Claude orange accent #D97757 must not appear in _PRO_CONFIG",
56
+ )
57
+
58
+ def test_window_status_hidden(self):
59
+ """Window list is hidden — both empty format strings present."""
60
+ self.assertIn(
61
+ "window-status-format ''", _PRO_CONFIG,
62
+ "Inactive window list must be hidden (window-status-format '')",
63
+ )
64
+ self.assertIn(
65
+ "window-status-current-format ''", _PRO_CONFIG,
66
+ "Active window must be hidden too (window-status-current-format '')",
67
+ )
68
+
69
+ def test_popup_keybinds_use_full_screen(self):
70
+ """The M-/ session switcher popup uses full-screen sizing."""
71
+ self.assertIn(
72
+ "-w 100% -h 100%", _PRO_CONFIG,
73
+ "Popup keybinds must use full-screen tmux-claude sizing "
74
+ "(-w 100% -h 100%)",
75
+ )
76
+ # Specifically the M-/ binding (the canonical session switcher).
77
+ # We grep the relevant line to make sure 100%/100% is wired there
78
+ # — not just present elsewhere by coincidence.
79
+ m_slash_lines = [
80
+ ln for ln in _PRO_CONFIG.splitlines() if "M-/" in ln
81
+ ]
82
+ self.assertTrue(
83
+ m_slash_lines,
84
+ "_PRO_CONFIG must keep the M-/ popup binding",
85
+ )
86
+ self.assertTrue(
87
+ any("-w 100% -h 100%" in ln for ln in m_slash_lines),
88
+ "M-/ popup binding must use -w 100% -h 100% "
89
+ "(full-screen tmux-claude style)",
90
+ )
91
+
92
+
93
+ if __name__ == "__main__":
94
+ unittest.main(verbosity=2)