npm - loki-mode - Versions diffs - 7.45.0 → 7.46.0 - Mend

loki-mode 7.45.0 → 7.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/README.md +16 -12
package/SKILL.md +5 -5
package/VERSION +1 -1
package/autonomy/CONSTITUTION.md +9 -2
package/autonomy/lib/sentrux-gate.sh +1 -1
package/autonomy/loki +2 -2
package/autonomy/run.sh +355 -92
package/dashboard/__init__.py +1 -1
package/dashboard/registry.py +156 -62
package/dashboard/server.py +9 -10
package/docs/COMPARISON.md +10 -10
package/docs/COMPETITIVE-ANALYSIS.md +1 -1
package/docs/INSTALLATION.md +2 -2
package/docs/P0-SWEEP-PLAN.md +163 -0
package/docs/architecture/STATE-MACHINES.md +18 -19
package/docs/architecture/bmad-loki-voice-agent-council-analysis.md +1 -1
package/docs/auto-claude-comparison.md +14 -11
package/docs/certification/01-core-concepts/lesson.md +12 -11
package/docs/certification/01-core-concepts/quiz.md +6 -6
package/docs/certification/05-troubleshooting/lesson.md +23 -13
package/docs/certification/05-troubleshooting/quiz.md +3 -3
package/docs/certification/answer-key.md +2 -2
package/docs/certification/certification-exam.md +9 -9
package/docs/competitive/bolt-new-analysis.md +1 -1
package/docs/competitive/emergence-others-analysis.md +9 -9
package/docs/competitive/replit-lovable-analysis.md +3 -3
package/docs/cursor-comparison.md +15 -12
package/docs/dashboard-guide.md +9 -7
package/docs/prd-purple-lab-platform-v2.md +1 -1
package/docs/prd-purple-lab-platform.md +3 -3
package/docs/show-hn-post.md +2 -2
package/loki-ts/dist/loki.js +2 -2
package/mcp/__init__.py +1 -1
package/package.json +2 -2
package/plugins/loki-mode/.claude-plugin/plugin.json +2 -2
package/plugins/loki-mode/README.md +1 -1
package/references/magic-rarv-integration.md +1 -1
package/references/quality-control.md +5 -5
package/references/sdlc-phases.md +1 -2
package/skills/00-index.md +1 -1
package/skills/artifacts.md +1 -1
package/skills/healing.md +1 -1
package/skills/magic-modules.md +3 -3
package/skills/quality-gates.md +52 -39
package/skills/testing.md +1 -1

package/dashboard/registry.py CHANGED Viewed

@@ -9,9 +9,11 @@ from __future__ import annotations
 import json
 import os
+import tempfile
+from contextlib import contextmanager
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Optional
+from typing import Iterator, Optional
 import hashlib
@@ -25,6 +27,62 @@ def _ensure_registry_dir() -> None:
     REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
+@contextmanager
+def _registry_lock() -> Iterator[None]:
+    """
+    Best-effort advisory lock around a read-modify-write of the registry.
+    Two concurrent writers (e.g. two `loki docker start` in different repos, or
+    a docker run racing a host `loki start`) would otherwise both load the old
+    registry, mutate, and save, dropping one writer's entry (lost update). This
+    serializes the leaf mutators so they take turns.
+    Degrades gracefully: if fcntl is unavailable (Windows) or the lock cannot
+    be acquired for any reason, execution proceeds without a lock rather than
+    blocking a build. The atomic write in _save_registry still guarantees no
+    reader ever sees a torn file; only the lost-update protection is
+    best-effort.
+    The lock path is derived from the current REGISTRY_DIR at call time (not a
+    module-level constant) so tests that monkeypatch REGISTRY_DIR stay
+    hermetic. Not reentrant: do not nest this around another leaf mutator (the
+    leaf mutators do not call one another).
+    """
+    _ensure_registry_dir()
+    lock_fd = None
+    locked = False
+    try:
+        import fcntl  # POSIX only; absent on Windows
+        lock_path = REGISTRY_DIR / ".registry.lock"
+        try:
+            lock_fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o644)
+            fcntl.flock(lock_fd, fcntl.LOCK_EX)
+            locked = True
+        except OSError:
+            # Could not open or lock the file; proceed without the lock.
+            locked = False
+    except ImportError:
+        # fcntl not available (e.g. Windows); proceed without the lock.
+        lock_fd = None
+    try:
+        yield
+    finally:
+        if lock_fd is not None:
+            try:
+                if locked:
+                    import fcntl
+                    fcntl.flock(lock_fd, fcntl.LOCK_UN)
+            except (OSError, ImportError):
+                pass
+            try:
+                os.close(lock_fd)
+            except OSError:
+                pass
 def _load_registry() -> dict:
     """Load the project registry from disk."""
     _ensure_registry_dir()
@@ -38,10 +96,39 @@ def _load_registry() -> dict:
 def _save_registry(registry: dict) -> None:
-    """Save the project registry to disk."""
+    """
+    Save the project registry to disk atomically.
+    Writes to a temp file in the SAME directory as REGISTRY_FILE (so os.replace
+    is an atomic rename on the same filesystem), flushes and fsyncs it, then
+    os.replace()s it over the destination. Every reader therefore sees either
+    the complete old file or the complete new file, never a half-written (torn)
+    one. The temp file is removed on any error path so partial files never
+    leak.
+    Note: atomic write alone eliminates torn reads but does not by itself
+    prevent lost updates under true simultaneity. The leaf mutators wrap their
+    load->mutate->save in _registry_lock() to serialize concurrent writers and
+    reduce that window; when locking is unavailable the degradation is honest
+    (torn reads still impossible, lost-update still possible).
+    """
     _ensure_registry_dir()
-    with open(REGISTRY_FILE, "w") as f:
-        json.dump(registry, f, indent=2, default=str)
+    tmp_fd, tmp_path = tempfile.mkstemp(
+        dir=str(REGISTRY_DIR), prefix=".projects.", suffix=".tmp"
+    )
+    try:
+        with os.fdopen(tmp_fd, "w") as f:
+            json.dump(registry, f, indent=2, default=str)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, str(REGISTRY_FILE))
+    except BaseException:
+        # Clean up the temp file on any failure so we never leak partial files.
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
 def _generate_project_id(path: str) -> str:
@@ -70,34 +157,38 @@ def register_project(
     if not os.path.isdir(path):
         raise ValueError(f"Path does not exist: {path}")
-    registry = _load_registry()
     project_id = _generate_project_id(path)
-    # Check if already registered
-    if project_id in registry["projects"]:
-        # Update existing entry
-        project = registry["projects"][project_id]
-        if name:
-            project["name"] = name
-        if alias:
-            project["alias"] = alias
-        project["updated_at"] = datetime.now(timezone.utc).isoformat()
-    else:
-        # Create new entry
-        project = {
-            "id": project_id,
-            "path": path,
-            "name": name or os.path.basename(path),
-            "alias": alias,
-            "registered_at": datetime.now(timezone.utc).isoformat(),
-            "updated_at": datetime.now(timezone.utc).isoformat(),
-            "last_accessed": None,
-            "has_loki_dir": os.path.isdir(os.path.join(path, ".loki")),
-            "status": "active",
-        }
-        registry["projects"][project_id] = project
+    # Lock the load->mutate->save so concurrent registrations serialize and do
+    # not overwrite each other's updates (the multi-repo `loki docker` happy path).
+    with _registry_lock():
+        registry = _load_registry()
-    _save_registry(registry)
+        # Check if already registered
+        if project_id in registry["projects"]:
+            # Update existing entry
+            project = registry["projects"][project_id]
+            if name:
+                project["name"] = name
+            if alias:
+                project["alias"] = alias
+            project["updated_at"] = datetime.now(timezone.utc).isoformat()
+        else:
+            # Create new entry
+            project = {
+                "id": project_id,
+                "path": path,
+                "name": name or os.path.basename(path),
+                "alias": alias,
+                "registered_at": datetime.now(timezone.utc).isoformat(),
+                "updated_at": datetime.now(timezone.utc).isoformat(),
+                "last_accessed": None,
+                "has_loki_dir": os.path.isdir(os.path.join(path, ".loki")),
+                "status": "active",
+            }
+            registry["projects"][project_id] = project
+        _save_registry(registry)
     return project
@@ -111,19 +202,20 @@ def unregister_project(identifier: str) -> bool:
     Returns:
         True if removed, False if not found
     """
-    registry = _load_registry()
-    # Find by ID, path, or alias
-    project_id = None
-    for pid, project in registry["projects"].items():
-        if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
-            project_id = pid
-            break
-    if project_id:
-        del registry["projects"][project_id]
-        _save_registry(registry)
-        return True
+    with _registry_lock():
+        registry = _load_registry()
+        # Find by ID, path, or alias
+        project_id = None
+        for pid, project in registry["projects"].items():
+            if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
+                project_id = pid
+                break
+        if project_id:
+            del registry["projects"][project_id]
+            _save_registry(registry)
+            return True
     return False
@@ -179,13 +271,14 @@ def update_last_accessed(identifier: str) -> Optional[dict]:
     Returns:
         Updated project entry or None
     """
-    registry = _load_registry()
-    for pid, project in registry["projects"].items():
-        if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
-            project["last_accessed"] = datetime.now(timezone.utc).isoformat()
-            _save_registry(registry)
-            return project
+    with _registry_lock():
+        registry = _load_registry()
+        for pid, project in registry["projects"].items():
+            if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
+                project["last_accessed"] = datetime.now(timezone.utc).isoformat()
+                _save_registry(registry)
+                return project
     return None
@@ -207,19 +300,20 @@ def mark_project_stopped(identifier: str) -> Optional[dict]:
         Idempotent: marking an already-stopped project is a no-op that still
         returns the entry.
     """
-    registry = _load_registry()
-    for pid_key, project in registry["projects"].items():
-        if (
-            pid_key == identifier
-            or project["path"] == identifier
-            or project.get("alias") == identifier
-        ):
-            project["status"] = "stopped"
-            project["pid"] = None
-            project["updated_at"] = datetime.now(timezone.utc).isoformat()
-            _save_registry(registry)
-            return project
+    with _registry_lock():
+        registry = _load_registry()
+        for pid_key, project in registry["projects"].items():
+            if (
+                pid_key == identifier
+                or project["path"] == identifier
+                or project.get("alias") == identifier
+            ):
+                project["status"] = "stopped"
+                project["pid"] = None
+                project["updated_at"] = datetime.now(timezone.utc).isoformat()
+                _save_registry(registry)
+                return project
     return None

package/dashboard/server.py CHANGED Viewed

@@ -885,7 +885,7 @@ async def agent_card() -> dict:
         "capabilities": {
             "agents": 41,
             "swarms": 8,
-            "quality_gates": 9,
+            "quality_gates": 8,
             "providers": ["claude", "codex", "cline", "aider"],
             "streaming": True,
             "pushNotifications": False,
@@ -7177,15 +7177,14 @@ async def remove_checklist_waiver(item_id: str):
 # =============================================================================
 _DEFAULT_QUALITY_GATES = [
-    {"name": "Static Analysis", "description": "CodeQL, ESLint, type checking", "status": "pending"},
-    {"name": "Parallel Code Review", "description": "3-reviewer blind review system", "status": "pending"},
-    {"name": "Anti-Sycophancy Check", "description": "Devil's advocate on unanimous approval", "status": "pending"},
-    {"name": "Severity Assessment", "description": "Critical/High/Medium = BLOCK", "status": "pending"},
-    {"name": "Unit Test Coverage", "description": "Target >80% coverage, 100% pass", "status": "pending"},
-    {"name": "Integration Tests", "description": "End-to-end verification", "status": "pending"},
-    {"name": "Security Scan", "description": "Dependency audit, OWASP checks", "status": "pending"},
-    {"name": "Build Verification", "description": "Clean build with no warnings", "status": "pending"},
-    {"name": "Council Vote", "description": "Completion council consensus", "status": "pending"},
+    {"name": "Static Analysis", "description": "CodeQL, ESLint/Pylint, type-checker findings on the diff", "status": "pending"},
+    {"name": "Test Suite", "description": "Project test runner pass/fail (red blocks)", "status": "pending"},
+    {"name": "Blind Code Review", "description": "3-reviewer blind review; Critical/High = BLOCK; Medium/Low advisory", "status": "pending"},
+    {"name": "Anti-Sycophancy", "description": "Devil's Advocate re-review on unanimous PASS", "status": "pending"},
+    {"name": "Mock Integrity", "description": "Tautological-assertion and mock-ratio detection", "status": "pending"},
+    {"name": "Test Mutation", "description": "Assertion-churn (test-fitting) detection", "status": "pending"},
+    {"name": "Documentation Coverage", "description": "README presence, docs freshness, API docs for exported symbols", "status": "pending"},
+    {"name": "Magic Modules Debate", "description": "Spec-vs-implementation debate on generated modules", "status": "pending"},
 ]

package/docs/COMPARISON.md CHANGED Viewed

@@ -14,8 +14,8 @@
 | **Type** | Skill/Framework | Enterprise Platform | Standalone Agent | Cloud Agent | AI IDE | CLI Agent | AI IDE | AI IDE | Cloud Agent | AI IDE (OSS) |
 | **Autonomy Level** | High (minimal human) | High | Full | High | Medium-High | High | High | High | High | High |
 | **Max Runtime** | Unlimited | Async/Scheduled | Hours | Per-task | Session | Session | Days | Async | Per-task | Session |
-| **Pricing** | Free (OSS) | Enterprise | $20/mo | ChatGPT Plus | $20/mo | API costs | Free preview | Free preview | $19/mo | Free (OSS) |
-| **Open Source** | Yes | No | No | No | No | No | No | No | No | Yes |
+| **Pricing** | Free (source-available) | Enterprise | $20/mo | ChatGPT Plus | $20/mo | API costs | Free preview | Free preview | $19/mo | Free (OSS) |
+| **Source model** | Source-available (BUSL-1.1) | Closed source | Closed source | Closed source | Closed source | Closed source | Closed source | Closed source | Closed source | Open source |
 | **GitHub Stars** | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | 70.9k |
 ---
@@ -37,7 +37,7 @@
 |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
 | **Code Review** | 3 blind reviewers + devil's advocate | Basic | Basic | BugBot PR | Property-based | Artifacts | Doc/Review | Basic |
 | **Anti-Sycophancy** | Yes (CONSENSAGENT) | No | No | No | No | No | No | No |
-| **Quality Gates** | 11 gates + PBT | Basic | Sandbox | Tests | Spec validation | Artifact checks | Tests | Permissions |
+| **Quality Gates** | 8 gates + PBT | Basic | Sandbox | Tests | Spec validation | Artifact checks | Tests | Permissions |
 | **Constitutional AI** | Yes (principles) | No | Refusal training | No | No | No | No | No |
 ---
@@ -146,10 +146,10 @@
 | Feature | **Zencoder** | **Loki Mode** | **Assessment** |
 |---------|-------------|---------------|----------------|
-| **Four Pillars** | Structured Workflows, SDD, Multi-Agent Verification, Parallel Execution | SDLC + RARV + 9 Gates + Worktrees | TIE |
+| **Four Pillars** | Structured Workflows, SDD, Multi-Agent Verification, Parallel Execution | SDLC + RARV + 8 Gates + Worktrees | TIE |
 | **Spec-Driven Dev** | Specs as first-class objects | OpenAPI-first | TIE |
 | **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
-| **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
+| **Quality Gates** | Built-in verification loops | 8 explicit gates + anti-sycophancy | **Loki Mode** |
 | **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
 | **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agent roles | **Loki Mode** |
 | **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
@@ -178,7 +178,7 @@
 ### Where Loki Mode EXCEEDS Zencoder
-1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
+1. **Quality Control**: 8 explicit gates + blind review + devil's advocate vs built-in loops
 2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
 3. **Agent Specialization**: 41 pre-defined specialized agent roles across 8 domains
 4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
@@ -207,7 +207,7 @@
 | **Skills** | Progressive disclosure | 6 slash commands | N/A | 129 skills | N/A | 35 skills | Memory focus |
 | **Multi-Provider** | Yes (Claude/Codex/Gemini) | 3 CLIs (separate) | No | No | No | No | No |
 | **Memory System** | 3-tier (episodic/semantic/procedural) | None | N/A | N/A | Hybrid | N/A | SQLite+FTS5 |
-| **Quality Gates** | 11 gates + Completion Council | User verify only | Two-Stage Review | N/A | Consensus | Tiered | N/A |
+| **Quality Gates** | 8 gates + Completion Council | User verify only | Two-Stage Review | N/A | Consensus | Tiered | N/A |
 | **Context Mgmt** | Standard | Fresh per task (core innovation) | Fresh per task | N/A | N/A | N/A | Progressive |
 | **Autonomy** | High (minimal human) | Semi (checkpoints) | Human-guided | Human-guided | Orchestrated | Human-guided | N/A |
@@ -232,7 +232,7 @@ These are patterns from competing projects that are **practically and scientific
 |----------|---------|-------------------------|
 | **Multi-Provider Support** | Only skill supporting Claude, Codex, and Gemini with graceful degradation | All 8 competitors are Claude-only |
 | **RARV Cycle** | Reason-Act-Reflect-Verify is more rigorous than Plan-Execute | Most use simple Plan-Execute |
-| **11-Gate Quality System** | Static analysis + 3 reviewers + devil's advocate + anti-sycophancy + severity blocking + coverage + debate + backward-compat (healing) + Phase 1 closure | Superpowers has 2-stage, others have less |
+| **8-Gate Quality System** | Static analysis + test suite (pass/fail) + 3 blind reviewers with severity blocking + devil's advocate + mock-integrity + test-mutation + documentation coverage + Magic Modules debate (backward-compat is a conditional healing auditor) + Phase 1 closure | Superpowers has 2-stage, others have less |
 | **Constitutional AI Integration** | Principles-based self-critique from Anthropic research | None have this |
 | **Anti-Sycophancy (CONSENSAGENT)** | Blind review + devil's advocate prevents groupthink | None have this |
 | **Provider Abstraction Layer** | Clean degradation from full-featured to sequential-only | Claude-only projects can't degrade |
@@ -359,12 +359,12 @@ Tiered agent architecture with explicit escalation:
 |-----------|-------------------|
 | **Autonomy** | Designed for high autonomy with minimal human intervention |
 | **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase (parallel review council + optional worktree streams on Claude, sequential elsewhere) vs 1-8 in competitors, with all output gated by blind review + council |
-| **Quality** | 11 gates + blind review + devil's advocate + property-based testing |
+| **Quality** | 8 gates + blind review + devil's advocate + property-based testing |
 | **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
 | **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |
 | **Memory** | 3-tier memory (episodic/semantic/procedural) + review learning + cross-project |
 | **Transformation** | Code migration workflows (language, database, framework) |
-| **Cost** | Free (open source) vs $20-500/month |
+| **Cost** | Free (source-available, BUSL-1.1) vs $20-500/month |
 | **Customization** | Full source access vs black box |
 ---

package/docs/COMPETITIVE-ANALYSIS.md CHANGED Viewed

@@ -20,7 +20,7 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
 | Adoption | 594 stars, 6K/wk npm | 11,903 stars, 21K/wk npm | GSD (20x) |
 | Simplicity | Complex (5.4K-line run.sh, 12 Python modules) | Simple (markdown agents + slash commands) | GSD |
 | Full autonomy | Walk away, come back to deployed product | Human checkpoints at discuss/verify/milestone | Loki |
-| Quality gates | 9-gate + Completion Council + anti-sycophancy | User verification only | Loki |
+| Quality gates | 8-gate + Completion Council + anti-sycophancy | User verification only | Loki |
 | Memory system | Episodic/semantic/procedural + vector search | None | Loki |
 | Context management | Standard | Fresh subagent contexts per task (core innovation) | GSD |
 | Time to value | Learn architecture, understand CLI flags | `npx get-shit-done-cc` and go | GSD |

package/docs/INSTALLATION.md CHANGED Viewed

@@ -2,7 +2,7 @@
 The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
-**Version:** v7.45.0
+**Version:** v7.46.0
 ---
@@ -389,7 +389,7 @@ provider works inside the container. Provide auth with your Anthropic API key:
 # Run Loki Mode in Docker (Claude provider, API-key auth)
 docker run --rm -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
   -v $(pwd):/workspace -w /workspace \
-  asklokesh/loki-mode:7.45.0 start ./my-spec.md
+  asklokesh/loki-mode:7.46.0 start ./my-spec.md
 ```
 ##### docker compose + .env (no host install)

package/docs/P0-SWEEP-PLAN.md ADDED Viewed

@@ -0,0 +1,163 @@
+# P0 Verification-Credibility Sweep -- Architecture Plan
+Persisted from the Architect (opus). Every line number re-verified by grep.
+Goal: make Loki's verification layer honest and real. A hollow wedge is
+existential for a "proof of done" product. Fix or remove every false/hollow gate
+claim, wire the unwired detectors, make anti-sycophancy act.
+## 0. Verified ground truth
+- P0-1: enforce_test_coverage() at autonomy/run.sh:7031. `local coverage_pct=0`
+  at 7038 is never reassigned; no coverage tool invoked. 7257 emits min_coverage
+  (the threshold), not a measured value. Gate decides purely on test_passed.
+- P0-2: skills/quality-gates.md:5-17 lists 11 gates; gates 1 (Input Guardrails)
+  and 5 (Output Guardrails) have NO gate function. wiki/Quality-Gates.md:14-28
+  duplicates. (21 'guardrail' refs in autonomy/ are CLI help/comments/flags.)
+- P0-3: tests/detect-mock-problems.sh + tests/detect-test-mutations.sh invoked
+  0 times in autonomy/run.sh. quality-gates.md:74-77 claims HIGH=FAIL.
+- P0-4: anti-sycophancy block run.sh:8316-8323 only logs + writes
+  anti-sycophancy.txt. No Devil's-Advocate re-review. INERT. Bun mirror
+  loki-ts/src/runner/quality_gates.ts:804-808 equally inert.
+- Gate inventory: phantom (Input/Output Guardrails); wired-but-unlisted
+  (run_magic_debate_gate at run.sh:14067); "Gate 10 Backward Compat" is the
+  legacy-healing-auditor SPECIALIST (run.sh:7875-7979), conditional, not a loop
+  gate; "Gate 6 Severity Blocking" is the block policy inside code review, not a
+  function.
+### Functions actually invoked in orchestration (run.sh:13938-14084)
+enforce_static_analysis (13945); enforce_test_coverage (13967); run_code_review
+(13987); run_doc_quality_gate (14058); run_magic_debate_gate (14070); plus
+conditional legacy-healing-auditor reviewer.
+## 1. Canonical final gate list (THE CONTRACT -- docs transcribe, never recompute)
+Honest count after this sweep: 8 gates.
+| # | Gate | Function / mechanism | Blocking | Opt-out flag |
+|---|------|---------------------|----------|--------------|
+| 1 | Static Analysis | enforce_static_analysis (run.sh:6699) | Yes (ladder) | PHASE_STATIC_ANALYSIS=false |
+| 2 | Test Suite (pass/fail) | enforce_test_coverage (run.sh:7031) | Yes (red blocks) | PHASE_UNIT_TESTS=false |
+| 3 | Blind Code Review (3-reviewer council + severity blocking) | run_code_review (run.sh:7788) | Yes (Crit/High block) | PHASE_CODE_REVIEW=false |
+| 4 | Anti-Sycophancy / Devil's Advocate (on unanimous PASS) | run_code_review sub-step (run.sh:8316+) | Yes (DA Crit/High block) | LOKI_GATE_DEVILS_ADVOCATE=false |
+| 5 | Mock Integrity Detector | enforce_mock_integrity -> tests/detect-mock-problems.sh | Yes (HIGH blocks) | LOKI_GATE_MOCK=false |
+| 6 | Test Mutation Detector | enforce_mutation_integrity -> tests/detect-test-mutations.sh | Yes (HIGH blocks) | LOKI_GATE_MUTATION=false |
+| 7 | Documentation Coverage | run_doc_quality_gate (run.sh:7388) | Yes | LOKI_GATE_DOC_COVERAGE=false |
+| 8 | Magic Modules Debate | run_magic_debate_gate (run.sh:7495) | Yes (BLOCK sev) | LOKI_GATE_MAGIC_DEBATE=false |
+Conditional auditor (documented separately, NOT numbered): Backward-Compatibility
+/ legacy-healing-auditor (healing mode only). Removed: Input/Output Guardrails.
+### Doc files to update to "8 gates" (docs owner)
+README.md (22,29,196,255); SKILL.md (3,10); CLAUDE.md (44);
+plugins/loki-mode/README.md (4); wiki/Quality-Gates.md (14-48);
+wiki/Environment-Variables.md (62); wiki/Home.md (3,13); wiki/CLI-Reference.md
+(230); docs/cursor-comparison.md (14,177,195); docs/COMPARISON.md (40,210,362);
+skills/quality-gates.md (5,13,14-17,19-66,69-82,650,655,668); skills/00-index.md
+(51). CHANGELOG.md: NEW top entry ONLY; never rewrite historical entries
+(5837/6181/6335/6340).
+## 2. P0-1 Coverage honesty (Fix B) -- Slice A (run.sh owner) + Slice B (docs)
+- run.sh: remove dead `local coverage_pct=0` (7038). Relabel logs: 13966
+  "test suite (pass/fail)"; 7265/7270 "Test suite gate".
+- KEEP the min_coverage JSON field at 7257 (consumed by autonomy/loki:27529-27530,
+  16138 and asserted in tests/test-report-command.sh:116,
+  tests/test-completion-council-affirmative-evidence.sh:126,
+  tests/test-evidence-gate.sh:155). Only change misleading consumer strings in
+  autonomy/loki (27530, 16138) to "Min coverage TARGET (not measured)".
+- docs (skills/quality-gates.md): :13 drop ">80% coverage" -> "coverage % not
+  measured in this release"; :650/:655 reword to pass/fail + target-only; :668
+  remove coverage.json artifact line. Note Fix A (real measurement) as follow-up.
+## 3. P0-2 Phantom guardrails -- Slice B (docs only)
+Remove gates 1 & 5 entirely (do not "mark planned"). Renumber to the 8-gate
+table. Edit skills/quality-gates.md:5-17, wiki/Quality-Gates.md:14-28, + all
+list files in section 1.
+## 4. P0-3 Wire detectors -- Slice A (run.sh) + Slice D (scripts) + Slice C (Bun)
+Exit-code asymmetry (load-bearing):
+- detect-mock-problems.sh exits 1 on CRITICAL/HIGH (179-182), 0 otherwise.
+  Exit code already = block-on-HIGH.
+- detect-test-mutations.sh exits 0 unless --strict; --strict blocks on ANY
+  finding (over-blocks MED/LOW). DO NOT use --strict. Wrapper greps stdout for
+  [HIGH] to decide block; route MED/LOW to findings injection.
+New run.sh functions (place after run_magic_debate_gate ~7560):
+  enforce_mock_integrity()      # HIGH -> return 1; MED/LOW -> findings file
+  enforce_mutation_integrity()  # grep -c '\[HIGH\]' >0 -> return 1; MED/LOW -> findings
+Both cd "${TARGET_DIR}", use LOKI_GATE_TIMEOUT wrapping, write findings into
+${TARGET_DIR}/.loki/quality/ for the Phase-1 findings injector.
+Orchestration insert: after the pause-check at 13983, before code-review at
+13985. Mirror the existing pattern with track_gate_failure/clear_gate_failure +
+gate_failures string. Toggles LOKI_GATE_MOCK / LOKI_GATE_MUTATION (matches
+existing LOKI_GATE_DOC_COVERAGE / LOKI_GATE_MAGIC_DEBATE convention).
+Detector-script (Slice D): optional --block-high mode on detect-test-mutations.sh
+(exit 2 on HIGH) keeping --strict intact; OR rely on wrapper grep (no script
+change). Verify detect-mock-problems.sh exit semantics. Do NOT touch run.sh.
+## 5. P0-4 Anti-sycophancy acts -- Slice A (run.sh) + Slice C (Bun)
+Read run_code_review 7788-8316 first. At 8316-8323 unanimous block: dispatch ONE
+Devil's-Advocate reviewer reusing the existing reviewer-invocation +
+parse_verdict helpers; if DA returns Crit/High set has_blocking=true so the
+EXISTING block at 8326-8330 fires (return 1). Keep anti-sycophancy.txt for audit.
+Gate behind LOKI_GATE_DEVILS_ADVOCATE (default true).
+## 6. P0-5 Honest per-gate table -- Slice B (docs)
+Replace skills/quality-gates.md:5-17 + prose 19-82 with the 8-gate table plus
+columns: detects X / does NOT detect Y / opt-out flag / blocking. Honesty
+entries: gate 2 "does NOT detect coverage %"; gate 5 "does NOT detect semantic
+correctness of mocks"; gate 6 "does NOT detect logically-correct-but-weak
+assertions".
+## 7. Bash <-> Bun parity matrix
+| Change | Bun mirror | File |
+|--------|-----------|------|
+| P0-1 label/honesty | Yes (light) | quality_gates.ts runTestCoverage (402): no false % strings |
+| P0-2 gate count | docs only | -- |
+| P0-3 mock gate | Yes | quality_gates.ts: add mock_integrity to GateName (69-74) + runMockIntegrity + sequence (1474-1480) + toggle |
+| P0-3 mutation gate | Yes | quality_gates.ts: add mutation_integrity + runMutationIntegrity + sequence + toggle |
+| P0-4 devil's advocate | Yes | quality_gates.ts runCodeReview (709), inert at 804-808: add DA dispatch + block |
+| P0-5 doc table | docs only | -- |
+Bun escalation ladder is generic; new gates inherit once added to union+sequence.
+## 8. Slice boundaries (independent; no file collisions)
+- Slice A -- run.sh runtime (ONE owner, serialized): P0-1 (run.sh + autonomy/loki
+  strings), P0-3 new funcs + orchestration insert, P0-4. Owns autonomy/run.sh +
+  autonomy/loki exclusively.
+- Slice B -- Docs (ONE owner): P0-2 + P0-5 + all "11->8 gates" edits. P0-2 and
+  P0-5 both edit skills/quality-gates.md, so they MUST be one slice. New
+  CHANGELOG entry only.
+- Slice C -- Bun parity (ONE owner): loki-ts/src/runner/quality_gates.ts only.
+- Slice D -- Detector scripts (ONE owner): tests/detect-test-mutations.sh
+  --block-high; verify detect-mock-problems.sh. No run.sh.
+- Slice E -- SDET tests (ONE owner; after A/C/D): fixtures + assertions.
+Order: D and B parallel anytime; A depends on D contract; C mirrors A; E last.
+## 9. Test plan (SDET, Slice E)
+- P0-1: grep assert no ">80%"/"min_coverage: 80% # Never drop"/"coverage.json"
+  in any list doc. Behavior: passing tests pass, failing tests block.
+- P0-2: grep assert zero live "11 gates"/"Input Guardrails"/"Output Guardrails"
+  (CHANGELOG excepted); "8" present in quality-gates.md + wiki.
+- P0-3 mock: fixture with tautological assertion -> enforce_mock_integrity
+  returns 1, BLOCKS, track_gate_failure increments. Clean -> 0, clears. MED-only
+  -> 0 + findings file.
+- P0-3 mutation: fixture commit changing assertion values + impl (HIGH) ->
+  returns 1, BLOCKS. MED-only -> 0 + findings (proves not over-blocking).
+- P0-4: unanimous PASS + DA High -> run_code_review returns 1. Unanimous PASS +
+  DA clean -> 0 + anti-sycophancy.txt exists.
+- Parity: Bun sequence includes mock_integrity + mutation_integrity; runCodeReview
+  blocks on DA High; existing loki-ts tests green.
+## 10. Risks + binding constraints
+Risks: (1) min_coverage JSON field has live consumers + 3 test assertions -- keep
+field, fix strings only. (2) mutation --strict over-blocks -- parse HIGH instead.
+(3) detectors run against TARGET project test files -- cd TARGET_DIR + timeout
+wrap. (4) stale cross-file comment line refs exist; do not chase, do not add new.
+Binding constraints (every dev agent): NO version bumps (integrator once); NO
+commits/push; NO emojis; NO em dashes; full gate applies (touches runtime/gates/
+parity); stay inside your slice file ownership; run.sh is single-owner.
+Canonical count decision: 8 (recommended). Keeping backward-compat numbered
+would make it 9 but reintroduces the listed-but-not-a-loop-gate honesty gap this
+sweep exists to close.

package/docs/architecture/STATE-MACHINES.md CHANGED Viewed

@@ -972,7 +972,7 @@ Source: `run.sh:7880-7881` (checklist_should_verify, checklist_verify)
 ## 7. Quality Gates
-### 7.1 Nine-Gate Pipeline
+### 7.1 Eight-Gate Pipeline
 Source: `skills/quality-gates.md`
@@ -980,40 +980,39 @@ Source: `skills/quality-gates.md`
   Code Change
       |
       v
-  Gate 1: Static Analysis (CodeQL, ESLint)
-      |──BLOCK (critical findings)──> [REJECTED]
+  Gate 1: Static Analysis (CodeQL, ESLint/Pylint, type-checker)
+      |──BLOCK (severity ladder)──> [REJECTED]
       v
-  Gate 2: Type Check (tsc --noEmit)
+  Gate 2: Test Suite (pass/fail; red blocks; coverage % not measured this release)
       |──BLOCK──> [REJECTED]
       v
-  Gate 3: Unit Tests (>80% coverage, 100% pass)
-      |──BLOCK──> [REJECTED]
-      v
-  Gate 4: Integration Tests
-      |──BLOCK──> [REJECTED]
-      v
-  Gate 5: 3-Reviewer Blind Review (see 7.3)
+  Gate 3: Blind 3-Reviewer Review with severity blocking (see 7.3)
       |──BLOCK (Critical/High severity)──> [REJECTED]
       v
-  Gate 6: Anti-Sycophancy Check
-      |──BLOCK (devil's advocate finds issues)──> [REJECTED]
+  Gate 4: Anti-Sycophancy Devil's Advocate (on unanimous PASS)
+      |──BLOCK (devil's advocate Critical/High findings)──> [REJECTED]
       v
-  Gate 7: Security Scan
-      |──BLOCK──> [REJECTED]
+  Gate 5: Mock Integrity Detector
+      |──BLOCK (HIGH findings)──> [REJECTED]
       v
-  Gate 8: Performance Check
-      |──BLOCK──> [REJECTED]
+  Gate 6: Test Mutation Detector
+      |──BLOCK (HIGH findings)──> [REJECTED]
       v
-  Gate 9: E2E / Playwright
+  Gate 7: Documentation Coverage
       |──BLOCK──> [REJECTED]
       v
+  Gate 8: Magic Modules Debate
+      |──BLOCK (BLOCK-severity findings)──> [REJECTED]
+      v
   [APPROVED]
 ```
+Backward-compatibility is a conditional healing-mode auditor, not a numbered gate.
 Gate status values: `passed`, `failed`, `skipped`
 Persistence: `.loki/dashboard-state.json` field `qualityGates`
 Severity levels: `critical`, `high`, `medium`, `low`
-Blocking threshold: Critical and High always block; Medium blocks by default.
+Blocking threshold: Critical and High block; Medium and Low are advisory.
 ### 7.2 Model Escalation

package/docs/architecture/bmad-loki-voice-agent-council-analysis.md CHANGED Viewed

@@ -57,5 +57,5 @@ architecture, and adversarial review -- complementing Loki Mode's autonomous exe
 1. P0 must ship independently and prove value before P1/P2 begin
 2. No runtime dependency on BMAD repo -- adapter reads BMAD output artifacts only
 3. Zero regression on existing non-BMAD workflows
-4. All code must pass existing 9-gate quality system
+4. All code must pass existing 8-gate quality system
 5. Context budget: BMAD additions must stay under 15K tokens per iteration