loki-mode 7.45.0 → 7.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +16 -12
  2. package/SKILL.md +5 -5
  3. package/VERSION +1 -1
  4. package/autonomy/CONSTITUTION.md +9 -2
  5. package/autonomy/lib/sentrux-gate.sh +1 -1
  6. package/autonomy/loki +2 -2
  7. package/autonomy/run.sh +355 -92
  8. package/dashboard/__init__.py +1 -1
  9. package/dashboard/registry.py +156 -62
  10. package/dashboard/server.py +9 -10
  11. package/docs/COMPARISON.md +10 -10
  12. package/docs/COMPETITIVE-ANALYSIS.md +1 -1
  13. package/docs/INSTALLATION.md +2 -2
  14. package/docs/P0-SWEEP-PLAN.md +163 -0
  15. package/docs/architecture/STATE-MACHINES.md +18 -19
  16. package/docs/architecture/bmad-loki-voice-agent-council-analysis.md +1 -1
  17. package/docs/auto-claude-comparison.md +14 -11
  18. package/docs/certification/01-core-concepts/lesson.md +12 -11
  19. package/docs/certification/01-core-concepts/quiz.md +6 -6
  20. package/docs/certification/05-troubleshooting/lesson.md +23 -13
  21. package/docs/certification/05-troubleshooting/quiz.md +3 -3
  22. package/docs/certification/answer-key.md +2 -2
  23. package/docs/certification/certification-exam.md +9 -9
  24. package/docs/competitive/bolt-new-analysis.md +1 -1
  25. package/docs/competitive/emergence-others-analysis.md +9 -9
  26. package/docs/competitive/replit-lovable-analysis.md +3 -3
  27. package/docs/cursor-comparison.md +15 -12
  28. package/docs/dashboard-guide.md +9 -7
  29. package/docs/prd-purple-lab-platform-v2.md +1 -1
  30. package/docs/prd-purple-lab-platform.md +3 -3
  31. package/docs/show-hn-post.md +2 -2
  32. package/loki-ts/dist/loki.js +2 -2
  33. package/mcp/__init__.py +1 -1
  34. package/package.json +2 -2
  35. package/plugins/loki-mode/.claude-plugin/plugin.json +2 -2
  36. package/plugins/loki-mode/README.md +1 -1
  37. package/references/magic-rarv-integration.md +1 -1
  38. package/references/quality-control.md +5 -5
  39. package/references/sdlc-phases.md +1 -2
  40. package/skills/00-index.md +1 -1
  41. package/skills/artifacts.md +1 -1
  42. package/skills/healing.md +1 -1
  43. package/skills/magic-modules.md +3 -3
  44. package/skills/quality-gates.md +52 -39
  45. package/skills/testing.md +1 -1
@@ -9,9 +9,11 @@ from __future__ import annotations
9
9
 
10
10
  import json
11
11
  import os
12
+ import tempfile
13
+ from contextlib import contextmanager
12
14
  from datetime import datetime, timezone
13
15
  from pathlib import Path
14
- from typing import Optional
16
+ from typing import Iterator, Optional
15
17
  import hashlib
16
18
 
17
19
 
@@ -25,6 +27,62 @@ def _ensure_registry_dir() -> None:
25
27
  REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
26
28
 
27
29
 
30
+ @contextmanager
31
+ def _registry_lock() -> Iterator[None]:
32
+ """
33
+ Best-effort advisory lock around a read-modify-write of the registry.
34
+
35
+ Two concurrent writers (e.g. two `loki docker start` in different repos, or
36
+ a docker run racing a host `loki start`) would otherwise both load the old
37
+ registry, mutate, and save, dropping one writer's entry (lost update). This
38
+ serializes the leaf mutators so they take turns.
39
+
40
+ Degrades gracefully: if fcntl is unavailable (Windows) or the lock cannot
41
+ be acquired for any reason, execution proceeds without a lock rather than
42
+ blocking a build. The atomic write in _save_registry still guarantees no
43
+ reader ever sees a torn file; only the lost-update protection is
44
+ best-effort.
45
+
46
+ The lock path is derived from the current REGISTRY_DIR at call time (not a
47
+ module-level constant) so tests that monkeypatch REGISTRY_DIR stay
48
+ hermetic. Not reentrant: do not nest this around another leaf mutator (the
49
+ leaf mutators do not call one another).
50
+ """
51
+ _ensure_registry_dir()
52
+ lock_fd = None
53
+ locked = False
54
+ try:
55
+ import fcntl # POSIX only; absent on Windows
56
+
57
+ lock_path = REGISTRY_DIR / ".registry.lock"
58
+ try:
59
+ lock_fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR, 0o644)
60
+ fcntl.flock(lock_fd, fcntl.LOCK_EX)
61
+ locked = True
62
+ except OSError:
63
+ # Could not open or lock the file; proceed without the lock.
64
+ locked = False
65
+ except ImportError:
66
+ # fcntl not available (e.g. Windows); proceed without the lock.
67
+ lock_fd = None
68
+
69
+ try:
70
+ yield
71
+ finally:
72
+ if lock_fd is not None:
73
+ try:
74
+ if locked:
75
+ import fcntl
76
+
77
+ fcntl.flock(lock_fd, fcntl.LOCK_UN)
78
+ except (OSError, ImportError):
79
+ pass
80
+ try:
81
+ os.close(lock_fd)
82
+ except OSError:
83
+ pass
84
+
85
+
28
86
  def _load_registry() -> dict:
29
87
  """Load the project registry from disk."""
30
88
  _ensure_registry_dir()
@@ -38,10 +96,39 @@ def _load_registry() -> dict:
38
96
 
39
97
 
40
98
  def _save_registry(registry: dict) -> None:
41
- """Save the project registry to disk."""
99
+ """
100
+ Save the project registry to disk atomically.
101
+
102
+ Writes to a temp file in the SAME directory as REGISTRY_FILE (so os.replace
103
+ is an atomic rename on the same filesystem), flushes and fsyncs it, then
104
+ os.replace()s it over the destination. Every reader therefore sees either
105
+ the complete old file or the complete new file, never a half-written (torn)
106
+ one. The temp file is removed on any error path so partial files never
107
+ leak.
108
+
109
+ Note: atomic write alone eliminates torn reads but does not by itself
110
+ prevent lost updates under true simultaneity. The leaf mutators wrap their
111
+ load->mutate->save in _registry_lock() to serialize concurrent writers and
112
+ reduce that window; when locking is unavailable the degradation is honest
113
+ (torn reads still impossible, lost-update still possible).
114
+ """
42
115
  _ensure_registry_dir()
43
- with open(REGISTRY_FILE, "w") as f:
44
- json.dump(registry, f, indent=2, default=str)
116
+ tmp_fd, tmp_path = tempfile.mkstemp(
117
+ dir=str(REGISTRY_DIR), prefix=".projects.", suffix=".tmp"
118
+ )
119
+ try:
120
+ with os.fdopen(tmp_fd, "w") as f:
121
+ json.dump(registry, f, indent=2, default=str)
122
+ f.flush()
123
+ os.fsync(f.fileno())
124
+ os.replace(tmp_path, str(REGISTRY_FILE))
125
+ except BaseException:
126
+ # Clean up the temp file on any failure so we never leak partial files.
127
+ try:
128
+ os.unlink(tmp_path)
129
+ except OSError:
130
+ pass
131
+ raise
45
132
 
46
133
 
47
134
  def _generate_project_id(path: str) -> str:
@@ -70,34 +157,38 @@ def register_project(
70
157
  if not os.path.isdir(path):
71
158
  raise ValueError(f"Path does not exist: {path}")
72
159
 
73
- registry = _load_registry()
74
160
  project_id = _generate_project_id(path)
75
161
 
76
- # Check if already registered
77
- if project_id in registry["projects"]:
78
- # Update existing entry
79
- project = registry["projects"][project_id]
80
- if name:
81
- project["name"] = name
82
- if alias:
83
- project["alias"] = alias
84
- project["updated_at"] = datetime.now(timezone.utc).isoformat()
85
- else:
86
- # Create new entry
87
- project = {
88
- "id": project_id,
89
- "path": path,
90
- "name": name or os.path.basename(path),
91
- "alias": alias,
92
- "registered_at": datetime.now(timezone.utc).isoformat(),
93
- "updated_at": datetime.now(timezone.utc).isoformat(),
94
- "last_accessed": None,
95
- "has_loki_dir": os.path.isdir(os.path.join(path, ".loki")),
96
- "status": "active",
97
- }
98
- registry["projects"][project_id] = project
162
+ # Lock the load->mutate->save so concurrent registrations serialize and do
163
+ # not overwrite each other's entries (the multi-repo `loki docker` happy path).
164
+ with _registry_lock():
165
+ registry = _load_registry()
99
166
 
100
- _save_registry(registry)
167
+ # Check if already registered
168
+ if project_id in registry["projects"]:
169
+ # Update existing entry
170
+ project = registry["projects"][project_id]
171
+ if name:
172
+ project["name"] = name
173
+ if alias:
174
+ project["alias"] = alias
175
+ project["updated_at"] = datetime.now(timezone.utc).isoformat()
176
+ else:
177
+ # Create new entry
178
+ project = {
179
+ "id": project_id,
180
+ "path": path,
181
+ "name": name or os.path.basename(path),
182
+ "alias": alias,
183
+ "registered_at": datetime.now(timezone.utc).isoformat(),
184
+ "updated_at": datetime.now(timezone.utc).isoformat(),
185
+ "last_accessed": None,
186
+ "has_loki_dir": os.path.isdir(os.path.join(path, ".loki")),
187
+ "status": "active",
188
+ }
189
+ registry["projects"][project_id] = project
190
+
191
+ _save_registry(registry)
101
192
  return project
102
193
 
103
194
 
@@ -111,19 +202,20 @@ def unregister_project(identifier: str) -> bool:
111
202
  Returns:
112
203
  True if removed, False if not found
113
204
  """
114
- registry = _load_registry()
115
-
116
- # Find by ID, path, or alias
117
- project_id = None
118
- for pid, project in registry["projects"].items():
119
- if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
120
- project_id = pid
121
- break
122
-
123
- if project_id:
124
- del registry["projects"][project_id]
125
- _save_registry(registry)
126
- return True
205
+ with _registry_lock():
206
+ registry = _load_registry()
207
+
208
+ # Find by ID, path, or alias
209
+ project_id = None
210
+ for pid, project in registry["projects"].items():
211
+ if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
212
+ project_id = pid
213
+ break
214
+
215
+ if project_id:
216
+ del registry["projects"][project_id]
217
+ _save_registry(registry)
218
+ return True
127
219
  return False
128
220
 
129
221
 
@@ -179,13 +271,14 @@ def update_last_accessed(identifier: str) -> Optional[dict]:
179
271
  Returns:
180
272
  Updated project entry or None
181
273
  """
182
- registry = _load_registry()
183
-
184
- for pid, project in registry["projects"].items():
185
- if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
186
- project["last_accessed"] = datetime.now(timezone.utc).isoformat()
187
- _save_registry(registry)
188
- return project
274
+ with _registry_lock():
275
+ registry = _load_registry()
276
+
277
+ for pid, project in registry["projects"].items():
278
+ if pid == identifier or project["path"] == identifier or project.get("alias") == identifier:
279
+ project["last_accessed"] = datetime.now(timezone.utc).isoformat()
280
+ _save_registry(registry)
281
+ return project
189
282
  return None
190
283
 
191
284
 
@@ -207,19 +300,20 @@ def mark_project_stopped(identifier: str) -> Optional[dict]:
207
300
  Idempotent: marking an already-stopped project is a no-op that still
208
301
  returns the entry.
209
302
  """
210
- registry = _load_registry()
211
-
212
- for pid_key, project in registry["projects"].items():
213
- if (
214
- pid_key == identifier
215
- or project["path"] == identifier
216
- or project.get("alias") == identifier
217
- ):
218
- project["status"] = "stopped"
219
- project["pid"] = None
220
- project["updated_at"] = datetime.now(timezone.utc).isoformat()
221
- _save_registry(registry)
222
- return project
303
+ with _registry_lock():
304
+ registry = _load_registry()
305
+
306
+ for pid_key, project in registry["projects"].items():
307
+ if (
308
+ pid_key == identifier
309
+ or project["path"] == identifier
310
+ or project.get("alias") == identifier
311
+ ):
312
+ project["status"] = "stopped"
313
+ project["pid"] = None
314
+ project["updated_at"] = datetime.now(timezone.utc).isoformat()
315
+ _save_registry(registry)
316
+ return project
223
317
  return None
224
318
 
225
319
 
@@ -885,7 +885,7 @@ async def agent_card() -> dict:
885
885
  "capabilities": {
886
886
  "agents": 41,
887
887
  "swarms": 8,
888
- "quality_gates": 9,
888
+ "quality_gates": 8,
889
889
  "providers": ["claude", "codex", "cline", "aider"],
890
890
  "streaming": True,
891
891
  "pushNotifications": False,
@@ -7177,15 +7177,14 @@ async def remove_checklist_waiver(item_id: str):
7177
7177
  # =============================================================================
7178
7178
 
7179
7179
  _DEFAULT_QUALITY_GATES = [
7180
- {"name": "Static Analysis", "description": "CodeQL, ESLint, type checking", "status": "pending"},
7181
- {"name": "Parallel Code Review", "description": "3-reviewer blind review system", "status": "pending"},
7182
- {"name": "Anti-Sycophancy Check", "description": "Devil's advocate on unanimous approval", "status": "pending"},
7183
- {"name": "Severity Assessment", "description": "Critical/High/Medium = BLOCK", "status": "pending"},
7184
- {"name": "Unit Test Coverage", "description": "Target >80% coverage, 100% pass", "status": "pending"},
7185
- {"name": "Integration Tests", "description": "End-to-end verification", "status": "pending"},
7186
- {"name": "Security Scan", "description": "Dependency audit, OWASP checks", "status": "pending"},
7187
- {"name": "Build Verification", "description": "Clean build with no warnings", "status": "pending"},
7188
- {"name": "Council Vote", "description": "Completion council consensus", "status": "pending"},
7180
+ {"name": "Static Analysis", "description": "CodeQL, ESLint/Pylint, type-checker findings on the diff", "status": "pending"},
7181
+ {"name": "Test Suite", "description": "Project test runner pass/fail (red blocks)", "status": "pending"},
7182
+ {"name": "Blind Code Review", "description": "3-reviewer blind review; Critical/High = BLOCK; Medium/Low advisory", "status": "pending"},
7183
+ {"name": "Anti-Sycophancy", "description": "Devil's Advocate re-review on unanimous PASS", "status": "pending"},
7184
+ {"name": "Mock Integrity", "description": "Tautological-assertion and mock-ratio detection", "status": "pending"},
7185
+ {"name": "Test Mutation", "description": "Assertion-churn (test-fitting) detection", "status": "pending"},
7186
+ {"name": "Documentation Coverage", "description": "README presence, docs freshness, API docs for exported symbols", "status": "pending"},
7187
+ {"name": "Magic Modules Debate", "description": "Spec-vs-implementation debate on generated modules", "status": "pending"},
7189
7188
  ]
7190
7189
 
7191
7190
 
@@ -14,8 +14,8 @@
14
14
  | **Type** | Skill/Framework | Enterprise Platform | Standalone Agent | Cloud Agent | AI IDE | CLI Agent | AI IDE | AI IDE | Cloud Agent | AI IDE (OSS) |
15
15
  | **Autonomy Level** | High (minimal human) | High | Full | High | Medium-High | High | High | High | High | High |
16
16
  | **Max Runtime** | Unlimited | Async/Scheduled | Hours | Per-task | Session | Session | Days | Async | Per-task | Session |
17
- | **Pricing** | Free (OSS) | Enterprise | $20/mo | ChatGPT Plus | $20/mo | API costs | Free preview | Free preview | $19/mo | Free (OSS) |
18
- | **Open Source** | Yes | No | No | No | No | No | No | No | No | Yes |
17
+ | **Pricing** | Free (source-available) | Enterprise | $20/mo | ChatGPT Plus | $20/mo | API costs | Free preview | Free preview | $19/mo | Free (OSS) |
18
+ | **Source model** | Source-available (BUSL-1.1) | Closed | Closed | Closed | Closed | Closed | Closed | Closed | Closed | Open source |
19
19
  | **GitHub Stars** | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | 70.9k |
20
20
 
21
21
  ---
@@ -37,7 +37,7 @@
37
37
  |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
38
38
  | **Code Review** | 3 blind reviewers + devil's advocate | Basic | Basic | BugBot PR | Property-based | Artifacts | Doc/Review | Basic |
39
39
  | **Anti-Sycophancy** | Yes (CONSENSAGENT) | No | No | No | No | No | No | No |
40
- | **Quality Gates** | 11 gates + PBT | Basic | Sandbox | Tests | Spec validation | Artifact checks | Tests | Permissions |
40
+ | **Quality Gates** | 8 gates + PBT | Basic | Sandbox | Tests | Spec validation | Artifact checks | Tests | Permissions |
41
41
  | **Constitutional AI** | Yes (principles) | No | Refusal training | No | No | No | No | No |
42
42
 
43
43
  ---
@@ -146,10 +146,10 @@
146
146
 
147
147
  | Feature | **Zencoder** | **Loki Mode** | **Assessment** |
148
148
  |---------|-------------|---------------|----------------|
149
- | **Four Pillars** | Structured Workflows, SDD, Multi-Agent Verification, Parallel Execution | SDLC + RARV + 9 Gates + Worktrees | TIE |
149
+ | **Four Pillars** | Structured Workflows, SDD, Multi-Agent Verification, Parallel Execution | SDLC + RARV + 8 Gates + Worktrees | TIE |
150
150
  | **Spec-Driven Dev** | Specs as first-class objects | OpenAPI-first | TIE |
151
151
  | **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
152
- | **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
152
+ | **Quality Gates** | Built-in verification loops | 8 explicit gates + anti-sycophancy | **Loki Mode** |
153
153
  | **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
154
154
  | **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agent roles | **Loki Mode** |
155
155
  | **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
@@ -178,7 +178,7 @@
178
178
 
179
179
  ### Where Loki Mode EXCEEDS Zencoder
180
180
 
181
- 1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
181
+ 1. **Quality Control**: 8 explicit gates + blind review + devil's advocate vs built-in loops
182
182
  2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
183
183
  3. **Agent Specialization**: 41 pre-defined specialized agent roles across 8 domains
184
184
  4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
@@ -207,7 +207,7 @@
207
207
  | **Skills** | Progressive disclosure | 6 slash commands | N/A | 129 skills | N/A | 35 skills | Memory focus |
208
208
  | **Multi-Provider** | Yes (Claude/Codex/Gemini) | 3 CLIs (separate) | No | No | No | No | No |
209
209
  | **Memory System** | 3-tier (episodic/semantic/procedural) | None | N/A | N/A | Hybrid | N/A | SQLite+FTS5 |
210
- | **Quality Gates** | 11 gates + Completion Council | User verify only | Two-Stage Review | N/A | Consensus | Tiered | N/A |
210
+ | **Quality Gates** | 8 gates + Completion Council | User verify only | Two-Stage Review | N/A | Consensus | Tiered | N/A |
211
211
  | **Context Mgmt** | Standard | Fresh per task (core innovation) | Fresh per task | N/A | N/A | N/A | Progressive |
212
212
  | **Autonomy** | High (minimal human) | Semi (checkpoints) | Human-guided | Human-guided | Orchestrated | Human-guided | N/A |
213
213
 
@@ -232,7 +232,7 @@ These are patterns from competing projects that are **practically and scientific
232
232
  |----------|---------|-------------------------|
233
233
  | **Multi-Provider Support** | Only skill supporting Claude, Codex, and Gemini with graceful degradation | All 8 competitors are Claude-only |
234
234
  | **RARV Cycle** | Reason-Act-Reflect-Verify is more rigorous than Plan-Execute | Most use simple Plan-Execute |
235
- | **11-Gate Quality System** | Static analysis + 3 reviewers + devil's advocate + anti-sycophancy + severity blocking + coverage + debate + backward-compat (healing) + Phase 1 closure | Superpowers has 2-stage, others have less |
235
+ | **8-Gate Quality System** | Static analysis + test suite (pass/fail) + 3 blind reviewers with severity blocking + devil's advocate + mock-integrity + test-mutation + documentation coverage + Magic Modules debate (backward-compat is a conditional healing auditor) + Phase 1 closure | Superpowers has 2-stage, others have less |
236
236
  | **Constitutional AI Integration** | Principles-based self-critique from Anthropic research | None have this |
237
237
  | **Anti-Sycophancy (CONSENSAGENT)** | Blind review + devil's advocate prevents groupthink | None have this |
238
238
  | **Provider Abstraction Layer** | Clean degradation from full-featured to sequential-only | Claude-only projects can't degrade |
@@ -359,12 +359,12 @@ Tiered agent architecture with explicit escalation:
359
359
  |-----------|-------------------|
360
360
  | **Autonomy** | Designed for high autonomy with minimal human intervention |
361
361
  | **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase (parallel review council + optional worktree streams on Claude, sequential elsewhere) vs 1-8 in competitors, with all output gated by blind review + council |
362
- | **Quality** | 11 gates + blind review + devil's advocate + property-based testing |
362
+ | **Quality** | 8 gates + blind review + devil's advocate + property-based testing |
363
363
  | **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
364
364
  | **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |
365
365
  | **Memory** | 3-tier memory (episodic/semantic/procedural) + review learning + cross-project |
366
366
  | **Transformation** | Code migration workflows (language, database, framework) |
367
- | **Cost** | Free (open source) vs $20-500/month |
367
+ | **Cost** | Free (source-available, BUSL-1.1) vs $20-500/month |
368
368
  | **Customization** | Full source access vs black box |
369
369
 
370
370
  ---
@@ -20,7 +20,7 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
20
20
  | Adoption | 594 stars, 6K/wk npm | 11,903 stars, 21K/wk npm | GSD (20x) |
21
21
  | Simplicity | Complex (5.4K-line run.sh, 12 Python modules) | Simple (markdown agents + slash commands) | GSD |
22
22
  | Full autonomy | Walk away, come back to deployed product | Human checkpoints at discuss/verify/milestone | Loki |
23
- | Quality gates | 9-gate + Completion Council + anti-sycophancy | User verification only | Loki |
23
+ | Quality gates | 8-gate + Completion Council + anti-sycophancy | User verification only | Loki |
24
24
  | Memory system | Episodic/semantic/procedural + vector search | None | Loki |
25
25
  | Context management | Standard | Fresh subagent contexts per task (core innovation) | GSD |
26
26
  | Time to value | Learn architecture, understand CLI flags | `npx get-shit-done-cc` and go | GSD |
@@ -2,7 +2,7 @@
2
2
 
3
3
  The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
4
4
 
5
- **Version:** v7.45.0
5
+ **Version:** v7.46.0
6
6
 
7
7
  ---
8
8
 
@@ -389,7 +389,7 @@ provider works inside the container. Provide auth with your Anthropic API key:
389
389
  # Run Loki Mode in Docker (Claude provider, API-key auth)
390
390
  docker run --rm -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
391
391
  -v $(pwd):/workspace -w /workspace \
392
- asklokesh/loki-mode:7.45.0 start ./my-spec.md
392
+ asklokesh/loki-mode:7.46.0 start ./my-spec.md
393
393
  ```
394
394
 
395
395
  ##### docker compose + .env (no host install)
@@ -0,0 +1,163 @@
1
+ # P0 Verification-Credibility Sweep -- Architecture Plan
2
+
3
+ Persisted from the Architect (opus). Every line number re-verified by grep.
4
+ Goal: make Loki's verification layer honest and real. A hollow wedge is
5
+ existential for a "proof of done" product. Fix or remove every false/hollow gate
6
+ claim, wire the unwired detectors, make anti-sycophancy act.
7
+
8
+ ## 0. Verified ground truth
9
+
10
+ - P0-1: enforce_test_coverage() at autonomy/run.sh:7031. `local coverage_pct=0`
11
+ at 7038 is never reassigned; no coverage tool invoked. 7257 emits min_coverage
12
+ (the threshold), not a measured value. Gate decides purely on test_passed.
13
+ - P0-2: skills/quality-gates.md:5-17 lists 11 gates; gates 1 (Input Guardrails)
14
+ and 5 (Output Guardrails) have NO gate function. wiki/Quality-Gates.md:14-28
15
+ duplicates. (21 'guardrail' refs in autonomy/ are CLI help/comments/flags.)
16
+ - P0-3: tests/detect-mock-problems.sh + tests/detect-test-mutations.sh invoked
17
+ 0 times in autonomy/run.sh. quality-gates.md:74-77 claims HIGH=FAIL.
18
+ - P0-4: anti-sycophancy block run.sh:8316-8323 only logs + writes
19
+ anti-sycophancy.txt. No Devil's-Advocate re-review. INERT. Bun mirror
20
+ loki-ts/src/runner/quality_gates.ts:804-808 equally inert.
21
+ - Gate inventory: phantom (Input/Output Guardrails); wired-but-unlisted
22
+ (run_magic_debate_gate at run.sh:14067); "Gate 10 Backward Compat" is the
23
+ legacy-healing-auditor SPECIALIST (run.sh:7875-7979), conditional, not a loop
24
+ gate; "Gate 6 Severity Blocking" is the block policy inside code review, not a
25
+ function.
26
+
27
+ ### Functions actually invoked in orchestration (run.sh:13938-14084)
28
+ enforce_static_analysis (13945); enforce_test_coverage (13967); run_code_review
29
+ (13987); run_doc_quality_gate (14058); run_magic_debate_gate (14070); plus
30
+ conditional legacy-healing-auditor reviewer.
31
+
32
+ ## 1. Canonical final gate list (THE CONTRACT -- docs transcribe, never recompute)
33
+
34
+ Honest count after this sweep: 8 gates.
35
+
36
+ | # | Gate | Function / mechanism | Blocking | Opt-out flag |
37
+ |---|------|---------------------|----------|--------------|
38
+ | 1 | Static Analysis | enforce_static_analysis (run.sh:6699) | Yes (ladder) | PHASE_STATIC_ANALYSIS=false |
39
+ | 2 | Test Suite (pass/fail) | enforce_test_coverage (run.sh:7031) | Yes (red blocks) | PHASE_UNIT_TESTS=false |
40
+ | 3 | Blind Code Review (3-reviewer council + severity blocking) | run_code_review (run.sh:7788) | Yes (Crit/High block) | PHASE_CODE_REVIEW=false |
41
+ | 4 | Anti-Sycophancy / Devil's Advocate (on unanimous PASS) | run_code_review sub-step (run.sh:8316+) | Yes (DA Crit/High block) | LOKI_GATE_DEVILS_ADVOCATE=false |
42
+ | 5 | Mock Integrity Detector | enforce_mock_integrity -> tests/detect-mock-problems.sh | Yes (HIGH blocks) | LOKI_GATE_MOCK=false |
43
+ | 6 | Test Mutation Detector | enforce_mutation_integrity -> tests/detect-test-mutations.sh | Yes (HIGH blocks) | LOKI_GATE_MUTATION=false |
44
+ | 7 | Documentation Coverage | run_doc_quality_gate (run.sh:7388) | Yes | LOKI_GATE_DOC_COVERAGE=false |
45
+ | 8 | Magic Modules Debate | run_magic_debate_gate (run.sh:7495) | Yes (BLOCK sev) | LOKI_GATE_MAGIC_DEBATE=false |
46
+
47
+ Conditional auditor (documented separately, NOT numbered): Backward-Compatibility
48
+ / legacy-healing-auditor (healing mode only). Removed: Input/Output Guardrails.
49
+
50
+ ### Doc files to update to "8 gates" (docs owner)
51
+ README.md (22,29,196,255); SKILL.md (3,10); CLAUDE.md (44);
52
+ plugins/loki-mode/README.md (4); wiki/Quality-Gates.md (14-48);
53
+ wiki/Environment-Variables.md (62); wiki/Home.md (3,13); wiki/CLI-Reference.md
54
+ (230); docs/cursor-comparison.md (14,177,195); docs/COMPARISON.md (40,210,362);
55
+ skills/quality-gates.md (5,13,14-17,19-66,69-82,650,655,668); skills/00-index.md
56
+ (51). CHANGELOG.md: NEW top entry ONLY; never rewrite historical entries
57
+ (5837/6181/6335/6340).
58
+
59
+ ## 2. P0-1 Coverage honesty (Fix B) -- Slice A (run.sh owner) + Slice B (docs)
60
+ - run.sh: remove dead `local coverage_pct=0` (7038). Relabel logs: 13966
61
+ "test suite (pass/fail)"; 7265/7270 "Test suite gate".
62
+ - KEEP the min_coverage JSON field at 7257 (consumed by autonomy/loki:27529-27530,
63
+ 16138 and asserted in tests/test-report-command.sh:116,
64
+ tests/test-completion-council-affirmative-evidence.sh:126,
65
+ tests/test-evidence-gate.sh:155). Only change misleading consumer strings in
66
+ autonomy/loki (27530, 16138) to "Min coverage TARGET (not measured)".
67
+ - docs (skills/quality-gates.md): :13 drop ">80% coverage" -> "coverage % not
68
+ measured in this release"; :650/:655 reword to pass/fail + target-only; :668
69
+ remove coverage.json artifact line. Note Fix A (real measurement) as follow-up.
70
+
71
+ ## 3. P0-2 Phantom guardrails -- Slice B (docs only)
72
+ Remove gates 1 & 5 entirely (do not "mark planned"). Renumber to the 8-gate
73
+ table. Edit skills/quality-gates.md:5-17, wiki/Quality-Gates.md:14-28, + all
74
+ list files in section 1.
75
+
76
+ ## 4. P0-3 Wire detectors -- Slice A (run.sh) + Slice D (scripts) + Slice C (Bun)
77
+ Exit-code asymmetry (load-bearing):
78
+ - detect-mock-problems.sh exits 1 on CRITICAL/HIGH (179-182), 0 otherwise.
79
+ Exit code already = block-on-HIGH.
80
+ - detect-test-mutations.sh exits 0 unless --strict; --strict blocks on ANY
81
+ finding (over-blocks MED/LOW). DO NOT use --strict. Wrapper greps stdout for
82
+ [HIGH] to decide block; route MED/LOW to findings injection.
83
+
84
+ New run.sh functions (place after run_magic_debate_gate ~7560):
85
+ enforce_mock_integrity() # HIGH -> return 1; MED/LOW -> findings file
86
+ enforce_mutation_integrity() # grep -c '\[HIGH\]' >0 -> return 1; MED/LOW -> findings
87
+ Both cd "${TARGET_DIR}", use LOKI_GATE_TIMEOUT wrapping, write findings into
88
+ ${TARGET_DIR}/.loki/quality/ for the Phase-1 findings injector.
89
+
90
+ Orchestration insert: after the pause-check at 13983, before code-review at
91
+ 13985. Mirror the existing pattern with track_gate_failure/clear_gate_failure +
92
+ gate_failures string. Toggles LOKI_GATE_MOCK / LOKI_GATE_MUTATION (matches
93
+ existing LOKI_GATE_DOC_COVERAGE / LOKI_GATE_MAGIC_DEBATE convention).
94
+
95
+ Detector-script (Slice D): optional --block-high mode on detect-test-mutations.sh
96
+ (exit 2 on HIGH) keeping --strict intact; OR rely on wrapper grep (no script
97
+ change). Verify detect-mock-problems.sh exit semantics. Do NOT touch run.sh.
98
+
99
+ ## 5. P0-4 Anti-sycophancy acts -- Slice A (run.sh) + Slice C (Bun)
100
+ Read run_code_review 7788-8316 first. At 8316-8323 unanimous block: dispatch ONE
101
+ Devil's-Advocate reviewer reusing the existing reviewer-invocation +
102
+ parse_verdict helpers; if DA returns Crit/High set has_blocking=true so the
103
+ EXISTING block at 8326-8330 fires (return 1). Keep anti-sycophancy.txt for audit.
104
+ Gate behind LOKI_GATE_DEVILS_ADVOCATE (default true).
105
+
106
+ ## 6. P0-5 Honest per-gate table -- Slice B (docs)
107
+ Replace skills/quality-gates.md:5-17 + prose 19-82 with the 8-gate table plus
108
+ columns: detects X / does NOT detect Y / opt-out flag / blocking. Honesty
109
+ entries: gate 2 "does NOT detect coverage %"; gate 5 "does NOT detect semantic
110
+ correctness of mocks"; gate 6 "does NOT detect logically-correct-but-weak
111
+ assertions".
112
+
113
+ ## 7. Bash <-> Bun parity matrix
114
+ | Change | Bun mirror | File |
115
+ |--------|-----------|------|
116
+ | P0-1 label/honesty | Yes (light) | quality_gates.ts runTestCoverage (402): no false % strings |
117
+ | P0-2 gate count | docs only | -- |
118
+ | P0-3 mock gate | Yes | quality_gates.ts: add mock_integrity to GateName (69-74) + runMockIntegrity + sequence (1474-1480) + toggle |
119
+ | P0-3 mutation gate | Yes | quality_gates.ts: add mutation_integrity + runMutationIntegrity + sequence + toggle |
120
+ | P0-4 devil's advocate | Yes | quality_gates.ts runCodeReview (709), inert at 804-808: add DA dispatch + block |
121
+ | P0-5 doc table | docs only | -- |
122
+ Bun escalation ladder is generic; new gates inherit once added to union+sequence.
123
+
124
+ ## 8. Slice boundaries (independent; no file collisions)
125
+ - Slice A -- run.sh runtime (ONE owner, serialized): P0-1 (run.sh + autonomy/loki
126
+ strings), P0-3 new funcs + orchestration insert, P0-4. Owns autonomy/run.sh +
127
+ autonomy/loki exclusively.
128
+ - Slice B -- Docs (ONE owner): P0-2 + P0-5 + all "11->8 gates" edits. Both edit
129
+ skills/quality-gates.md so MUST be one slice. New CHANGELOG entry only.
130
+ - Slice C -- Bun parity (ONE owner): loki-ts/src/runner/quality_gates.ts only.
131
+ - Slice D -- Detector scripts (ONE owner): tests/detect-test-mutations.sh
132
+ --block-high; verify detect-mock-problems.sh. No run.sh.
133
+ - Slice E -- SDET tests (ONE owner; after A/C/D): fixtures + assertions.
134
+ Order: D and B parallel anytime; A depends on D contract; C mirrors A; E last.
135
+
136
+ ## 9. Test plan (SDET, Slice E)
137
+ - P0-1: grep assert no ">80%"/"min_coverage: 80% # Never drop"/"coverage.json"
138
+ in any list doc. Behavior: passing tests pass, failing tests block.
139
+ - P0-2: grep assert zero live "11 gates"/"Input Guardrails"/"Output Guardrails"
140
+ (CHANGELOG excepted); "8" present in quality-gates.md + wiki.
141
+ - P0-3 mock: fixture with tautological assertion -> enforce_mock_integrity
142
+ returns 1, BLOCKS, track_gate_failure increments. Clean -> 0, clears. MED-only
143
+ -> 0 + findings file.
144
+ - P0-3 mutation: fixture commit changing assertion values + impl (HIGH) ->
145
+ returns 1, BLOCKS. MED-only -> 0 + findings (proves not over-blocking).
146
+ - P0-4: unanimous PASS + DA High -> run_code_review returns 1. Unanimous PASS +
147
+ DA clean -> 0 + anti-sycophancy.txt exists.
148
+ - Parity: Bun sequence includes mock_integrity + mutation_integrity; runCodeReview
149
+ blocks on DA High; existing loki-ts tests green.
150
+
151
+ ## 10. Risks + binding constraints
152
+ Risks: (1) min_coverage JSON field has live consumers + 3 test assertions -- keep
153
+ field, fix strings only. (2) mutation --strict over-blocks -- parse HIGH instead.
154
+ (3) detectors run against TARGET project test files -- cd TARGET_DIR + timeout
155
+ wrap. (4) stale cross-file comment line refs exist; do not chase, do not add new.
156
+
157
+ Binding constraints (every dev agent): NO version bumps (integrator once); NO
158
+ commits/push; NO emojis; NO em dashes; full gate applies (touches runtime/gates/
159
+ parity); stay inside your slice file ownership; run.sh is single-owner.
160
+
161
+ Canonical count decision: 8 (recommended). Keeping backward-compat numbered
162
+ would make it 9 but reintroduces the listed-but-not-a-loop-gate honesty gap this
163
+ sweep exists to close.
@@ -972,7 +972,7 @@ Source: `run.sh:7880-7881` (checklist_should_verify, checklist_verify)
972
972
 
973
973
  ## 7. Quality Gates
974
974
 
975
- ### 7.1 Nine-Gate Pipeline
975
+ ### 7.1 Eight-Gate Pipeline
976
976
 
977
977
  Source: `skills/quality-gates.md`
978
978
 
@@ -980,40 +980,39 @@ Source: `skills/quality-gates.md`
980
980
  Code Change
981
981
  |
982
982
  v
983
- Gate 1: Static Analysis (CodeQL, ESLint)
984
- |──BLOCK (critical findings)──> [REJECTED]
983
+ Gate 1: Static Analysis (CodeQL, ESLint/Pylint, type-checker)
984
+ |──BLOCK (severity ladder)──> [REJECTED]
985
985
  v
986
- Gate 2: Type Check (tsc --noEmit)
986
+ Gate 2: Test Suite (pass/fail; red blocks; coverage % not measured this release)
987
987
  |──BLOCK──> [REJECTED]
988
988
  v
989
- Gate 3: Unit Tests (>80% coverage, 100% pass)
990
- |──BLOCK──> [REJECTED]
991
- v
992
- Gate 4: Integration Tests
993
- |──BLOCK──> [REJECTED]
994
- v
995
- Gate 5: 3-Reviewer Blind Review (see 7.3)
989
+ Gate 3: Blind 3-Reviewer Review with severity blocking (see 7.3)
996
990
  |──BLOCK (Critical/High severity)──> [REJECTED]
997
991
  v
998
- Gate 6: Anti-Sycophancy Check
999
- |──BLOCK (devil's advocate finds issues)──> [REJECTED]
992
+ Gate 4: Anti-Sycophancy Devil's Advocate (on unanimous PASS)
993
+ |──BLOCK (devil's advocate Critical/High findings)──> [REJECTED]
1000
994
  v
1001
- Gate 7: Security Scan
1002
- |──BLOCK──> [REJECTED]
995
+ Gate 5: Mock Integrity Detector
996
+ |──BLOCK (HIGH findings)──> [REJECTED]
1003
997
  v
1004
- Gate 8: Performance Check
1005
- |──BLOCK──> [REJECTED]
998
+ Gate 6: Test Mutation Detector
999
+ |──BLOCK (HIGH findings)──> [REJECTED]
1006
1000
  v
1007
- Gate 9: E2E / Playwright
1001
+ Gate 7: Documentation Coverage
1008
1002
  |──BLOCK──> [REJECTED]
1009
1003
  v
1004
+ Gate 8: Magic Modules Debate
1005
+ |──BLOCK (BLOCK-severity findings)──> [REJECTED]
1006
+ v
1010
1007
  [APPROVED]
1011
1008
  ```
1012
1009
 
1010
+ Backward-compatibility is a conditional healing-mode auditor, not a numbered gate.
1011
+
1013
1012
  Gate status values: `passed`, `failed`, `skipped`
1014
1013
  Persistence: `.loki/dashboard-state.json` field `qualityGates`
1015
1014
  Severity levels: `critical`, `high`, `medium`, `low`
1016
- Blocking threshold: Critical and High always block; Medium blocks by default.
1015
+ Blocking threshold: Critical and High block; Medium and Low are advisory.
1017
1016
 
1018
1017
  ### 7.2 Model Escalation
1019
1018
 
@@ -57,5 +57,5 @@ architecture, and adversarial review -- complementing Loki Mode's autonomous exe
57
57
  1. P0 must ship independently and prove value before P1/P2 begin
58
58
  2. No runtime dependency on BMAD repo -- adapter reads BMAD output artifacts only
59
59
  3. Zero regression on existing non-BMAD workflows
60
- 4. All code must pass existing 9-gate quality system
60
+ 4. All code must pass existing 8-gate quality system
61
61
  5. Context budget: BMAD additions must stay under 15K tokens per iteration