npm - loki-mode - Versions diffs - 7.26.0 → 7.27.0 - Mend

loki-mode 7.26.0 → 7.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +12 -11
package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/completion-council.sh +25 -0
package/autonomy/lib/trust_metrics.py +636 -0
package/autonomy/loki +93 -0
package/autonomy/run.sh +113 -5
package/autonomy/verify.sh +1075 -0
package/dashboard/__init__.py +1 -1
package/dashboard/static/index.html +1 -1
package/docs/COMPARISON.md +9 -9
package/docs/COMPETITIVE-ANALYSIS.md +18 -37
package/docs/INSTALLATION.md +1 -1
package/docs/auto-claude-comparison.md +9 -6
package/docs/certification/01-core-concepts/lesson.md +3 -3
package/docs/competitive/emergence-others-analysis.md +1 -1
package/docs/competitive/replit-lovable-analysis.md +1 -1
package/docs/cursor-comparison.md +1 -1
package/docs/prd-purple-lab-platform.md +1 -1
package/docs/show-hn-post.md +2 -2
package/loki-ts/dist/loki.js +2 -2
package/mcp/__init__.py +1 -1
package/package.json +1 -1
package/providers/codex.sh +3 -2
package/references/agent-types.md +9 -9
package/references/agents.md +8 -8
package/references/business-ops.md +1 -1
package/references/competitive-analysis.md +1 -1
package/skills/agents.md +3 -3
package/skills/providers.md +3 -3

package/dashboard/__init__.py CHANGED Viewed

@@ -7,7 +7,7 @@ Modules:
     control: Session control API (start/stop/pause/resume)
 """
-__version__ = "7.26.0"
+__version__ = "7.27.0"
 # Expose the control app for easy import
 try:

package/dashboard/static/index.html CHANGED Viewed

@@ -6523,7 +6523,7 @@ var LokiDashboard=(()=>{var Ee=Object.defineProperty;var rt=Object.getOwnPropert
         <p>App runner not started</p>
         <p class="hint">App runner will start after the first successful build iteration.</p>
       </div>
-    `}_attachEventListeners(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector('[data-action="restart"]'),i=e.querySelector('[data-action="stop"]');t&&t.addEventListener("click",()=>this._handleRestart()),i&&i.addEventListener("click",()=>this._handleStop())}_escapeHtml(e){return e?String(e).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;"):""}};customElements.define("loki-app-status",Q);var Ke={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"No app yet",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Could not start",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},error:{color:"var(--loki-text-muted, #71717a)",label:"Status unavailable",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},X=class extends h{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._api=null,this._pollInterval=null,this._visibilityHandler=null,this._status=null,this._errors=null,this._error=null,this._lastDataHash=null,this._detailsOpen=!1}connectedCallback(){super.connectedCallback(),this._setupApi(),this.render(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(e,t,i){t!==i&&(e==="api-url"&&this._api&&(this._api.baseUrl=i,this._loadData()),e==="theme"&&this._applyTheme())}_setupApi(){let 
e=this.getAttribute("api-url")||window.location.origin;this._api=g({baseUrl:e})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let e=await this._api.getAppRunnerStatus(),t=e?.status||"not_initialized",i=null;if(t==="crashed"||t==="failed")try{i=await this._api.getAppRunnerErrors(50)}catch{i=null}let a=JSON.stringify({status:t,port:e?.port,url:e?.url,crash:e?.crash_count,errLen:i?.lines?.length||0}),s=this._error!==null;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._status=e,this._errors=i,this._error=null,this.render()}catch(e){this._error||(this._error=`Could not read app status: ${e.message}`,this.render())}}_isValidUrl(e){if(!e)return!1;try{let t=new URL(e);return t.protocol==="http:"||t.protocol==="https:"}catch{return!1}}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(e){this._error=`Restart failed: ${e.message}`,this.render()}}_handleRefresh(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector("iframe.preview-frame"),i=this._status;if(t&&i&&this._isValidUrl(i.url)){let a=(i.url.includes("?")?"&":"?")+"_t="+Date.now();t.src=i.url+a}}_handleOpenExternal(){let e=this._status;e&&this._isValidUrl(e.url)&&window.open(e.url,"_blank","noopener")}_toggleDetails(){this._detailsOpen=!this._detailsOpen,this.render()}_getStyles(){return`
+    `}_attachEventListeners(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector('[data-action="restart"]'),i=e.querySelector('[data-action="stop"]');t&&t.addEventListener("click",()=>this._handleRestart()),i&&i.addEventListener("click",()=>this._handleStop())}_escapeHtml(e){return e?String(e).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;"):""}};customElements.define("loki-app-status",Q);var Ke={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"No app yet",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Could not start",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},error:{color:"var(--loki-text-muted, #71717a)",label:"Status unavailable",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},X=class extends h{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._api=null,this._pollInterval=null,this._visibilityHandler=null,this._status=null,this._errors=null,this._error=null,this._lastDataHash=null,this._detailsOpen=!1}connectedCallback(){super.connectedCallback(),this._setupApi(),this.render(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(e,t,i){t!==i&&(e==="api-url"&&this._api&&(this._api.baseUrl=i,this._loadData()),e==="theme"&&this._applyTheme())}_setupApi(){let 
e=this.getAttribute("api-url")||window.location.origin;this._api=g({baseUrl:e})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let e=await this._api.getAppRunnerStatus(),t=e?.status||"not_initialized",i=null;if(t==="crashed"||t==="failed")try{i=await this._api.getAppRunnerErrors(50)}catch{i=null}let a=JSON.stringify({status:t,port:e?.port,url:e?.url,crash:e?.crash_count,errLen:i?.lines?.length||0,healthOk:e?.last_health?.ok===!0}),s=this._error!==null;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._status=e,this._errors=i,this._error=null,this.render()}catch(e){this._error||(this._error=`Could not read app status: ${e.message}`,this.render())}}_isValidUrl(e){if(!e)return!1;try{let t=new URL(e);return t.protocol==="http:"||t.protocol==="https:"}catch{return!1}}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(e){this._error=`Restart failed: ${e.message}`,this.render()}}_handleRefresh(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector("iframe.preview-frame"),i=this._status;if(t&&i&&this._isValidUrl(i.url)){let a=(i.url.includes("?")?"&":"?")+"_t="+Date.now();t.src=i.url+a}}_handleOpenExternal(){let e=this._status;e&&this._isValidUrl(e.url)&&window.open(e.url,"_blank","noopener")}_toggleDetails(){this._detailsOpen=!this._detailsOpen,this.render()}_getStyles(){return`
       .preview { padding: 16px; font-family: var(--loki-font-family, system-ui, -apple-system, sans-serif); color: var(--loki-text-primary, #201515); }
       .header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 12px; gap: 12px; flex-wrap: wrap; }
       .header-left { display: flex; align-items: center; gap: 10px; }

package/docs/COMPARISON.md CHANGED Viewed

@@ -24,10 +24,10 @@
 | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
 |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
-| **Multi-Agent** | 41 agents in 8 swarms | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
+| **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase; parallel review council + optional worktree streams (Claude), sequential elsewhere | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
 | **Orchestration** | Full orchestrator | N/A | N/A | Git worktree | Hooks | Manager view | Workflow | Subagents |
 | **Parallel Exec** | 10+ Haiku, 4 impl (worktree) | No | No | 8 max | Yes | Yes | Yes | Yes |
-| **Agent Swarms** | Eng, Ops, Business, Data, Product, Growth, Review | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
+| **Agent Domains** | Eng, Ops, Business, Data, Product, Growth, Review | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
 ---
@@ -151,7 +151,7 @@
 | **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
 | **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
 | **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
-| **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agents | **Loki Mode** |
+| **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agent roles | **Loki Mode** |
 | **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
 | **Review Comment Resolution** | Auto-apply simple changes | Manual review | **ADOPTED from Zencoder** |
 | **Dependency Management** | Scheduled PRs, one group at a time | Mentioned only | **ADOPTED from Zencoder** |
@@ -180,7 +180,7 @@
 1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
 2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
-3. **Agent Specialization**: 41 pre-defined specialized agents across 8 swarms
+3. **Agent Specialization**: 41 pre-defined specialized agent roles across 8 domains
 4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
 5. **Autonomy Design**: Minimal human intervention from PRD to production
 6. **Research Foundation**: 10+ academic papers integrated vs proprietary
@@ -203,7 +203,7 @@
 |---------|--------------|---------|-----------------|------------|-----------------|---------------------|----------------|
 | **Stars** | 594 | 11,903 | 35K+ | 26K+ | 13.7K | N/A | N/A |
 | **npm/wk** | 6.1K | 21.4K | N/A | N/A | N/A | N/A | N/A |
-| **Agents** | 41 in 8 swarms | 11 agents | Fresh per task | 108 agents | Swarm-based | 32 agents | N/A |
+| **Agents** | 41 roles in 8 domains | 11 agents | Fresh per task | 108 agents | Swarm-based | 32 agents | N/A |
 | **Skills** | Progressive disclosure | 6 slash commands | N/A | 129 skills | N/A | 35 skills | Memory focus |
 | **Multi-Provider** | Yes (Claude/Codex/Gemini) | 3 CLIs (separate) | No | No | No | No | No |
 | **Memory System** | 3-tier (episodic/semantic/procedural) | None | N/A | N/A | Hybrid | N/A | SQLite+FTS5 |
@@ -236,7 +236,7 @@ These are patterns from competing projects that are **practically and scientific
 | **Constitutional AI Integration** | Principles-based self-critique from Anthropic research | None have this |
 | **Anti-Sycophancy (CONSENSAGENT)** | Blind review + devil's advocate prevents groupthink | None have this |
 | **Provider Abstraction Layer** | Clean degradation from full-featured to sequential-only | Claude-only projects can't degrade |
-| **41 Specialized Agents** | Purpose-built agents in 8 swarms vs generic | agents (108) has more but less organized |
+| **41 Specialized Agent Roles** | Purpose-built role definitions in 8 domains vs generic; Loki gates every role's output through blind review + council | agents (108) has more but less organized |
 | **Research Foundation** | 10+ academic papers integrated with citations | Most have no research backing |
 ### Superpowers Deep-Dive (35K+ Stars)
@@ -258,7 +258,7 @@ Plugin marketplace architecture with unprecedented scale:
 | Pattern | Description | Loki Mode Status |
 |---------|-------------|------------------|
 | **72 Plugins** | Modular, focused plugins instead of monolith | Different approach (progressive disclosure) |
-| **108 Agents** | Specialized agents for specific domains | 41 agents in Loki Mode |
+| **108 Agents** | Specialized agents for specific domains | 41 agent roles in Loki Mode |
 | **129 Skills** | Skills as first-class objects | 10 skills in skills/ |
 | **Four-Tier Model Strategy** | Explicit tier selection with constraints | Similar to Loki Mode tiers |
@@ -342,7 +342,7 @@ Tiered agent architecture with explicit escalation:
 | Agent | Killer Feature |
 |-------|---------------|
-| **Loki Mode** | Minimal-human-intervention full SDLC from any spec (PRD, GitHub issue, or YAML), 41 agents in 8 swarms, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
+| **Loki Mode** | Minimal-human-intervention full SDLC from any spec (PRD, GitHub issue, or YAML), 41 agent roles in 8 domains, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
 | **Devin** | Full software engineer persona, Slack integration, 67% PR merge rate |
 | **OpenAI Codex** | Skills marketplace, $skill-creator, GPT-5.2-Codex, secure sandbox |
 | **Cursor** | 8 parallel agents, BugBot, Memories, $10B valuation, Composer model (250 tok/s) |
@@ -358,7 +358,7 @@ Tiered agent architecture with explicit escalation:
 | Dimension | Loki Mode Advantage |
 |-----------|-------------------|
 | **Autonomy** | Designed for high autonomy with minimal human intervention |
-| **Multi-Agent** | 41 specialized agents in 8 swarms vs 1-8 in competitors |
+| **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase (parallel review council + optional worktree streams on Claude, sequential elsewhere) vs 1-8 in competitors, with all output gated by blind review + council |
 | **Quality** | 11 gates + blind review + devil's advocate + property-based testing |
 | **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
 | **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |

package/docs/COMPETITIVE-ANALYSIS.md CHANGED Viewed

@@ -39,7 +39,7 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
 | **Agent Count** | 41 types | 64+ agents | 5 roles | Unlimited | 8 parallel | 1 autonomous |
 | **Parallel Execution** | Yes (multi-agent) | Yes (swarms) | Sequential | Yes (crews) | Yes (8 worktrees) | Yes (fleet) |
 | **Published Benchmarks** | 98.78% HumanEval (self-reported, max 3 retries) | None | 85.9-87.7% HumanEval | None | ~250 tok/s | 15% complex tasks |
-| **SWE-bench Score** | 99.67% patch gen (unevaluated, 299/300) | Unknown | Unknown | Unknown | Unknown | 15% complex |
+| **SWE-bench Score** | Not measured (patch generation harness exists; official evaluator not run, so no resolve rate exists) | Unknown | Unknown | Unknown | Unknown | 15% complex |
 | **Full SDLC** | Yes (8 phases) | Yes | Partial | Partial | No | Partial |
 | **Business Ops** | **Yes (8 agents)** | No | No | No | No | No |
 | **Enterprise Security** | `--dangerously-skip-permissions` | MCP sandboxed | Sandboxed | Audit logs, RBAC | Staged autonomy | Sandboxed |
@@ -213,49 +213,30 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
 **Failed Problems (after RARV):** HumanEval/32, HumanEval/50
-### SWE-bench Lite Results (Full 300 Problems)
+### SWE-bench Lite: Honest Status
-**Direct Claude (Single Agent Baseline):**
-| Metric | Value |
-|--------|-------|
-| **Patch Generation** | **99.67%** |
-| Generated | 299/300 problems |
-| Errors | 1 |
-| Model | Claude Opus 4.5 |
-| Time | 6.17 hours |
-**Loki Mode Multi-Agent (with RARV):**
-| Metric | Value |
-|--------|-------|
-| **Patch Generation** | **99.67%** |
-| Generated | 299/300 problems |
-| Errors/Timeouts | 1 |
-| Model | Claude Opus 4.5 |
-| Time | 3.5 hours |
-**Three-Way Comparison:**
-| System | SWE-bench Patch Gen | Notes |
-|--------|---------------------|-------|
-| **Direct Claude** | **99.67%** (299/300) | Single agent, minimal overhead |
-| **Loki Mode (multi-agent)** | **99.67%** (299/300) | 4-agent pipeline with RARV |
-| Devin | ~15% complex tasks | Commercial, different benchmark |
-**Key Finding:** After timeout optimization (Architect: 60s->120s), the multi-agent RARV pipeline matches direct Claude's performance on SWE-bench. Both achieve 99.67% patch generation rate.
+**Loki Mode has NO SWE-bench score.** What exists: a harness that GENERATED candidate
+patches for 299 of 300 SWE-bench Lite problems (Claude Opus 4.5; single-agent run
+6.17h, 4-agent RARV pipeline 3.5h). Patch GENERATION only means a diff was produced.
+It does NOT mean the patch fixes the issue. The official SWE-bench evaluator (apply
+patch, run the repo's test suite) was never run, so there is no resolve rate, and
+generation rates must not be compared against other tools' task-resolution rates
+(e.g. Devin's independently-tested task success). For context, frontier model resolve
+rates on SWE-bench Verified are in the ~85-90 percent range; any higher number from
+any tool should be treated with suspicion, including ours.
-**Note:** Patches generated; full validation (resolve rate) requires running the Docker-based SWE-bench harness to apply patches and execute test suites.
+**What a fair claim looks like:** "A reproducible patch-generation harness exists;
+resolve rate is not yet measured." Nothing stronger.
 ---
 ## Critical Gaps to Address
-### Priority 1: Benchmarks (COMPLETED)
-- **Gap:** ~~No published HumanEval or SWE-bench scores~~ RESOLVED
-- **Result:** 98.17% HumanEval Pass@1 (beats MetaGPT by 10.5%)
-- **Result:** 99.67% SWE-bench Lite patch generation (299/300)
-- **Next:** Run full SWE-bench harness for resolve rate validation
+### Priority 1: Benchmarks (PARTIAL)
+- **Real:** 98.78% HumanEval Pass@1 with RARV (162/164, reproducible harness; 98.17% single-agent baseline)
+- **NOT real:** SWE-bench. Patch generation ran (299/300 diffs produced); the official
+  evaluator never ran, so there is NO resolve rate and no comparable score.
+- **Next:** run a contamination-resistant evaluator end-to-end (or publish nothing for SWE-bench)
 ### Priority 2: Security Model (Critical for Enterprise)
 - **Gap:** Relies on `--dangerously-skip-permissions`

package/docs/INSTALLATION.md CHANGED Viewed

@@ -2,7 +2,7 @@
 The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
-**Version:** v7.26.0
+**Version:** v7.27.0
 ---

package/docs/auto-claude-comparison.md CHANGED Viewed

@@ -85,7 +85,7 @@ Loki Mode is built on peer-reviewed research:
 **Verdict: Loki Mode wins** - Academically grounded.
 ### 2. Specialized Agent Types
-Loki Mode has 41 predefined agent types across 6 swarms:
+Loki Mode has 41 specialized agent roles across 8 domains - prompt-defined specifications the orchestrator adopts per phase, with parallel review (blind council) and optional worktree streams on Claude Code, sequential on other providers:
 - Engineering (8): frontend, backend, database, mobile, API, QA, perf, infra
 - Operations (8): DevOps, SRE, security, monitoring, incident, release, cost, compliance
 - Business (8): marketing, sales, finance, legal, support, HR, investor, partnerships
@@ -142,13 +142,16 @@ Loki Mode has 14 quality gates:
 ### 6. Published Benchmarks
 Loki Mode:
-- HumanEval: 98.78% Pass@1 (162/164)
-- SWE-bench: 99.67% patch generation (299/300)
-- Documented methodology with reproducible results
+- HumanEval: 98.78% Pass@1 (162/164, reproducible harness)
+- SWE-bench: not measured. A patch-generation harness exists (299/300 diffs
+  produced), but the official evaluator was never run, so there is no resolve
+  rate. Patch generation is not a success metric and is not comparable to
+  other tools' resolution scores.
 **Auto-Claude:** No published benchmarks.
-**Verdict: Loki Mode wins** - Verified performance claims.
+**Verdict: Loki Mode wins on HumanEval transparency** - one real, reproducible
+number versus none. No SWE-bench performance claim is made.
 ### 7. Licensing
 - Loki Mode: MIT (free, no restrictions)
@@ -248,7 +251,7 @@ Loki Mode now incorporates proven patterns from Cursor's large-scale agent deplo
 **Loki Mode is better if you want:**
 - Research-backed architecture
 - Full spec-to-product lifecycle (not just coding)
-- 41 specialized agents
+- 41 specialized agent roles
 - Anti-sycophancy measures
 - MIT license
 - No subscription requirement

package/docs/certification/01-core-concepts/lesson.md CHANGED Viewed

@@ -70,9 +70,9 @@ Or via the environment variable `LOKI_COMPLEXITY=simple|standard|complex`.
 ## Agents
-Loki Mode defines **41 specialized agent types** organized into **8 swarms**:
+Loki Mode defines **41 specialized agent types** organized into **8 domains**:
-| Swarm | Agent Count | Examples |
+| Domain | Agent Count | Examples |
 |-------|-------------|----------|
 | Engineering | 8 | frontend, backend, database, mobile, api, qa, perf, infra |
 | Operations | 8 | devops, sre, security, monitor, incident, release, cost, compliance |
@@ -179,4 +179,4 @@ Every Loki Mode project uses these files in the `.loki/` directory:
 ## Summary
-Loki Mode is an autonomous multi-agent system that follows the RARV cycle to build software from PRDs. It uses 41 agent types organized into 8 swarms, enforces quality through 9 gates with blind peer review, and maintains episodic/semantic/procedural memory for continuous learning. Projects are classified into simple, standard, or complex tiers that determine the number of phases executed.
+Loki Mode is an autonomous multi-agent system that follows the RARV cycle to build software from PRDs. It uses 41 agent types organized into 8 domains, enforces quality through 9 gates with blind peer review, and maintains episodic/semantic/procedural memory for continuous learning. Projects are classified into simple, standard, or complex tiers that determine the number of phases executed.

package/docs/competitive/emergence-others-analysis.md CHANGED Viewed

@@ -457,7 +457,7 @@ This positioning highlights three unique capabilities no competitor offers toget
 | Open source | Yes (Apache-2.0) | Yes |
 | Speed | 240+ tokens/sec | Depends on provider |
 | Providers | OpenAI only | 5 providers |
-| Multi-agent | Experimental (isolated) | 41 agent types, 8 swarms |
+| Multi-agent | Experimental (isolated) | 41 agent types, 8 domains |
 | Quality | Single-pass review | 10-gate system |
 | **Loki Mode advantage:** | Autonomous pipeline, multi-provider, mature multi-agent |

package/docs/competitive/replit-lovable-analysis.md CHANGED Viewed

@@ -312,7 +312,7 @@ Replit Agent has evolved rapidly through four major versions:
 |-----------|:-----------:|:-----------:|:---------:|
 | Natural language to app | Yes | Yes | Yes (via PRD) |
 | Autonomous execution | 200 min sessions | Per-prompt | Unlimited (budget-gated) |
-| Multi-agent orchestration | Parallel agents (Agent 4) | No | Yes (41 agent types, 8 swarms) |
+| Multi-agent orchestration | Parallel agents (Agent 4) | No | Yes (41 agent types, 8 domains) |
 | Self-testing loop | Yes | No | Yes (RARV cycle) |
 | Code review | No | No | Yes (3-reviewer blind review) |
 | Anti-sycophancy | No | No | Yes (devil's advocate) |

package/docs/cursor-comparison.md CHANGED Viewed

@@ -122,7 +122,7 @@ BOOTSTRAP -> DISCOVERY -> ARCHITECTURE -> INFRASTRUCTURE
      -> DEVELOPMENT -> QA -> DEPLOYMENT -> GROWTH (continuous)
 ```
-**41 Specialized Agent Types across 8 swarms:**
+**41 Specialized Agent Roles across 8 domains** (prompt-defined specifications the orchestrator adopts per phase; parallel review council and optional worktree streams on Claude Code, sequential on other providers):
 - Engineering (8 types)
 - Operations (8 types)
 - Business (8 types)

package/docs/prd-purple-lab-platform.md CHANGED Viewed

@@ -198,7 +198,7 @@ Each gate: name, status (pass/fail/pending), details expandable
 ### Agent Activity
 Real-time grid showing which of the 41 agent types are active:
-- Agent name, type, swarm, model tier (Opus/Sonnet/Haiku)
+- Agent name, type, domain, model tier (Opus/Sonnet/Haiku)
 - Current task
 - Status (working/idle/completed)
 - Duration

package/docs/show-hn-post.md CHANGED Viewed

@@ -2,13 +2,13 @@
 ## Title
-Show HN: Loki Mode - PRD in, tested code out (41 agents, 9 quality gates, RARV self-verification)
+Show HN: Loki Mode - PRD in, tested code out (41 agent roles, 9 quality gates, RARV self-verification)
 ## Body
 I built Loki Mode because I got tired of the copy-paste loop between AI coding assistants and my terminal. I wanted to hand over a PRD and get back a working, tested codebase -- not perfect, but a solid starting point.
-**What it does:** You give it a Product Requirements Document. It breaks the work into tasks, dispatches them across 41 specialized agent types organized into 8 swarms (engineering, operations, business, data, product, growth, review, orchestration), and runs every iteration through a self-verification loop called RARV: Reason, Act, Reflect, Verify. The idea is that the system catches its own mistakes before you have to.
+**What it does:** You give it a Product Requirements Document. It breaks the work into tasks across 41 specialized agent roles organized into 8 domains (engineering, operations, business, data, product, growth, review, orchestration) -- prompt-defined specifications the orchestrator adopts per phase, with parallel review (blind council) and optional worktree streams on Claude Code, sequential on other providers -- and runs every iteration through a self-verification loop called RARV: Reason, Act, Reflect, Verify. The idea is that the system catches its own mistakes before you have to.
 **Quality gates:** 9 automated gates including 3-reviewer blind review (agents review each other's work without seeing prior reviews), anti-sycophancy checks (a devil's advocate pass on unanimous approvals), and mock/mutation detection. These are not foolproof, but they catch a surprising number of issues that single-pass generation misses.

package/loki-ts/dist/loki.js CHANGED Viewed

@@ -1,5 +1,5 @@
 // @bun
-var f8=Object.defineProperty;var u8=($)=>$;function c8($,Q){this[$]=u8.bind(null,Q)}var g=($,Q)=>{for(var Z in Q)f8($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:c8.bind(Q,Z)})};var k=($,Q)=>()=>($&&(Q=$($=0)),Q);var X1=import.meta.require;var F$={};g(F$,{lokiDir:()=>P,homeLokiDir:()=>o1,findRepoRootForVersion:()=>d1,REPO_ROOT:()=>f});import{resolve as n,dirname as l1}from"path";import{fileURLToPath as p8}from"url";import{existsSync as L1}from"fs";import{homedir as l8}from"os";function d8(){let $=j$;for(let Q=0;Q<6;Q++){if(L1(n($,"VERSION"))&&L1(n($,"autonomy/run.sh")))return $;let Z=l1($);if(Z===$)break;$=Z}return n(j$,"..","..","..")}function d1($){let Q=$;for(let Z=0;Z<6;Z++){if(L1(n(Q,"VERSION"))&&L1(n(Q,"autonomy/run.sh")))return Q;let z=l1(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function o1(){return n(l8(),".loki")}var j$,f;var y=k(()=>{j$=l1(p8(import.meta.url));f=d8()});import{readFileSync as o8}from"fs";import{resolve as n8,dirname as a8}from"path";import{fileURLToPath as s8}from"url";function k1(){if($1!==null)return $1;let $="7.26.0";if(typeof $==="string"&&$.length>0)return $1=$,$1;try{let Q=a8(s8(import.meta.url)),Z=d1(Q);$1=o8(n8(Z,"VERSION"),"utf-8").trim()}catch{$1="unknown"}return $1}var $1=null;var n1=k(()=>{y()});var E$={};g(E$,{runOrThrow:()=>t8,run:()=>j,commandVersion:()=>i8,commandExists:()=>v,ShellError:()=>a1});async function j($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,K;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}K=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[H,X,q]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:H,stderr:X,exitCode:q}}finally{if(z)clearTimeout(z);if(K)clearTimeout(K)}}async function t8($,Q={}){let Z=await j($,Q);if(Z.exitCode!==0)throw new 
a1(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function v($){let Q=r8($),Z=await j(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function r8($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function i8($,Q="--version"){if(!await v($))return null;let z=await j([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var a1;var d=k(()=>{a1=class a1 extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return e8?"":$}var e8,T,N,_,KZ,A,R,h,J;var c=k(()=>{e8=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),N=a("\x1B[0;32m"),_=a("\x1B[1;33m"),KZ=a("\x1B[0;34m"),A=a("\x1B[0;36m"),R=a("\x1B[1m"),h=a("\x1B[2m"),J=a("\x1B[0m")});import{existsSync as U7}from"fs";async function Q1(){if(B1!==void 0)return B1;let $="/opt/homebrew/bin/python3.12";if(U7($))return B1=$,$;let Q=await v("python3.12");if(Q)return B1=Q,Q;let Z=await v("python3");return B1=Z,Z}async function Z1($,Q={}){let Z=await Q1();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return j([Z,"-c",$],Q)}var B1;var H1=k(()=>{d()});var d$={};g(d$,{runStatus:()=>N7});import{existsSync as b,readFileSync as q1,readdirSync as v$,statSync as f$}from"fs";import{resolve as D,basename as P7}from"path";import{homedir as L7}from"os";async function j7(){if(await v("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${J}
+var f8=Object.defineProperty;var u8=($)=>$;function c8($,Q){this[$]=u8.bind(null,Q)}var g=($,Q)=>{for(var Z in Q)f8($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:c8.bind(Q,Z)})};var k=($,Q)=>()=>($&&(Q=$($=0)),Q);var X1=import.meta.require;var F$={};g(F$,{lokiDir:()=>P,homeLokiDir:()=>o1,findRepoRootForVersion:()=>d1,REPO_ROOT:()=>f});import{resolve as n,dirname as l1}from"path";import{fileURLToPath as p8}from"url";import{existsSync as L1}from"fs";import{homedir as l8}from"os";function d8(){let $=j$;for(let Q=0;Q<6;Q++){if(L1(n($,"VERSION"))&&L1(n($,"autonomy/run.sh")))return $;let Z=l1($);if(Z===$)break;$=Z}return n(j$,"..","..","..")}function d1($){let Q=$;for(let Z=0;Z<6;Z++){if(L1(n(Q,"VERSION"))&&L1(n(Q,"autonomy/run.sh")))return Q;let z=l1(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function o1(){return n(l8(),".loki")}var j$,f;var y=k(()=>{j$=l1(p8(import.meta.url));f=d8()});import{readFileSync as o8}from"fs";import{resolve as n8,dirname as a8}from"path";import{fileURLToPath as s8}from"url";function k1(){if($1!==null)return $1;let $="7.27.0";if(typeof $==="string"&&$.length>0)return $1=$,$1;try{let Q=a8(s8(import.meta.url)),Z=d1(Q);$1=o8(n8(Z,"VERSION"),"utf-8").trim()}catch{$1="unknown"}return $1}var $1=null;var n1=k(()=>{y()});var E$={};g(E$,{runOrThrow:()=>t8,run:()=>j,commandVersion:()=>i8,commandExists:()=>v,ShellError:()=>a1});async function j($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,K;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}K=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[H,X,q]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:H,stderr:X,exitCode:q}}finally{if(z)clearTimeout(z);if(K)clearTimeout(K)}}async function t8($,Q={}){let Z=await j($,Q);if(Z.exitCode!==0)throw new 
a1(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function v($){let Q=r8($),Z=await j(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function r8($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function i8($,Q="--version"){if(!await v($))return null;let z=await j([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var a1;var d=k(()=>{a1=class a1 extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return e8?"":$}var e8,T,N,_,KZ,A,R,h,J;var c=k(()=>{e8=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),N=a("\x1B[0;32m"),_=a("\x1B[1;33m"),KZ=a("\x1B[0;34m"),A=a("\x1B[0;36m"),R=a("\x1B[1m"),h=a("\x1B[2m"),J=a("\x1B[0m")});import{existsSync as U7}from"fs";async function Q1(){if(B1!==void 0)return B1;let $="/opt/homebrew/bin/python3.12";if(U7($))return B1=$,$;let Q=await v("python3.12");if(Q)return B1=Q,Q;let Z=await v("python3");return B1=Z,Z}async function Z1($,Q={}){let Z=await Q1();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return j([Z,"-c",$],Q)}var B1;var H1=k(()=>{d()});var d$={};g(d$,{runStatus:()=>N7});import{existsSync as b,readFileSync as q1,readdirSync as v$,statSync as f$}from"fs";import{resolve as D,basename as P7}from"path";import{homedir as L7}from"os";async function j7(){if(await v("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${J}
 `),process.stdout.write(`Install with:
 `),process.stdout.write(`  brew install jq    (macOS)
 `),process.stdout.write(`  apt install jq     (Debian/Ubuntu)
@@ -787,4 +787,4 @@ Set LOKI_LEGACY_BASH=1 to force the bash CLI for every command.
 `),2}default:return process.stderr.write(`Unknown command: ${Q}
 `),process.stderr.write(v8),2}}g$();process.on("SIGINT",()=>process.exit(130));process.on("SIGTERM",()=>process.exit(143));var l3=await p3(Bun.argv.slice(2));process.exit(l3);
-//# debugId=7BD97DA7996A924D64756E2164756E21
+//# debugId=07AC5AC0D01821A064756E2164756E21

package/mcp/__init__.py CHANGED Viewed

@@ -57,4 +57,4 @@ try:
 except ImportError:
     __all__ = ['mcp']
-__version__ = '7.26.0'
+__version__ = '7.27.0'

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "loki-mode",
-  "version": "7.26.0",
+  "version": "7.27.0",
   "description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 11 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
   "keywords": [
     "agent",

package/providers/codex.sh CHANGED Viewed

@@ -32,7 +32,7 @@ PROVIDER_CLI="codex"
 # VERIFIED: exec --full-auto confirmed in codex exec --help (v0.98.0)
 # --full-auto: sets --ask-for-approval on-request + --sandbox workspace-write (v0.98.0)
 # Alternative: "exec --dangerously-bypass-approvals-and-sandbox" (legacy, no sandbox)
-PROVIDER_AUTONOMOUS_FLAG="exec --full-auto"
+PROVIDER_AUTONOMOUS_FLAG="exec --full-auto --skip-git-repo-check"
 PROVIDER_PROMPT_FLAG=""
 PROVIDER_PROMPT_POSITIONAL=true
@@ -119,7 +119,7 @@ provider_version() {
 provider_invoke() {
     local prompt="$1"
     shift
-    codex exec --full-auto "$prompt" "$@"
+    codex exec --full-auto --skip-git-repo-check "$prompt" "$@"
 }
 # Model tier to effort level parameter (Codex uses effort, not separate models)
@@ -210,6 +210,7 @@ provider_invoke_with_tier() {
     codex exec \
         --ask-for-approval never \
         --sandbox danger-full-access \
+        --skip-git-repo-check \
         "${extra_flags[@]}" \
         "$prompt" "$@"
 }

package/references/agent-types.md CHANGED Viewed

@@ -6,11 +6,11 @@ Complete definitions and capabilities for all 41 specialized agent types.
 ## Overview
-Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37 domain agents + 4 orchestration agents). The orchestrator spawns only the agents needed for your project -- typically 5-10 for simple projects, more for complex ones.
+Loki Mode has 41 specialized agent roles across 8 domains (37 domain agents + 4 orchestration agents) -- prompt-defined specifications the orchestrator adopts per phase, not separate processes. Parallelism on Claude Code comes from the blind review council, the adversarial reviewer, and optional git-worktree streams; on other providers everything runs sequentially. The orchestrator activates only the roles needed for your project -- typically 5-10 for simple projects, more for complex ones.
 ---
-## Engineering Swarm (8 types)
+## Engineering Domain (8 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -25,7 +25,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Operations Swarm (8 types)
+## Operations Domain (8 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -40,7 +40,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Business Swarm (8 types)
+## Business Domain (8 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -55,7 +55,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Data Swarm (3 types)
+## Data Domain (3 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -65,7 +65,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Product Swarm (3 types)
+## Product Domain (3 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -75,7 +75,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Growth Swarm (4 types)
+## Growth Domain (4 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -86,7 +86,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Review Swarm (3 types)
+## Review Domain (3 types)
 | Agent | Capabilities |
 |-------|-------------|
@@ -96,7 +96,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
 ---
-## Orchestration Swarm (4 types)
+## Orchestration Domain (4 types)
 > **Source:** [Cursor Scaling Learnings](./cursor-learnings.md) - patterns proven at large agent scale

package/references/agents.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Agent Type Definitions
-Complete specifications for all 41 specialized agent types in the Loki Mode multi-agent system (37 domain agents + 4 orchestration agents).
+Complete specifications for all 41 specialized agent roles in Loki Mode (37 domain agents + 4 orchestration agents). These are prompt-defined role specifications the orchestrator adopts per phase, not separate processes.
 **Note:** These are agent TYPE definitions, not a fixed count. Loki Mode dynamically spawns agents based on project needs - a simple todo app might use 5-10 agents, while a complex startup spawns more as needed.
@@ -45,7 +45,7 @@ Update after every task completion.
 ---
-## Engineering Swarm (8 Agents)
+## Engineering Domain (8 Agents)
 ### eng-frontend
 **Capabilities:**
@@ -241,7 +241,7 @@ Update after every task completion.
 ---
-## Operations Swarm (8 Agents)
+## Operations Domain (8 Agents)
 ### ops-devops
 **Capabilities:**
@@ -437,7 +437,7 @@ Update after every task completion.
 ---
-## Business Swarm (8 Agents)
+## Business Domain (8 Agents)
 ### biz-marketing
 **Capabilities:**
@@ -634,7 +634,7 @@ Update after every task completion.
 ---
-## Data Swarm (3 Agents)
+## Data Domain (3 Agents)
 ### data-ml
 **Capabilities:**
@@ -710,7 +710,7 @@ Update after every task completion.
 ---
-## Product Swarm (3 Agents)
+## Product Domain (3 Agents)
 ### prod-pm
 **Capabilities:**
@@ -787,7 +787,7 @@ Update after every task completion.
 ---
-## Review Swarm (3 Agents)
+## Review Domain (3 Agents)
 ### review-code
 **Capabilities:**
@@ -875,7 +875,7 @@ Update after every task completion.
 ---
-## Growth Swarm (4 Agents)
+## Growth Domain (4 Agents)
 ### growth-hacker
 **Capabilities:**

package/references/business-ops.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Business Operations Reference
-Workflows and procedures for business swarm agents.
+Workflows and procedures for business domain agents.
 ## Marketing Operations

package/references/competitive-analysis.md CHANGED Viewed

@@ -182,7 +182,7 @@ Dexter shows value of domain specialization. Our 41 agent types follow this patt
    - Most haven't scaled across enterprise
 ### Loki Mode Alignment
-- Multi-agent architecture (41 types, 8 swarms)
+- Multi-agent architecture (41 role types, 8 domains)
 - Plan Agents (orchestrator, planner)
 - Execution Agents (eng-*, ops-*, biz-*)
 - Security controls (LOKI_SANDBOX_MODE, LOKI_BLOCKED_COMMANDS)

package/skills/agents.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Agent Dispatch & Structured Prompting
-> **Full agent type definitions:** See `references/agent-types.md` for complete 41 agent role specifications across 8 swarms (Engineering, Operations, Business, Data, Product, Growth, Review, Orchestration).
+> **Full agent type definitions:** See `references/agent-types.md` for complete 41 agent role specifications across 8 domains (Engineering, Operations, Business, Data, Product, Growth, Review, Orchestration). These are prompt-defined specifications the orchestrator adopts per phase. Parallelism on Claude Code comes from the blind review council, the adversarial reviewer, and optional git-worktree streams; on other providers everything runs sequentially.
 ---
@@ -249,8 +249,8 @@ Priority order for context:
 See `references/agent-types.md` for complete specifications. Summary:
-| Swarm | Agent Types | Count |
-|-------|-------------|-------|
+| Domain | Agent Types | Count |
+|--------|-------------|-------|
 | Engineering | frontend, backend, database, mobile, api, qa, perf, infra | 8 |
 | Operations | devops, sre, security, monitor, incident, release, cost, compliance | 8 |
 | Business | marketing, sales, finance, legal, support, hr, investor, partnerships | 8 |

package/skills/providers.md CHANGED Viewed

@@ -6,7 +6,7 @@ Loki Mode supports four AI providers for autonomous execution.
 > **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
 > - Claude: `--dangerously-skip-permissions` (verified)
-> - Codex: `--full-auto` (recommended, v0.98.0) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
+> - Codex: `exec --full-auto --skip-git-repo-check` (the harness invocation; `--skip-git-repo-check` is required in fresh non-git directories) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
 | Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
 |---------|-------------|--------------|-----------|-------|
@@ -56,7 +56,7 @@ Task(model="haiku", ...)   # Fast tier (parallelize)
 ---
-## OpenAI Codex CLI (Degraded Mode)
+## OpenAI Codex CLI (Experimental, Degraded Mode)
 **Best for:** Teams standardized on OpenAI. Accepts feature tradeoffs.
@@ -70,7 +70,7 @@ Task(model="haiku", ...)   # Fast tier (parallelize)
 **Invocation:**
 ```bash
 # Recommended (v0.98.0+)
-codex --full-auto "$prompt"
+codex exec --full-auto --skip-git-repo-check "$prompt"
 # Legacy (still supported)
 codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"