loki-mode 7.25.0 → 7.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "7.25.0"
10
+ __version__ = "7.27.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try:
@@ -6523,7 +6523,7 @@ var LokiDashboard=(()=>{var Ee=Object.defineProperty;var rt=Object.getOwnPropert
6523
6523
  <p>App runner not started</p>
6524
6524
  <p class="hint">App runner will start after the first successful build iteration.</p>
6525
6525
  </div>
6526
- `}_attachEventListeners(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector('[data-action="restart"]'),i=e.querySelector('[data-action="stop"]');t&&t.addEventListener("click",()=>this._handleRestart()),i&&i.addEventListener("click",()=>this._handleStop())}_escapeHtml(e){return e?String(e).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;"):""}};customElements.define("loki-app-status",Q);var Ke={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"No app yet",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Could not start",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},error:{color:"var(--loki-text-muted, #71717a)",label:"Status unavailable",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},X=class extends h{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._api=null,this._pollInterval=null,this._visibilityHandler=null,this._status=null,this._errors=null,this._error=null,this._lastDataHash=null,this._detailsOpen=!1}connectedCallback(){super.connectedCallback(),this._setupApi(),this.render(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(e,t,i){t!==i&&(e==="api-url"&&this._api&&(this._api.baseUrl=i,this._loadData()),e==="theme"&&this._applyTheme())}_setupApi(){let 
e=this.getAttribute("api-url")||window.location.origin;this._api=g({baseUrl:e})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let e=await this._api.getAppRunnerStatus(),t=e?.status||"not_initialized",i=null;if(t==="crashed"||t==="failed")try{i=await this._api.getAppRunnerErrors(50)}catch{i=null}let a=JSON.stringify({status:t,port:e?.port,url:e?.url,crash:e?.crash_count,errLen:i?.lines?.length||0}),s=this._error!==null;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._status=e,this._errors=i,this._error=null,this.render()}catch(e){this._error||(this._error=`Could not read app status: ${e.message}`,this.render())}}_isValidUrl(e){if(!e)return!1;try{let t=new URL(e);return t.protocol==="http:"||t.protocol==="https:"}catch{return!1}}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(e){this._error=`Restart failed: ${e.message}`,this.render()}}_handleRefresh(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector("iframe.preview-frame"),i=this._status;if(t&&i&&this._isValidUrl(i.url)){let a=(i.url.includes("?")?"&":"?")+"_t="+Date.now();t.src=i.url+a}}_handleOpenExternal(){let e=this._status;e&&this._isValidUrl(e.url)&&window.open(e.url,"_blank","noopener")}_toggleDetails(){this._detailsOpen=!this._detailsOpen,this.render()}_getStyles(){return`
6526
+ `}_attachEventListeners(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector('[data-action="restart"]'),i=e.querySelector('[data-action="stop"]');t&&t.addEventListener("click",()=>this._handleRestart()),i&&i.addEventListener("click",()=>this._handleStop())}_escapeHtml(e){return e?String(e).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;"):""}};customElements.define("loki-app-status",Q);var Ke={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"No app yet",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Could not start",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},error:{color:"var(--loki-text-muted, #71717a)",label:"Status unavailable",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},X=class extends h{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._api=null,this._pollInterval=null,this._visibilityHandler=null,this._status=null,this._errors=null,this._error=null,this._lastDataHash=null,this._detailsOpen=!1}connectedCallback(){super.connectedCallback(),this._setupApi(),this.render(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(e,t,i){t!==i&&(e==="api-url"&&this._api&&(this._api.baseUrl=i,this._loadData()),e==="theme"&&this._applyTheme())}_setupApi(){let 
e=this.getAttribute("api-url")||window.location.origin;this._api=g({baseUrl:e})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let e=await this._api.getAppRunnerStatus(),t=e?.status||"not_initialized",i=null;if(t==="crashed"||t==="failed")try{i=await this._api.getAppRunnerErrors(50)}catch{i=null}let a=JSON.stringify({status:t,port:e?.port,url:e?.url,crash:e?.crash_count,errLen:i?.lines?.length||0,healthOk:e?.last_health?.ok===!0}),s=this._error!==null;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._status=e,this._errors=i,this._error=null,this.render()}catch(e){this._error||(this._error=`Could not read app status: ${e.message}`,this.render())}}_isValidUrl(e){if(!e)return!1;try{let t=new URL(e);return t.protocol==="http:"||t.protocol==="https:"}catch{return!1}}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(e){this._error=`Restart failed: ${e.message}`,this.render()}}_handleRefresh(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector("iframe.preview-frame"),i=this._status;if(t&&i&&this._isValidUrl(i.url)){let a=(i.url.includes("?")?"&":"?")+"_t="+Date.now();t.src=i.url+a}}_handleOpenExternal(){let e=this._status;e&&this._isValidUrl(e.url)&&window.open(e.url,"_blank","noopener")}_toggleDetails(){this._detailsOpen=!this._detailsOpen,this.render()}_getStyles(){return`
6527
6527
  .preview { padding: 16px; font-family: var(--loki-font-family, system-ui, -apple-system, sans-serif); color: var(--loki-text-primary, #201515); }
6528
6528
  .header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 12px; gap: 12px; flex-wrap: wrap; }
6529
6529
  .header-left { display: flex; align-items: center; gap: 10px; }
@@ -24,10 +24,10 @@
24
24
 
25
25
  | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
26
26
  |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
27
- | **Multi-Agent** | 41 agents in 8 swarms | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
27
+ | **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase; parallel review council + optional worktree streams (Claude), sequential elsewhere | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
28
28
  | **Orchestration** | Full orchestrator | N/A | N/A | Git worktree | Hooks | Manager view | Workflow | Subagents |
29
29
  | **Parallel Exec** | 10+ Haiku, 4 impl (worktree) | No | No | 8 max | Yes | Yes | Yes | Yes |
30
- | **Agent Swarms** | Eng, Ops, Business, Data, Product, Growth, Review | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
30
+ | **Agent Domains** | Eng, Ops, Business, Data, Product, Growth, Review, Orchestration | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
31
31
 
32
32
  ---
33
33
 
@@ -151,7 +151,7 @@
151
151
  | **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
152
152
  | **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
153
153
  | **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
154
- | **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agents | **Loki Mode** |
154
+ | **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agent roles | **Loki Mode** |
155
155
  | **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
156
156
  | **Review Comment Resolution** | Auto-apply simple changes | Manual review | **ADOPTED from Zencoder** |
157
157
  | **Dependency Management** | Scheduled PRs, one group at a time | Mentioned only | **ADOPTED from Zencoder** |
@@ -180,7 +180,7 @@
180
180
 
181
181
  1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
182
182
  2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
183
- 3. **Agent Specialization**: 41 pre-defined specialized agents across 8 swarms
183
+ 3. **Agent Specialization**: 41 pre-defined specialized agent roles across 8 domains
184
184
  4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
185
185
  5. **Autonomy Design**: Minimal human intervention from PRD to production
186
186
  6. **Research Foundation**: 10+ academic papers integrated vs proprietary
@@ -203,7 +203,7 @@
203
203
  |---------|--------------|---------|-----------------|------------|-----------------|---------------------|----------------|
204
204
  | **Stars** | 594 | 11,903 | 35K+ | 26K+ | 13.7K | N/A | N/A |
205
205
  | **npm/wk** | 6.1K | 21.4K | N/A | N/A | N/A | N/A | N/A |
206
- | **Agents** | 41 in 8 swarms | 11 agents | Fresh per task | 108 agents | Swarm-based | 32 agents | N/A |
206
+ | **Agents** | 41 roles in 8 domains | 11 agents | Fresh per task | 108 agents | Swarm-based | 32 agents | N/A |
207
207
  | **Skills** | Progressive disclosure | 6 slash commands | N/A | 129 skills | N/A | 35 skills | Memory focus |
208
208
  | **Multi-Provider** | Yes (Claude/Codex/Gemini) | 3 CLIs (separate) | No | No | No | No | No |
209
209
  | **Memory System** | 3-tier (episodic/semantic/procedural) | None | N/A | N/A | Hybrid | N/A | SQLite+FTS5 |
@@ -236,7 +236,7 @@ These are patterns from competing projects that are **practically and scientific
236
236
  | **Constitutional AI Integration** | Principles-based self-critique from Anthropic research | None have this |
237
237
  | **Anti-Sycophancy (CONSENSAGENT)** | Blind review + devil's advocate prevents groupthink | None have this |
238
238
  | **Provider Abstraction Layer** | Clean degradation from full-featured to sequential-only | Claude-only projects can't degrade |
239
- | **41 Specialized Agents** | Purpose-built agents in 8 swarms vs generic | agents (108) has more but less organized |
239
+ | **41 Specialized Agent Roles** | Purpose-built role definitions in 8 domains vs generic; Loki gates every role's output through blind review + council | agents (108) has more agents, but they are less organized |
240
240
  | **Research Foundation** | 10+ academic papers integrated with citations | Most have no research backing |
241
241
 
242
242
  ### Superpowers Deep-Dive (35K+ Stars)
@@ -258,7 +258,7 @@ Plugin marketplace architecture with unprecedented scale:
258
258
  | Pattern | Description | Loki Mode Status |
259
259
  |---------|-------------|------------------|
260
260
  | **72 Plugins** | Modular, focused plugins instead of monolith | Different approach (progressive disclosure) |
261
- | **108 Agents** | Specialized agents for specific domains | 41 agents in Loki Mode |
261
+ | **108 Agents** | Specialized agents for specific domains | 41 agent roles in Loki Mode |
262
262
  | **129 Skills** | Skills as first-class objects | 10 skills in skills/ |
263
263
  | **Four-Tier Model Strategy** | Explicit tier selection with constraints | Similar to Loki Mode tiers |
264
264
 
@@ -342,7 +342,7 @@ Tiered agent architecture with explicit escalation:
342
342
 
343
343
  | Agent | Killer Feature |
344
344
  |-------|---------------|
345
- | **Loki Mode** | Minimal-human-intervention full SDLC from any spec (PRD, GitHub issue, or YAML), 41 agents in 8 swarms, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
345
+ | **Loki Mode** | Minimal-human-intervention full SDLC from any spec (PRD, GitHub issue, or YAML), 41 agent roles in 8 domains, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
346
346
  | **Devin** | Full software engineer persona, Slack integration, 67% PR merge rate |
347
347
  | **OpenAI Codex** | Skills marketplace, $skill-creator, GPT-5.2-Codex, secure sandbox |
348
348
  | **Cursor** | 8 parallel agents, BugBot, Memories, $10B valuation, Composer model (250 tok/s) |
@@ -358,7 +358,7 @@ Tiered agent architecture with explicit escalation:
358
358
  | Dimension | Loki Mode Advantage |
359
359
  |-----------|-------------------|
360
360
  | **Autonomy** | Designed for high autonomy with minimal human intervention |
361
- | **Multi-Agent** | 41 specialized agents in 8 swarms vs 1-8 in competitors |
361
+ | **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase (parallel review council + optional worktree streams on Claude, sequential elsewhere) vs 1-8 in competitors, with all output gated by blind review + council |
362
362
  | **Quality** | 11 gates + blind review + devil's advocate + property-based testing |
363
363
  | **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
364
364
  | **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |
@@ -39,7 +39,7 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
39
39
  | **Agent Count** | 41 types | 64+ agents | 5 roles | Unlimited | 8 parallel | 1 autonomous |
40
40
  | **Parallel Execution** | Yes (multi-agent) | Yes (swarms) | Sequential | Yes (crews) | Yes (8 worktrees) | Yes (fleet) |
41
41
  | **Published Benchmarks** | 98.78% HumanEval (self-reported, max 3 retries) | None | 85.9-87.7% HumanEval | None | ~250 tok/s | 15% complex tasks |
42
- | **SWE-bench Score** | 99.67% patch gen (unevaluated, 299/300) | Unknown | Unknown | Unknown | Unknown | 15% complex |
42
+ | **SWE-bench Score** | Not measured (patch generation harness exists; official evaluator not run, so no resolve rate exists) | Unknown | Unknown | Unknown | Unknown | 15% complex |
43
43
  | **Full SDLC** | Yes (8 phases) | Yes | Partial | Partial | No | Partial |
44
44
  | **Business Ops** | **Yes (8 agents)** | No | No | No | No | No |
45
45
  | **Enterprise Security** | `--dangerously-skip-permissions` | MCP sandboxed | Sandboxed | Audit logs, RBAC | Staged autonomy | Sandboxed |
@@ -213,49 +213,30 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
213
213
 
214
214
  **Failed Problems (after RARV):** HumanEval/32, HumanEval/50
215
215
 
216
- ### SWE-bench Lite Results (Full 300 Problems)
216
+ ### SWE-bench Lite: Honest Status
217
217
 
218
- **Direct Claude (Single Agent Baseline):**
219
-
220
- | Metric | Value |
221
- |--------|-------|
222
- | **Patch Generation** | **99.67%** |
223
- | Generated | 299/300 problems |
224
- | Errors | 1 |
225
- | Model | Claude Opus 4.5 |
226
- | Time | 6.17 hours |
227
-
228
- **Loki Mode Multi-Agent (with RARV):**
229
-
230
- | Metric | Value |
231
- |--------|-------|
232
- | **Patch Generation** | **99.67%** |
233
- | Generated | 299/300 problems |
234
- | Errors/Timeouts | 1 |
235
- | Model | Claude Opus 4.5 |
236
- | Time | 3.5 hours |
237
-
238
- **Three-Way Comparison:**
239
-
240
- | System | SWE-bench Patch Gen | Notes |
241
- |--------|---------------------|-------|
242
- | **Direct Claude** | **99.67%** (299/300) | Single agent, minimal overhead |
243
- | **Loki Mode (multi-agent)** | **99.67%** (299/300) | 4-agent pipeline with RARV |
244
- | Devin | ~15% complex tasks | Commercial, different benchmark |
245
-
246
- **Key Finding:** After timeout optimization (Architect: 60s->120s), the multi-agent RARV pipeline matches direct Claude's performance on SWE-bench. Both achieve 99.67% patch generation rate.
218
+ **Loki Mode has NO SWE-bench score.** What exists: a harness that GENERATED candidate
219
+ patches for 299 of 300 SWE-bench Lite problems (Claude Opus 4.5; single-agent run
220
+ 6.17h, 4-agent RARV pipeline 3.5h). Patch GENERATION means only that a diff was produced.
221
+ It does NOT mean the patch fixes the issue. The official SWE-bench evaluator (apply
222
+ patch, run the repo's test suite) was never run, so there is no resolve rate, and
223
+ generation rates must not be compared against other tools' task-resolution rates
224
+ (e.g. Devin's independently-tested task success). For context, frontier model resolve
225
+ rates on SWE-bench Verified are in the ~85-90 percent range; any higher number from
226
+ any tool should be treated with suspicion, including ours.
247
227
 
248
- **Note:** Patches generated; full validation (resolve rate) requires running the Docker-based SWE-bench harness to apply patches and execute test suites.
228
+ **What a fair claim looks like:** "A reproducible patch-generation harness exists;
229
+ resolve rate is not yet measured." Nothing stronger.
249
230
 
250
231
  ---
251
232
 
252
233
  ## Critical Gaps to Address
253
234
 
254
- ### Priority 1: Benchmarks (COMPLETED)
255
- - **Gap:** ~~No published HumanEval or SWE-bench scores~~ RESOLVED
256
- - **Result:** 98.17% HumanEval Pass@1 (beats MetaGPT by 10.5%)
257
- - **Result:** 99.67% SWE-bench Lite patch generation (299/300)
258
- - **Next:** Run full SWE-bench harness for resolve rate validation
235
+ ### Priority 1: Benchmarks (PARTIAL)
236
+ - **Real:** 98.78% HumanEval Pass@1 with RARV (162/164, reproducible harness; 98.17% single-agent baseline)
237
+ - **NOT real:** SWE-bench. Patch generation ran (299/300 diffs produced); the official
238
+ evaluator never ran, so there is NO resolve rate and no comparable score.
239
+ - **Next:** run a contamination-resistant evaluator end-to-end (or publish nothing for SWE-bench)
259
240
 
260
241
  ### Priority 2: Security Model (Critical for Enterprise)
261
242
  - **Gap:** Relies on `--dangerously-skip-permissions`
@@ -2,7 +2,7 @@
2
2
 
3
3
  The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. This guide provides complete installation instructions for all platforms and use cases.
4
4
 
5
- **Version:** v7.25.0
5
+ **Version:** v7.27.0
6
6
 
7
7
  ---
8
8
 
@@ -85,7 +85,7 @@ Loki Mode is built on peer-reviewed research:
85
85
  **Verdict: Loki Mode wins** - Academically grounded.
86
86
 
87
87
  ### 2. Specialized Agent Types
88
- Loki Mode has 41 predefined agent types across 6 swarms:
88
+ Loki Mode has 41 specialized agent roles across 8 domains - prompt-defined specifications the orchestrator adopts per phase, with parallel review (blind council) and optional worktree streams on Claude Code, sequential on other providers:
89
89
  - Engineering (8): frontend, backend, database, mobile, API, QA, perf, infra
90
90
  - Operations (8): DevOps, SRE, security, monitoring, incident, release, cost, compliance
91
91
  - Business (8): marketing, sales, finance, legal, support, HR, investor, partnerships
@@ -142,13 +142,16 @@ Loki Mode has 14 quality gates:
142
142
 
143
143
  ### 6. Published Benchmarks
144
144
  Loki Mode:
145
- - HumanEval: 98.78% Pass@1 (162/164)
146
- - SWE-bench: 99.67% patch generation (299/300)
147
- - Documented methodology with reproducible results
145
+ - HumanEval: 98.78% Pass@1 (162/164, reproducible harness)
146
+ - SWE-bench: not measured. A patch-generation harness exists (299/300 diffs
147
+ produced), but the official evaluator was never run, so there is no resolve
148
+ rate. Patch generation is not a success metric and is not comparable to
149
+ other tools' resolution scores.
148
150
 
149
151
  **Auto-Claude:** No published benchmarks.
150
152
 
151
- **Verdict: Loki Mode wins** - Verified performance claims.
153
+ **Verdict: Loki Mode wins on HumanEval transparency** - one real, reproducible
154
+ number versus none. No SWE-bench performance claim is made.
152
155
 
153
156
  ### 7. Licensing
154
157
  - Loki Mode: MIT (free, no restrictions)
@@ -248,7 +251,7 @@ Loki Mode now incorporates proven patterns from Cursor's large-scale agent deplo
248
251
  **Loki Mode is better if you want:**
249
252
  - Research-backed architecture
250
253
  - Full spec-to-product lifecycle (not just coding)
251
- - 41 specialized agents
254
+ - 41 specialized agent roles
252
255
  - Anti-sycophancy measures
253
256
  - MIT license
254
257
  - No subscription requirement
@@ -70,9 +70,9 @@ Or via the environment variable `LOKI_COMPLEXITY=simple|standard|complex`.
70
70
 
71
71
  ## Agents
72
72
 
73
- Loki Mode defines **41 specialized agent types** organized into **8 swarms**:
73
+ Loki Mode defines **41 specialized agent types** organized into **8 domains**:
74
74
 
75
- | Swarm | Agent Count | Examples |
75
+ | Domain | Agent Count | Examples |
76
76
  |-------|-------------|----------|
77
77
  | Engineering | 8 | frontend, backend, database, mobile, api, qa, perf, infra |
78
78
  | Operations | 8 | devops, sre, security, monitor, incident, release, cost, compliance |
@@ -179,4 +179,4 @@ Every Loki Mode project uses these files in the `.loki/` directory:
179
179
 
180
180
  ## Summary
181
181
 
182
- Loki Mode is an autonomous multi-agent system that follows the RARV cycle to build software from PRDs. It uses 41 agent types organized into 8 swarms, enforces quality through 9 gates with blind peer review, and maintains episodic/semantic/procedural memory for continuous learning. Projects are classified into simple, standard, or complex tiers that determine the number of phases executed.
182
+ Loki Mode is an autonomous multi-agent system that follows the RARV cycle to build software from PRDs. It uses 41 agent types organized into 8 domains, enforces quality through 9 gates with blind peer review, and maintains episodic/semantic/procedural memory for continuous learning. Projects are classified into simple, standard, or complex tiers that determine the number of phases executed.
@@ -457,7 +457,7 @@ This positioning highlights three unique capabilities no competitor offers toget
457
457
  | Open source | Yes (Apache-2.0) | Yes |
458
458
  | Speed | 240+ tokens/sec | Depends on provider |
459
459
  | Providers | OpenAI only | 5 providers |
460
- | Multi-agent | Experimental (isolated) | 41 agent types, 8 swarms |
460
+ | Multi-agent | Experimental (isolated) | 41 agent types, 8 domains |
461
461
  | Quality | Single-pass review | 10-gate system |
462
462
  | **Loki Mode advantage:** | Autonomous pipeline, multi-provider, mature multi-agent |
463
463
 
@@ -312,7 +312,7 @@ Replit Agent has evolved rapidly through four major versions:
312
312
  |-----------|:-----------:|:-----------:|:---------:|
313
313
  | Natural language to app | Yes | Yes | Yes (via PRD) |
314
314
  | Autonomous execution | 200 min sessions | Per-prompt | Unlimited (budget-gated) |
315
- | Multi-agent orchestration | Parallel agents (Agent 4) | No | Yes (41 agent types, 8 swarms) |
315
+ | Multi-agent orchestration | Parallel agents (Agent 4) | No | Yes (41 agent types, 8 domains) |
316
316
  | Self-testing loop | Yes | No | Yes (RARV cycle) |
317
317
  | Code review | No | No | Yes (3-reviewer blind review) |
318
318
  | Anti-sycophancy | No | No | Yes (devil's advocate) |
@@ -122,7 +122,7 @@ BOOTSTRAP -> DISCOVERY -> ARCHITECTURE -> INFRASTRUCTURE
122
122
  -> DEVELOPMENT -> QA -> DEPLOYMENT -> GROWTH (continuous)
123
123
  ```
124
124
 
125
- **41 Specialized Agent Types across 8 swarms:**
125
+ **41 Specialized Agent Roles across 8 domains** (prompt-defined specifications the orchestrator adopts per phase; parallel review council and optional worktree streams on Claude Code, sequential on other providers):
126
126
  - Engineering (8 types)
127
127
  - Operations (8 types)
128
128
  - Business (8 types)
@@ -198,7 +198,7 @@ Each gate: name, status (pass/fail/pending), details expandable
198
198
  ### Agent Activity
199
199
 
200
200
  Real-time grid showing which of the 41 agent types are active:
201
- - Agent name, type, swarm, model tier (Opus/Sonnet/Haiku)
201
+ - Agent name, type, domain, model tier (Opus/Sonnet/Haiku)
202
202
  - Current task
203
203
  - Status (working/idle/completed)
204
204
  - Duration
@@ -2,13 +2,13 @@
2
2
 
3
3
  ## Title
4
4
 
5
- Show HN: Loki Mode - PRD in, tested code out (41 agents, 9 quality gates, RARV self-verification)
5
+ Show HN: Loki Mode - PRD in, tested code out (41 agent roles, 9 quality gates, RARV self-verification)
6
6
 
7
7
  ## Body
8
8
 
9
9
  I built Loki Mode because I got tired of the copy-paste loop between AI coding assistants and my terminal. I wanted to hand over a PRD and get back a working, tested codebase -- not perfect, but a solid starting point.
10
10
 
11
- **What it does:** You give it a Product Requirements Document. It breaks the work into tasks, dispatches them across 41 specialized agent types organized into 8 swarms (engineering, operations, business, data, product, growth, review, orchestration), and runs every iteration through a self-verification loop called RARV: Reason, Act, Reflect, Verify. The idea is that the system catches its own mistakes before you have to.
11
+ **What it does:** You give it a Product Requirements Document. It breaks the work into tasks across 41 specialized agent roles organized into 8 domains (engineering, operations, business, data, product, growth, review, orchestration) -- prompt-defined specifications the orchestrator adopts per phase, with parallel review (blind council) and optional worktree streams on Claude Code, sequential on other providers -- and runs every iteration through a self-verification loop called RARV: Reason, Act, Reflect, Verify. The idea is that the system catches its own mistakes before you have to.
12
12
 
13
13
  **Quality gates:** 9 automated gates including 3-reviewer blind review (agents review each other's work without seeing prior reviews), anti-sycophancy checks (a devil's advocate pass on unanimous approvals), and mock/mutation detection. These are not foolproof, but they catch a surprising number of issues that single-pass generation misses.
14
14
 
@@ -1,5 +1,5 @@
1
1
  // @bun
2
- var f8=Object.defineProperty;var u8=($)=>$;function c8($,Q){this[$]=u8.bind(null,Q)}var g=($,Q)=>{for(var Z in Q)f8($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:c8.bind(Q,Z)})};var k=($,Q)=>()=>($&&(Q=$($=0)),Q);var X1=import.meta.require;var F$={};g(F$,{lokiDir:()=>P,homeLokiDir:()=>o1,findRepoRootForVersion:()=>d1,REPO_ROOT:()=>f});import{resolve as n,dirname as l1}from"path";import{fileURLToPath as p8}from"url";import{existsSync as L1}from"fs";import{homedir as l8}from"os";function d8(){let $=j$;for(let Q=0;Q<6;Q++){if(L1(n($,"VERSION"))&&L1(n($,"autonomy/run.sh")))return $;let Z=l1($);if(Z===$)break;$=Z}return n(j$,"..","..","..")}function d1($){let Q=$;for(let Z=0;Z<6;Z++){if(L1(n(Q,"VERSION"))&&L1(n(Q,"autonomy/run.sh")))return Q;let z=l1(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function o1(){return n(l8(),".loki")}var j$,f;var y=k(()=>{j$=l1(p8(import.meta.url));f=d8()});import{readFileSync as o8}from"fs";import{resolve as n8,dirname as a8}from"path";import{fileURLToPath as s8}from"url";function k1(){if($1!==null)return $1;let $="7.25.0";if(typeof $==="string"&&$.length>0)return $1=$,$1;try{let Q=a8(s8(import.meta.url)),Z=d1(Q);$1=o8(n8(Z,"VERSION"),"utf-8").trim()}catch{$1="unknown"}return $1}var $1=null;var n1=k(()=>{y()});var E$={};g(E$,{runOrThrow:()=>t8,run:()=>j,commandVersion:()=>i8,commandExists:()=>v,ShellError:()=>a1});async function j($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,K;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}K=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[H,X,q]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:H,stderr:X,exitCode:q}}finally{if(z)clearTimeout(z);if(K)clearTimeout(K)}}async function t8($,Q={}){let Z=await j($,Q);if(Z.exitCode!==0)throw new 
a1(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function v($){let Q=r8($),Z=await j(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function r8($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function i8($,Q="--version"){if(!await v($))return null;let z=await j([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var a1;var d=k(()=>{a1=class a1 extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return e8?"":$}var e8,T,N,_,KZ,A,R,h,J;var c=k(()=>{e8=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),N=a("\x1B[0;32m"),_=a("\x1B[1;33m"),KZ=a("\x1B[0;34m"),A=a("\x1B[0;36m"),R=a("\x1B[1m"),h=a("\x1B[2m"),J=a("\x1B[0m")});import{existsSync as U7}from"fs";async function Q1(){if(B1!==void 0)return B1;let $="/opt/homebrew/bin/python3.12";if(U7($))return B1=$,$;let Q=await v("python3.12");if(Q)return B1=Q,Q;let Z=await v("python3");return B1=Z,Z}async function Z1($,Q={}){let Z=await Q1();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return j([Z,"-c",$],Q)}var B1;var H1=k(()=>{d()});var d$={};g(d$,{runStatus:()=>N7});import{existsSync as b,readFileSync as q1,readdirSync as v$,statSync as f$}from"fs";import{resolve as D,basename as P7}from"path";import{homedir as L7}from"os";async function j7(){if(await v("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${J}
2
+ var f8=Object.defineProperty;var u8=($)=>$;function c8($,Q){this[$]=u8.bind(null,Q)}var g=($,Q)=>{for(var Z in Q)f8($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:c8.bind(Q,Z)})};var k=($,Q)=>()=>($&&(Q=$($=0)),Q);var X1=import.meta.require;var F$={};g(F$,{lokiDir:()=>P,homeLokiDir:()=>o1,findRepoRootForVersion:()=>d1,REPO_ROOT:()=>f});import{resolve as n,dirname as l1}from"path";import{fileURLToPath as p8}from"url";import{existsSync as L1}from"fs";import{homedir as l8}from"os";function d8(){let $=j$;for(let Q=0;Q<6;Q++){if(L1(n($,"VERSION"))&&L1(n($,"autonomy/run.sh")))return $;let Z=l1($);if(Z===$)break;$=Z}return n(j$,"..","..","..")}function d1($){let Q=$;for(let Z=0;Z<6;Z++){if(L1(n(Q,"VERSION"))&&L1(n(Q,"autonomy/run.sh")))return Q;let z=l1(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function o1(){return n(l8(),".loki")}var j$,f;var y=k(()=>{j$=l1(p8(import.meta.url));f=d8()});import{readFileSync as o8}from"fs";import{resolve as n8,dirname as a8}from"path";import{fileURLToPath as s8}from"url";function k1(){if($1!==null)return $1;let $="7.27.0";if(typeof $==="string"&&$.length>0)return $1=$,$1;try{let Q=a8(s8(import.meta.url)),Z=d1(Q);$1=o8(n8(Z,"VERSION"),"utf-8").trim()}catch{$1="unknown"}return $1}var $1=null;var n1=k(()=>{y()});var E$={};g(E$,{runOrThrow:()=>t8,run:()=>j,commandVersion:()=>i8,commandExists:()=>v,ShellError:()=>a1});async function j($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,K;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}K=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[H,X,q]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:H,stderr:X,exitCode:q}}finally{if(z)clearTimeout(z);if(K)clearTimeout(K)}}async function t8($,Q={}){let Z=await j($,Q);if(Z.exitCode!==0)throw new 
a1(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function v($){let Q=r8($),Z=await j(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function r8($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function i8($,Q="--version"){if(!await v($))return null;let z=await j([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var a1;var d=k(()=>{a1=class a1 extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return e8?"":$}var e8,T,N,_,KZ,A,R,h,J;var c=k(()=>{e8=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),N=a("\x1B[0;32m"),_=a("\x1B[1;33m"),KZ=a("\x1B[0;34m"),A=a("\x1B[0;36m"),R=a("\x1B[1m"),h=a("\x1B[2m"),J=a("\x1B[0m")});import{existsSync as U7}from"fs";async function Q1(){if(B1!==void 0)return B1;let $="/opt/homebrew/bin/python3.12";if(U7($))return B1=$,$;let Q=await v("python3.12");if(Q)return B1=Q,Q;let Z=await v("python3");return B1=Z,Z}async function Z1($,Q={}){let Z=await Q1();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return j([Z,"-c",$],Q)}var B1;var H1=k(()=>{d()});var d$={};g(d$,{runStatus:()=>N7});import{existsSync as b,readFileSync as q1,readdirSync as v$,statSync as f$}from"fs";import{resolve as D,basename as P7}from"path";import{homedir as L7}from"os";async function j7(){if(await v("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${J}
3
3
  `),process.stdout.write(`Install with:
4
4
  `),process.stdout.write(` brew install jq (macOS)
5
5
  `),process.stdout.write(` apt install jq (Debian/Ubuntu)
@@ -787,4 +787,4 @@ Set LOKI_LEGACY_BASH=1 to force the bash CLI for every command.
787
787
  `),2}default:return process.stderr.write(`Unknown command: ${Q}
788
788
  `),process.stderr.write(v8),2}}g$();process.on("SIGINT",()=>process.exit(130));process.on("SIGTERM",()=>process.exit(143));var l3=await p3(Bun.argv.slice(2));process.exit(l3);
789
789
 
790
- //# debugId=C1988314C1A4579264756E2164756E21
790
+ //# debugId=07AC5AC0D01821A064756E2164756E21
package/mcp/__init__.py CHANGED
@@ -57,4 +57,4 @@ try:
57
57
  except ImportError:
58
58
  __all__ = ['mcp']
59
59
 
60
- __version__ = '7.25.0'
60
+ __version__ = '7.27.0'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "loki-mode",
3
- "version": "7.25.0",
3
+ "version": "7.27.0",
4
4
  "description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 11 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
5
5
  "keywords": [
6
6
  "agent",
@@ -32,7 +32,7 @@ PROVIDER_CLI="codex"
32
32
  # VERIFIED: exec --full-auto confirmed in codex exec --help (v0.98.0)
33
33
  # --full-auto: sets --ask-for-approval on-request + --sandbox workspace-write (v0.98.0)
34
34
  # Alternative: "exec --dangerously-bypass-approvals-and-sandbox" (legacy, no sandbox)
35
- PROVIDER_AUTONOMOUS_FLAG="exec --full-auto"
35
+ PROVIDER_AUTONOMOUS_FLAG="exec --full-auto --skip-git-repo-check"
36
36
  PROVIDER_PROMPT_FLAG=""
37
37
  PROVIDER_PROMPT_POSITIONAL=true
38
38
 
@@ -119,7 +119,7 @@ provider_version() {
119
119
  provider_invoke() {
120
120
  local prompt="$1"
121
121
  shift
122
- codex exec --full-auto "$prompt" "$@"
122
+ codex exec --full-auto --skip-git-repo-check "$prompt" "$@"
123
123
  }
124
124
 
125
125
  # Model tier to effort level parameter (Codex uses effort, not separate models)
@@ -210,6 +210,7 @@ provider_invoke_with_tier() {
210
210
  codex exec \
211
211
  --ask-for-approval never \
212
212
  --sandbox danger-full-access \
213
+ --skip-git-repo-check \
213
214
  "${extra_flags[@]}" \
214
215
  "$prompt" "$@"
215
216
  }
@@ -6,11 +6,11 @@ Complete definitions and capabilities for all 41 specialized agent types.
6
6
 
7
7
  ## Overview
8
8
 
9
- Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37 domain agents + 4 orchestration agents). The orchestrator spawns only the agents needed for your project -- typically 5-10 for simple projects, more for complex ones.
9
+ Loki Mode has 41 specialized agent roles across 8 domains (37 domain agents + 4 orchestration agents) -- prompt-defined specifications the orchestrator adopts per phase, not separate processes. Parallelism on Claude Code comes from the blind review council, the adversarial reviewer, and optional git-worktree streams; on other providers everything runs sequentially. The orchestrator activates only the roles needed for your project -- typically 5-10 for simple projects, more for complex ones.
10
10
 
11
11
  ---
12
12
 
13
- ## Engineering Swarm (8 types)
13
+ ## Engineering Domain (8 types)
14
14
 
15
15
  | Agent | Capabilities |
16
16
  |-------|-------------|
@@ -25,7 +25,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
25
25
 
26
26
  ---
27
27
 
28
- ## Operations Swarm (8 types)
28
+ ## Operations Domain (8 types)
29
29
 
30
30
  | Agent | Capabilities |
31
31
  |-------|-------------|
@@ -40,7 +40,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
40
40
 
41
41
  ---
42
42
 
43
- ## Business Swarm (8 types)
43
+ ## Business Domain (8 types)
44
44
 
45
45
  | Agent | Capabilities |
46
46
  |-------|-------------|
@@ -55,7 +55,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
55
55
 
56
56
  ---
57
57
 
58
- ## Data Swarm (3 types)
58
+ ## Data Domain (3 types)
59
59
 
60
60
  | Agent | Capabilities |
61
61
  |-------|-------------|
@@ -65,7 +65,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
65
65
 
66
66
  ---
67
67
 
68
- ## Product Swarm (3 types)
68
+ ## Product Domain (3 types)
69
69
 
70
70
  | Agent | Capabilities |
71
71
  |-------|-------------|
@@ -75,7 +75,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
75
75
 
76
76
  ---
77
77
 
78
- ## Growth Swarm (4 types)
78
+ ## Growth Domain (4 types)
79
79
 
80
80
  | Agent | Capabilities |
81
81
  |-------|-------------|
@@ -86,7 +86,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
86
86
 
87
87
  ---
88
88
 
89
- ## Review Swarm (3 types)
89
+ ## Review Domain (3 types)
90
90
 
91
91
  | Agent | Capabilities |
92
92
  |-------|-------------|
@@ -96,7 +96,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
96
96
 
97
97
  ---
98
98
 
99
- ## Orchestration Swarm (4 types)
99
+ ## Orchestration Domain (4 types)
100
100
 
101
101
  > **Source:** [Cursor Scaling Learnings](./cursor-learnings.md) - patterns proven at large agent scale
102
102
 
@@ -1,6 +1,6 @@
1
1
  # Agent Type Definitions
2
2
 
3
- Complete specifications for all 41 specialized agent types in the Loki Mode multi-agent system (37 domain agents + 4 orchestration agents).
3
+ Complete specifications for all 41 specialized agent roles in Loki Mode (37 domain agents + 4 orchestration agents). These are prompt-defined role specifications the orchestrator adopts per phase, not separate processes.
4
4
 
5
5
  **Note:** These are agent TYPE definitions, not a fixed count. Loki Mode dynamically activates roles based on project needs - a simple todo app might use 5-10 roles, while a complex startup activates more as needed.
6
6
 
@@ -45,7 +45,7 @@ Update after every task completion.
45
45
 
46
46
  ---
47
47
 
48
- ## Engineering Swarm (8 Agents)
48
+ ## Engineering Domain (8 Agents)
49
49
 
50
50
  ### eng-frontend
51
51
  **Capabilities:**
@@ -241,7 +241,7 @@ Update after every task completion.
241
241
 
242
242
  ---
243
243
 
244
- ## Operations Swarm (8 Agents)
244
+ ## Operations Domain (8 Agents)
245
245
 
246
246
  ### ops-devops
247
247
  **Capabilities:**
@@ -437,7 +437,7 @@ Update after every task completion.
437
437
 
438
438
  ---
439
439
 
440
- ## Business Swarm (8 Agents)
440
+ ## Business Domain (8 Agents)
441
441
 
442
442
  ### biz-marketing
443
443
  **Capabilities:**
@@ -634,7 +634,7 @@ Update after every task completion.
634
634
 
635
635
  ---
636
636
 
637
- ## Data Swarm (3 Agents)
637
+ ## Data Domain (3 Agents)
638
638
 
639
639
  ### data-ml
640
640
  **Capabilities:**
@@ -710,7 +710,7 @@ Update after every task completion.
710
710
 
711
711
  ---
712
712
 
713
- ## Product Swarm (3 Agents)
713
+ ## Product Domain (3 Agents)
714
714
 
715
715
  ### prod-pm
716
716
  **Capabilities:**
@@ -787,7 +787,7 @@ Update after every task completion.
787
787
 
788
788
  ---
789
789
 
790
- ## Review Swarm (3 Agents)
790
+ ## Review Domain (3 Agents)
791
791
 
792
792
  ### review-code
793
793
  **Capabilities:**
@@ -875,7 +875,7 @@ Update after every task completion.
875
875
 
876
876
  ---
877
877
 
878
- ## Growth Swarm (4 Agents)
878
+ ## Growth Domain (4 Agents)
879
879
 
880
880
  ### growth-hacker
881
881
  **Capabilities:**
@@ -1,6 +1,6 @@
1
1
  # Business Operations Reference
2
2
 
3
- Workflows and procedures for business swarm agents.
3
+ Workflows and procedures for business domain agents.
4
4
 
5
5
  ## Marketing Operations
6
6
 
@@ -182,7 +182,7 @@ Dexter shows value of domain specialization. Our 41 agent types follow this patt
182
182
  - Most haven't scaled across enterprise
183
183
 
184
184
  ### Loki Mode Alignment
185
- - Multi-agent architecture (41 types, 8 swarms)
185
+ - Multi-agent architecture (41 role types, 8 domains)
186
186
  - Plan Agents (orchestrator, planner)
187
187
  - Execution Agents (eng-*, ops-*, biz-*)
188
188
  - Security controls (LOKI_SANDBOX_MODE, LOKI_BLOCKED_COMMANDS)
package/skills/agents.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Agent Dispatch & Structured Prompting
2
2
 
3
- > **Full agent type definitions:** See `references/agent-types.md` for complete 41 agent role specifications across 8 swarms (Engineering, Operations, Business, Data, Product, Growth, Review, Orchestration).
3
+ > **Full agent type definitions:** See `references/agent-types.md` for the complete specifications of all 41 agent roles across 8 domains (Engineering, Operations, Business, Data, Product, Growth, Review, Orchestration). These are prompt-defined specifications the orchestrator adopts per phase; parallelism on Claude Code comes from the blind review council, the adversarial reviewer, and optional git-worktree streams, while on other providers everything runs sequentially.
4
4
 
5
5
  ---
6
6
 
@@ -249,8 +249,8 @@ Priority order for context:
249
249
 
250
250
  See `references/agent-types.md` for complete specifications. Summary:
251
251
 
252
- | Swarm | Agent Types | Count |
253
- |-------|-------------|-------|
252
+ | Domain | Agent Types | Count |
253
+ |--------|-------------|-------|
254
254
  | Engineering | frontend, backend, database, mobile, api, qa, perf, infra | 8 |
255
255
  | Operations | devops, sre, security, monitor, incident, release, cost, compliance | 8 |
256
256
  | Business | marketing, sales, finance, legal, support, hr, investor, partnerships | 8 |
@@ -6,7 +6,7 @@ Loki Mode supports four AI providers for autonomous execution.
6
6
 
7
7
  > **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
8
8
  > - Claude: `--dangerously-skip-permissions` (verified)
9
- > - Codex: `--full-auto` (recommended, v0.98.0) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
9
+ > - Codex: `exec --full-auto --skip-git-repo-check` (the harness invocation; `--skip-git-repo-check` is required in fresh non-git directories) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
10
10
 
11
11
  | Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
12
12
  |---------|-------------|--------------|-----------|-------|
@@ -56,7 +56,7 @@ Task(model="haiku", ...) # Fast tier (parallelize)
56
56
 
57
57
  ---
58
58
 
59
- ## OpenAI Codex CLI (Degraded Mode)
59
+ ## OpenAI Codex CLI (Experimental, Degraded Mode)
60
60
 
61
61
  **Best for:** Teams standardized on OpenAI. Accepts feature tradeoffs.
62
62
 
@@ -70,7 +70,7 @@ Task(model="haiku", ...) # Fast tier (parallelize)
70
70
  **Invocation:**
71
71
  ```bash
72
72
  # Recommended (v0.98.0+)
73
- codex --full-auto "$prompt"
73
+ codex exec --full-auto --skip-git-repo-check "$prompt"
74
74
 
75
75
  # Legacy (still supported)
76
76
  codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"