loki-mode 7.26.0 → 7.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -11
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/completion-council.sh +25 -0
- package/autonomy/lib/trust_metrics.py +636 -0
- package/autonomy/loki +93 -0
- package/autonomy/run.sh +113 -5
- package/autonomy/verify.sh +1075 -0
- package/dashboard/__init__.py +1 -1
- package/dashboard/static/index.html +1 -1
- package/docs/COMPARISON.md +9 -9
- package/docs/COMPETITIVE-ANALYSIS.md +18 -37
- package/docs/INSTALLATION.md +1 -1
- package/docs/auto-claude-comparison.md +9 -6
- package/docs/certification/01-core-concepts/lesson.md +3 -3
- package/docs/competitive/emergence-others-analysis.md +1 -1
- package/docs/competitive/replit-lovable-analysis.md +1 -1
- package/docs/cursor-comparison.md +1 -1
- package/docs/prd-purple-lab-platform.md +1 -1
- package/docs/show-hn-post.md +2 -2
- package/loki-ts/dist/loki.js +2 -2
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
- package/providers/codex.sh +3 -2
- package/references/agent-types.md +9 -9
- package/references/agents.md +8 -8
- package/references/business-ops.md +1 -1
- package/references/competitive-analysis.md +1 -1
- package/skills/agents.md +3 -3
- package/skills/providers.md +3 -3
package/dashboard/__init__.py
CHANGED
|
package/dashboard/static/index.html
CHANGED
|
@@ -6523,7 +6523,7 @@ var LokiDashboard=(()=>{var Ee=Object.defineProperty;var rt=Object.getOwnPropert
|
|
|
6523
6523
|
<p>App runner not started</p>
|
|
6524
6524
|
<p class="hint">App runner will start after the first successful build iteration.</p>
|
|
6525
6525
|
</div>
|
|
6526
|
-
`}_attachEventListeners(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector('[data-action="restart"]'),i=e.querySelector('[data-action="stop"]');t&&t.addEventListener("click",()=>this._handleRestart()),i&&i.addEventListener("click",()=>this._handleStop())}_escapeHtml(e){return e?String(e).replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,"""):""}};customElements.define("loki-app-status",Q);var Ke={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"No app yet",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Could not start",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},error:{color:"var(--loki-text-muted, #71717a)",label:"Status unavailable",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},X=class extends h{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._api=null,this._pollInterval=null,this._visibilityHandler=null,this._status=null,this._errors=null,this._error=null,this._lastDataHash=null,this._detailsOpen=!1}connectedCallback(){super.connectedCallback(),this._setupApi(),this.render(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(e,t,i){t!==i&&(e==="api-url"&&this._api&&(this._api.baseUrl=i,this._loadData()),e==="theme"&&this._applyTheme())}_setupApi(){let 
e=this.getAttribute("api-url")||window.location.origin;this._api=g({baseUrl:e})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let e=await this._api.getAppRunnerStatus(),t=e?.status||"not_initialized",i=null;if(t==="crashed"||t==="failed")try{i=await this._api.getAppRunnerErrors(50)}catch{i=null}let a=JSON.stringify({status:t,port:e?.port,url:e?.url,crash:e?.crash_count,errLen:i?.lines?.length||0}),s=this._error!==null;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._status=e,this._errors=i,this._error=null,this.render()}catch(e){this._error||(this._error=`Could not read app status: ${e.message}`,this.render())}}_isValidUrl(e){if(!e)return!1;try{let t=new URL(e);return t.protocol==="http:"||t.protocol==="https:"}catch{return!1}}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(e){this._error=`Restart failed: ${e.message}`,this.render()}}_handleRefresh(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector("iframe.preview-frame"),i=this._status;if(t&&i&&this._isValidUrl(i.url)){let a=(i.url.includes("?")?"&":"?")+"_t="+Date.now();t.src=i.url+a}}_handleOpenExternal(){let e=this._status;e&&this._isValidUrl(e.url)&&window.open(e.url,"_blank","noopener")}_toggleDetails(){this._detailsOpen=!this._detailsOpen,this.render()}_getStyles(){return`
|
|
6526
|
+
`}_attachEventListeners(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector('[data-action="restart"]'),i=e.querySelector('[data-action="stop"]');t&&t.addEventListener("click",()=>this._handleRestart()),i&&i.addEventListener("click",()=>this._handleStop())}_escapeHtml(e){return e?String(e).replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,"""):""}};customElements.define("loki-app-status",Q);var Ke={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"No app yet",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Could not start",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},error:{color:"var(--loki-text-muted, #71717a)",label:"Status unavailable",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},X=class extends h{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._api=null,this._pollInterval=null,this._visibilityHandler=null,this._status=null,this._errors=null,this._error=null,this._lastDataHash=null,this._detailsOpen=!1}connectedCallback(){super.connectedCallback(),this._setupApi(),this.render(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(e,t,i){t!==i&&(e==="api-url"&&this._api&&(this._api.baseUrl=i,this._loadData()),e==="theme"&&this._applyTheme())}_setupApi(){let 
e=this.getAttribute("api-url")||window.location.origin;this._api=g({baseUrl:e})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let e=await this._api.getAppRunnerStatus(),t=e?.status||"not_initialized",i=null;if(t==="crashed"||t==="failed")try{i=await this._api.getAppRunnerErrors(50)}catch{i=null}let a=JSON.stringify({status:t,port:e?.port,url:e?.url,crash:e?.crash_count,errLen:i?.lines?.length||0,healthOk:e?.last_health?.ok===!0}),s=this._error!==null;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._status=e,this._errors=i,this._error=null,this.render()}catch(e){this._error||(this._error=`Could not read app status: ${e.message}`,this.render())}}_isValidUrl(e){if(!e)return!1;try{let t=new URL(e);return t.protocol==="http:"||t.protocol==="https:"}catch{return!1}}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(e){this._error=`Restart failed: ${e.message}`,this.render()}}_handleRefresh(){let e=this.shadowRoot;if(!e)return;let t=e.querySelector("iframe.preview-frame"),i=this._status;if(t&&i&&this._isValidUrl(i.url)){let a=(i.url.includes("?")?"&":"?")+"_t="+Date.now();t.src=i.url+a}}_handleOpenExternal(){let e=this._status;e&&this._isValidUrl(e.url)&&window.open(e.url,"_blank","noopener")}_toggleDetails(){this._detailsOpen=!this._detailsOpen,this.render()}_getStyles(){return`
|
|
6527
6527
|
.preview { padding: 16px; font-family: var(--loki-font-family, system-ui, -apple-system, sans-serif); color: var(--loki-text-primary, #201515); }
|
|
6528
6528
|
.header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 12px; gap: 12px; flex-wrap: wrap; }
|
|
6529
6529
|
.header-left { display: flex; align-items: center; gap: 10px; }
|
package/docs/COMPARISON.md
CHANGED
|
@@ -24,10 +24,10 @@
|
|
|
24
24
|
|
|
25
25
|
| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
|
|
26
26
|
|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
|
|
27
|
-
| **Multi-Agent** | 41
|
|
27
|
+
| **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase; parallel review council + optional worktree streams (Claude), sequential elsewhere | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
|
|
28
28
|
| **Orchestration** | Full orchestrator | N/A | N/A | Git worktree | Hooks | Manager view | Workflow | Subagents |
|
|
29
29
|
| **Parallel Exec** | 10+ Haiku, 4 impl (worktree) | No | No | 8 max | Yes | Yes | Yes | Yes |
|
|
30
|
-
| **Agent
|
|
30
|
+
| **Agent Domains** | Eng, Ops, Business, Data, Product, Growth, Review | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
|
|
31
31
|
|
|
32
32
|
---
|
|
33
33
|
|
|
@@ -151,7 +151,7 @@
|
|
|
151
151
|
| **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
|
|
152
152
|
| **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
|
|
153
153
|
| **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
|
|
154
|
-
| **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized
|
|
154
|
+
| **Agent Specialization** | Custom Zen Agents | 41 pre-defined specialized agent roles | **Loki Mode** |
|
|
155
155
|
| **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
|
|
156
156
|
| **Review Comment Resolution** | Auto-apply simple changes | Manual review | **ADOPTED from Zencoder** |
|
|
157
157
|
| **Dependency Management** | Scheduled PRs, one group at a time | Mentioned only | **ADOPTED from Zencoder** |
|
|
@@ -180,7 +180,7 @@
|
|
|
180
180
|
|
|
181
181
|
1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
|
|
182
182
|
2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
|
|
183
|
-
3. **Agent Specialization**: 41 pre-defined specialized
|
|
183
|
+
3. **Agent Specialization**: 41 pre-defined specialized agent roles across 8 domains
|
|
184
184
|
4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
|
|
185
185
|
5. **Autonomy Design**: Minimal human intervention from PRD to production
|
|
186
186
|
6. **Research Foundation**: 10+ academic papers integrated vs proprietary
|
|
@@ -203,7 +203,7 @@
|
|
|
203
203
|
|---------|--------------|---------|-----------------|------------|-----------------|---------------------|----------------|
|
|
204
204
|
| **Stars** | 594 | 11,903 | 35K+ | 26K+ | 13.7K | N/A | N/A |
|
|
205
205
|
| **npm/wk** | 6.1K | 21.4K | N/A | N/A | N/A | N/A | N/A |
|
|
206
|
-
| **Agents** | 41 in 8
|
|
206
|
+
| **Agents** | 41 roles in 8 domains | 11 agents | Fresh per task | 108 agents | Swarm-based | 32 agents | N/A |
|
|
207
207
|
| **Skills** | Progressive disclosure | 6 slash commands | N/A | 129 skills | N/A | 35 skills | Memory focus |
|
|
208
208
|
| **Multi-Provider** | Yes (Claude/Codex/Gemini) | 3 CLIs (separate) | No | No | No | No | No |
|
|
209
209
|
| **Memory System** | 3-tier (episodic/semantic/procedural) | None | N/A | N/A | Hybrid | N/A | SQLite+FTS5 |
|
|
@@ -236,7 +236,7 @@ These are patterns from competing projects that are **practically and scientific
|
|
|
236
236
|
| **Constitutional AI Integration** | Principles-based self-critique from Anthropic research | None have this |
|
|
237
237
|
| **Anti-Sycophancy (CONSENSAGENT)** | Blind review + devil's advocate prevents groupthink | None have this |
|
|
238
238
|
| **Provider Abstraction Layer** | Clean degradation from full-featured to sequential-only | Claude-only projects can't degrade |
|
|
239
|
-
| **41 Specialized
|
|
239
|
+
| **41 Specialized Agent Roles** | Purpose-built role definitions in 8 domains vs generic; Loki gates every role's output through blind review + council | agents (108) has more but less organized |
|
|
240
240
|
| **Research Foundation** | 10+ academic papers integrated with citations | Most have no research backing |
|
|
241
241
|
|
|
242
242
|
### Superpowers Deep-Dive (35K+ Stars)
|
|
@@ -258,7 +258,7 @@ Plugin marketplace architecture with unprecedented scale:
|
|
|
258
258
|
| Pattern | Description | Loki Mode Status |
|
|
259
259
|
|---------|-------------|------------------|
|
|
260
260
|
| **72 Plugins** | Modular, focused plugins instead of monolith | Different approach (progressive disclosure) |
|
|
261
|
-
| **108 Agents** | Specialized agents for specific domains | 41
|
|
261
|
+
| **108 Agents** | Specialized agents for specific domains | 41 agent roles in Loki Mode |
|
|
262
262
|
| **129 Skills** | Skills as first-class objects | 10 skills in skills/ |
|
|
263
263
|
| **Four-Tier Model Strategy** | Explicit tier selection with constraints | Similar to Loki Mode tiers |
|
|
264
264
|
|
|
@@ -342,7 +342,7 @@ Tiered agent architecture with explicit escalation:
|
|
|
342
342
|
|
|
343
343
|
| Agent | Killer Feature |
|
|
344
344
|
|-------|---------------|
|
|
345
|
-
| **Loki Mode** | Minimal-human-intervention full SDLC from any spec (PRD, GitHub issue, or YAML), 41
|
|
345
|
+
| **Loki Mode** | Minimal-human-intervention full SDLC from any spec (PRD, GitHub issue, or YAML), 41 agent roles in 8 domains, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
|
|
346
346
|
| **Devin** | Full software engineer persona, Slack integration, 67% PR merge rate |
|
|
347
347
|
| **OpenAI Codex** | Skills marketplace, $skill-creator, GPT-5.2-Codex, secure sandbox |
|
|
348
348
|
| **Cursor** | 8 parallel agents, BugBot, Memories, $10B valuation, Composer model (250 tok/s) |
|
|
@@ -358,7 +358,7 @@ Tiered agent architecture with explicit escalation:
|
|
|
358
358
|
| Dimension | Loki Mode Advantage |
|
|
359
359
|
|-----------|-------------------|
|
|
360
360
|
| **Autonomy** | Designed for high autonomy with minimal human intervention |
|
|
361
|
-
| **Multi-Agent** | 41
|
|
361
|
+
| **Multi-Agent** | 41 prompt-defined agent roles in 8 domains adopted per phase (parallel review council + optional worktree streams on Claude, sequential elsewhere) vs 1-8 in competitors, with all output gated by blind review + council |
|
|
362
362
|
| **Quality** | 11 gates + blind review + devil's advocate + property-based testing |
|
|
363
363
|
| **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
|
|
364
364
|
| **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |
|
package/docs/COMPETITIVE-ANALYSIS.md
CHANGED
|
@@ -39,7 +39,7 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
|
|
|
39
39
|
| **Agent Count** | 41 types | 64+ agents | 5 roles | Unlimited | 8 parallel | 1 autonomous |
|
|
40
40
|
| **Parallel Execution** | Yes (multi-agent) | Yes (swarms) | Sequential | Yes (crews) | Yes (8 worktrees) | Yes (fleet) |
|
|
41
41
|
| **Published Benchmarks** | 98.78% HumanEval (self-reported, max 3 retries) | None | 85.9-87.7% HumanEval | None | ~250 tok/s | 15% complex tasks |
|
|
42
|
-
| **SWE-bench Score** |
|
|
42
|
+
| **SWE-bench Score** | Not measured (patch generation harness exists; official evaluator not run, so no resolve rate exists) | Unknown | Unknown | Unknown | Unknown | 15% complex |
|
|
43
43
|
| **Full SDLC** | Yes (8 phases) | Yes | Partial | Partial | No | Partial |
|
|
44
44
|
| **Business Ops** | **Yes (8 agents)** | No | No | No | No | No |
|
|
45
45
|
| **Enterprise Security** | `--dangerously-skip-permissions` | MCP sandboxed | Sandboxed | Audit logs, RBAC | Staged autonomy | Sandboxed |
|
|
@@ -213,49 +213,30 @@ GSD is the closest competitor -- a context engineering system that spawns fresh
|
|
|
213
213
|
|
|
214
214
|
**Failed Problems (after RARV):** HumanEval/32, HumanEval/50
|
|
215
215
|
|
|
216
|
-
### SWE-bench Lite
|
|
216
|
+
### SWE-bench Lite: Honest Status
|
|
217
217
|
|
|
218
|
-
**
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
**Loki Mode Multi-Agent (with RARV):**
|
|
229
|
-
|
|
230
|
-
| Metric | Value |
|
|
231
|
-
|--------|-------|
|
|
232
|
-
| **Patch Generation** | **99.67%** |
|
|
233
|
-
| Generated | 299/300 problems |
|
|
234
|
-
| Errors/Timeouts | 1 |
|
|
235
|
-
| Model | Claude Opus 4.5 |
|
|
236
|
-
| Time | 3.5 hours |
|
|
237
|
-
|
|
238
|
-
**Three-Way Comparison:**
|
|
239
|
-
|
|
240
|
-
| System | SWE-bench Patch Gen | Notes |
|
|
241
|
-
|--------|---------------------|-------|
|
|
242
|
-
| **Direct Claude** | **99.67%** (299/300) | Single agent, minimal overhead |
|
|
243
|
-
| **Loki Mode (multi-agent)** | **99.67%** (299/300) | 4-agent pipeline with RARV |
|
|
244
|
-
| Devin | ~15% complex tasks | Commercial, different benchmark |
|
|
245
|
-
|
|
246
|
-
**Key Finding:** After timeout optimization (Architect: 60s->120s), the multi-agent RARV pipeline matches direct Claude's performance on SWE-bench. Both achieve 99.67% patch generation rate.
|
|
218
|
+
**Loki Mode has NO SWE-bench score.** What exists: a harness that GENERATED candidate
|
|
219
|
+
patches for 299 of 300 SWE-bench Lite problems (Claude Opus 4.5; single-agent run
|
|
220
|
+
6.17h, 4-agent RARV pipeline 3.5h). Patch GENERATION only means a diff was produced.
|
|
221
|
+
It does NOT mean the patch fixes the issue. The official SWE-bench evaluator (apply
|
|
222
|
+
patch, run the repo's test suite) was never run, so there is no resolve rate, and
|
|
223
|
+
generation rates must not be compared against other tools' task-resolution rates
|
|
224
|
+
(e.g. Devin's independently-tested task success). For context, frontier model resolve
|
|
225
|
+
rates on SWE-bench Verified are in the ~85-90 percent range; any higher number from
|
|
226
|
+
any tool should be treated with suspicion, including ours.
|
|
247
227
|
|
|
248
|
-
**
|
|
228
|
+
**What a fair claim looks like:** "A reproducible patch-generation harness exists;
|
|
229
|
+
resolve rate is not yet measured." Nothing stronger.
|
|
249
230
|
|
|
250
231
|
---
|
|
251
232
|
|
|
252
233
|
## Critical Gaps to Address
|
|
253
234
|
|
|
254
|
-
### Priority 1: Benchmarks (
|
|
255
|
-
- **
|
|
256
|
-
- **
|
|
257
|
-
|
|
258
|
-
- **Next:**
|
|
235
|
+
### Priority 1: Benchmarks (PARTIAL)
|
|
236
|
+
- **Real:** 98.78% HumanEval Pass@1 with RARV (162/164, reproducible harness; 98.17% single-agent baseline)
|
|
237
|
+
- **NOT real:** SWE-bench. Patch generation ran (299/300 diffs produced); the official
|
|
238
|
+
evaluator never ran, so there is NO resolve rate and no comparable score.
|
|
239
|
+
- **Next:** run a contamination-resistant evaluator end-to-end (or publish nothing for SWE-bench)
|
|
259
240
|
|
|
260
241
|
### Priority 2: Security Model (Critical for Enterprise)
|
|
261
242
|
- **Gap:** Relies on `--dangerously-skip-permissions`
|
package/docs/INSTALLATION.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
|
|
4
4
|
|
|
5
|
-
**Version:** v7.
|
|
5
|
+
**Version:** v7.27.0
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
package/docs/auto-claude-comparison.md
CHANGED
|
@@ -85,7 +85,7 @@ Loki Mode is built on peer-reviewed research:
|
|
|
85
85
|
**Verdict: Loki Mode wins** - Academically grounded.
|
|
86
86
|
|
|
87
87
|
### 2. Specialized Agent Types
|
|
88
|
-
Loki Mode has 41
|
|
88
|
+
Loki Mode has 41 specialized agent roles across 8 domains - prompt-defined specifications the orchestrator adopts per phase, with parallel review (blind council) and optional worktree streams on Claude Code, sequential on other providers:
|
|
89
89
|
- Engineering (8): frontend, backend, database, mobile, API, QA, perf, infra
|
|
90
90
|
- Operations (8): DevOps, SRE, security, monitoring, incident, release, cost, compliance
|
|
91
91
|
- Business (8): marketing, sales, finance, legal, support, HR, investor, partnerships
|
|
@@ -142,13 +142,16 @@ Loki Mode has 14 quality gates:
|
|
|
142
142
|
|
|
143
143
|
### 6. Published Benchmarks
|
|
144
144
|
Loki Mode:
|
|
145
|
-
- HumanEval: 98.78% Pass@1 (162/164)
|
|
146
|
-
- SWE-bench:
|
|
147
|
-
|
|
145
|
+
- HumanEval: 98.78% Pass@1 (162/164, reproducible harness)
|
|
146
|
+
- SWE-bench: not measured. A patch-generation harness exists (299/300 diffs
|
|
147
|
+
produced), but the official evaluator was never run, so there is no resolve
|
|
148
|
+
rate. Patch generation is not a success metric and is not comparable to
|
|
149
|
+
other tools' resolution scores.
|
|
148
150
|
|
|
149
151
|
**Auto-Claude:** No published benchmarks.
|
|
150
152
|
|
|
151
|
-
**Verdict: Loki Mode wins** -
|
|
153
|
+
**Verdict: Loki Mode wins on HumanEval transparency** - one real, reproducible
|
|
154
|
+
number versus none. No SWE-bench performance claim is made.
|
|
152
155
|
|
|
153
156
|
### 7. Licensing
|
|
154
157
|
- Loki Mode: MIT (free, no restrictions)
|
|
@@ -248,7 +251,7 @@ Loki Mode now incorporates proven patterns from Cursor's large-scale agent deplo
|
|
|
248
251
|
**Loki Mode is better if you want:**
|
|
249
252
|
- Research-backed architecture
|
|
250
253
|
- Full spec-to-product lifecycle (not just coding)
|
|
251
|
-
- 41 specialized
|
|
254
|
+
- 41 specialized agent roles
|
|
252
255
|
- Anti-sycophancy measures
|
|
253
256
|
- MIT license
|
|
254
257
|
- No subscription requirement
|
package/docs/certification/01-core-concepts/lesson.md
CHANGED
|
@@ -70,9 +70,9 @@ Or via the environment variable `LOKI_COMPLEXITY=simple|standard|complex`.
|
|
|
70
70
|
|
|
71
71
|
## Agents
|
|
72
72
|
|
|
73
|
-
Loki Mode defines **41 specialized agent types** organized into **8
|
|
73
|
+
Loki Mode defines **41 specialized agent types** organized into **8 domains**:
|
|
74
74
|
|
|
75
|
-
|
|
|
75
|
+
| Domain | Agent Count | Examples |
|
|
76
76
|
|-------|-------------|----------|
|
|
77
77
|
| Engineering | 8 | frontend, backend, database, mobile, api, qa, perf, infra |
|
|
78
78
|
| Operations | 8 | devops, sre, security, monitor, incident, release, cost, compliance |
|
|
@@ -179,4 +179,4 @@ Every Loki Mode project uses these files in the `.loki/` directory:
|
|
|
179
179
|
|
|
180
180
|
## Summary
|
|
181
181
|
|
|
182
|
-
Loki Mode is an autonomous multi-agent system that follows the RARV cycle to build software from PRDs. It uses 41 agent types organized into 8
|
|
182
|
+
Loki Mode is an autonomous multi-agent system that follows the RARV cycle to build software from PRDs. It uses 41 agent types organized into 8 domains, enforces quality through 9 gates with blind peer review, and maintains episodic/semantic/procedural memory for continuous learning. Projects are classified into simple, standard, or complex tiers that determine the number of phases executed.
|
package/docs/competitive/emergence-others-analysis.md
CHANGED
|
@@ -457,7 +457,7 @@ This positioning highlights three unique capabilities no competitor offers toget
|
|
|
457
457
|
| Open source | Yes (Apache-2.0) | Yes |
|
|
458
458
|
| Speed | 240+ tokens/sec | Depends on provider |
|
|
459
459
|
| Providers | OpenAI only | 5 providers |
|
|
460
|
-
| Multi-agent | Experimental (isolated) | 41 agent types, 8
|
|
460
|
+
| Multi-agent | Experimental (isolated) | 41 agent types, 8 domains |
|
|
461
461
|
| Quality | Single-pass review | 10-gate system |
|
|
462
462
|
| **Loki Mode advantage:** | Autonomous pipeline, multi-provider, mature multi-agent |
|
|
463
463
|
|
package/docs/competitive/replit-lovable-analysis.md
CHANGED
|
@@ -312,7 +312,7 @@ Replit Agent has evolved rapidly through four major versions:
|
|
|
312
312
|
|-----------|:-----------:|:-----------:|:---------:|
|
|
313
313
|
| Natural language to app | Yes | Yes | Yes (via PRD) |
|
|
314
314
|
| Autonomous execution | 200 min sessions | Per-prompt | Unlimited (budget-gated) |
|
|
315
|
-
| Multi-agent orchestration | Parallel agents (Agent 4) | No | Yes (41 agent types, 8
|
|
315
|
+
| Multi-agent orchestration | Parallel agents (Agent 4) | No | Yes (41 agent types, 8 domains) |
|
|
316
316
|
| Self-testing loop | Yes | No | Yes (RARV cycle) |
|
|
317
317
|
| Code review | No | No | Yes (3-reviewer blind review) |
|
|
318
318
|
| Anti-sycophancy | No | No | Yes (devil's advocate) |
|
package/docs/cursor-comparison.md
CHANGED
|
@@ -122,7 +122,7 @@ BOOTSTRAP -> DISCOVERY -> ARCHITECTURE -> INFRASTRUCTURE
|
|
|
122
122
|
-> DEVELOPMENT -> QA -> DEPLOYMENT -> GROWTH (continuous)
|
|
123
123
|
```
|
|
124
124
|
|
|
125
|
-
**41 Specialized Agent
|
|
125
|
+
**41 Specialized Agent Roles across 8 domains** (prompt-defined specifications the orchestrator adopts per phase; parallel review council and optional worktree streams on Claude Code, sequential on other providers):
|
|
126
126
|
- Engineering (8 types)
|
|
127
127
|
- Operations (8 types)
|
|
128
128
|
- Business (8 types)
|
package/docs/prd-purple-lab-platform.md
CHANGED
|
@@ -198,7 +198,7 @@ Each gate: name, status (pass/fail/pending), details expandable
|
|
|
198
198
|
### Agent Activity
|
|
199
199
|
|
|
200
200
|
Real-time grid showing which of the 41 agent types are active:
|
|
201
|
-
- Agent name, type,
|
|
201
|
+
- Agent name, type, domain, model tier (Opus/Sonnet/Haiku)
|
|
202
202
|
- Current task
|
|
203
203
|
- Status (working/idle/completed)
|
|
204
204
|
- Duration
|
package/docs/show-hn-post.md
CHANGED
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
## Title
|
|
4
4
|
|
|
5
|
-
Show HN: Loki Mode - PRD in, tested code out (41
|
|
5
|
+
Show HN: Loki Mode - PRD in, tested code out (41 agent roles, 9 quality gates, RARV self-verification)
|
|
6
6
|
|
|
7
7
|
## Body
|
|
8
8
|
|
|
9
9
|
I built Loki Mode because I got tired of the copy-paste loop between AI coding assistants and my terminal. I wanted to hand over a PRD and get back a working, tested codebase -- not perfect, but a solid starting point.
|
|
10
10
|
|
|
11
|
-
**What it does:** You give it a Product Requirements Document. It breaks the work into tasks
|
|
11
|
+
**What it does:** You give it a Product Requirements Document. It breaks the work into tasks across 41 specialized agent roles organized into 8 domains (engineering, operations, business, data, product, growth, review, orchestration) -- prompt-defined specifications the orchestrator adopts per phase, with parallel review (blind council) and optional worktree streams on Claude Code, sequential on other providers -- and runs every iteration through a self-verification loop called RARV: Reason, Act, Reflect, Verify. The idea is that the system catches its own mistakes before you have to.
|
|
12
12
|
|
|
13
13
|
**Quality gates:** 9 automated gates including 3-reviewer blind review (agents review each other's work without seeing prior reviews), anti-sycophancy checks (a devil's advocate pass on unanimous approvals), and mock/mutation detection. These are not foolproof, but they catch a surprising number of issues that single-pass generation misses.
|
|
14
14
|
|
package/loki-ts/dist/loki.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
var f8=Object.defineProperty;var u8=($)=>$;function c8($,Q){this[$]=u8.bind(null,Q)}var g=($,Q)=>{for(var Z in Q)f8($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:c8.bind(Q,Z)})};var k=($,Q)=>()=>($&&(Q=$($=0)),Q);var X1=import.meta.require;var F$={};g(F$,{lokiDir:()=>P,homeLokiDir:()=>o1,findRepoRootForVersion:()=>d1,REPO_ROOT:()=>f});import{resolve as n,dirname as l1}from"path";import{fileURLToPath as p8}from"url";import{existsSync as L1}from"fs";import{homedir as l8}from"os";function d8(){let $=j$;for(let Q=0;Q<6;Q++){if(L1(n($,"VERSION"))&&L1(n($,"autonomy/run.sh")))return $;let Z=l1($);if(Z===$)break;$=Z}return n(j$,"..","..","..")}function d1($){let Q=$;for(let Z=0;Z<6;Z++){if(L1(n(Q,"VERSION"))&&L1(n(Q,"autonomy/run.sh")))return Q;let z=l1(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function o1(){return n(l8(),".loki")}var j$,f;var y=k(()=>{j$=l1(p8(import.meta.url));f=d8()});import{readFileSync as o8}from"fs";import{resolve as n8,dirname as a8}from"path";import{fileURLToPath as s8}from"url";function k1(){if($1!==null)return $1;let $="7.
|
|
2
|
+
var f8=Object.defineProperty;var u8=($)=>$;function c8($,Q){this[$]=u8.bind(null,Q)}var g=($,Q)=>{for(var Z in Q)f8($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:c8.bind(Q,Z)})};var k=($,Q)=>()=>($&&(Q=$($=0)),Q);var X1=import.meta.require;var F$={};g(F$,{lokiDir:()=>P,homeLokiDir:()=>o1,findRepoRootForVersion:()=>d1,REPO_ROOT:()=>f});import{resolve as n,dirname as l1}from"path";import{fileURLToPath as p8}from"url";import{existsSync as L1}from"fs";import{homedir as l8}from"os";function d8(){let $=j$;for(let Q=0;Q<6;Q++){if(L1(n($,"VERSION"))&&L1(n($,"autonomy/run.sh")))return $;let Z=l1($);if(Z===$)break;$=Z}return n(j$,"..","..","..")}function d1($){let Q=$;for(let Z=0;Z<6;Z++){if(L1(n(Q,"VERSION"))&&L1(n(Q,"autonomy/run.sh")))return Q;let z=l1(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function o1(){return n(l8(),".loki")}var j$,f;var y=k(()=>{j$=l1(p8(import.meta.url));f=d8()});import{readFileSync as o8}from"fs";import{resolve as n8,dirname as a8}from"path";import{fileURLToPath as s8}from"url";function k1(){if($1!==null)return $1;let $="7.27.0";if(typeof $==="string"&&$.length>0)return $1=$,$1;try{let Q=a8(s8(import.meta.url)),Z=d1(Q);$1=o8(n8(Z,"VERSION"),"utf-8").trim()}catch{$1="unknown"}return $1}var $1=null;var n1=k(()=>{y()});var E$={};g(E$,{runOrThrow:()=>t8,run:()=>j,commandVersion:()=>i8,commandExists:()=>v,ShellError:()=>a1});async function j($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,K;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}K=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[H,X,q]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:H,stderr:X,exitCode:q}}finally{if(z)clearTimeout(z);if(K)clearTimeout(K)}}async function t8($,Q={}){let Z=await j($,Q);if(Z.exitCode!==0)throw new 
a1(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function v($){let Q=r8($),Z=await j(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function r8($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function i8($,Q="--version"){if(!await v($))return null;let z=await j([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var a1;var d=k(()=>{a1=class a1 extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return e8?"":$}var e8,T,N,_,KZ,A,R,h,J;var c=k(()=>{e8=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),N=a("\x1B[0;32m"),_=a("\x1B[1;33m"),KZ=a("\x1B[0;34m"),A=a("\x1B[0;36m"),R=a("\x1B[1m"),h=a("\x1B[2m"),J=a("\x1B[0m")});import{existsSync as U7}from"fs";async function Q1(){if(B1!==void 0)return B1;let $="/opt/homebrew/bin/python3.12";if(U7($))return B1=$,$;let Q=await v("python3.12");if(Q)return B1=Q,Q;let Z=await v("python3");return B1=Z,Z}async function Z1($,Q={}){let Z=await Q1();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return j([Z,"-c",$],Q)}var B1;var H1=k(()=>{d()});var d$={};g(d$,{runStatus:()=>N7});import{existsSync as b,readFileSync as q1,readdirSync as v$,statSync as f$}from"fs";import{resolve as D,basename as P7}from"path";import{homedir as L7}from"os";async function j7(){if(await v("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${J}
|
|
3
3
|
`),process.stdout.write(`Install with:
|
|
4
4
|
`),process.stdout.write(` brew install jq (macOS)
|
|
5
5
|
`),process.stdout.write(` apt install jq (Debian/Ubuntu)
|
|
@@ -787,4 +787,4 @@ Set LOKI_LEGACY_BASH=1 to force the bash CLI for every command.
|
|
|
787
787
|
`),2}default:return process.stderr.write(`Unknown command: ${Q}
|
|
788
788
|
`),process.stderr.write(v8),2}}g$();process.on("SIGINT",()=>process.exit(130));process.on("SIGTERM",()=>process.exit(143));var l3=await p3(Bun.argv.slice(2));process.exit(l3);
|
|
789
789
|
|
|
790
|
-
//# debugId=
|
|
790
|
+
//# debugId=07AC5AC0D01821A064756E2164756E21
|
package/mcp/__init__.py
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "loki-mode",
|
|
3
|
-
"version": "7.26.0",
|
|
3
|
+
"version": "7.27.0",
|
|
4
4
|
"description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 11 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"agent",
|
package/providers/codex.sh
CHANGED
|
@@ -32,7 +32,7 @@ PROVIDER_CLI="codex"
|
|
|
32
32
|
# VERIFIED: exec --full-auto confirmed in codex exec --help (v0.98.0)
|
|
33
33
|
# --full-auto: sets --ask-for-approval on-request + --sandbox workspace-write (v0.98.0)
|
|
34
34
|
# Alternative: "exec --dangerously-bypass-approvals-and-sandbox" (legacy, no sandbox)
|
|
35
|
-
PROVIDER_AUTONOMOUS_FLAG="exec --full-auto"
|
|
35
|
+
PROVIDER_AUTONOMOUS_FLAG="exec --full-auto --skip-git-repo-check"
|
|
36
36
|
PROVIDER_PROMPT_FLAG=""
|
|
37
37
|
PROVIDER_PROMPT_POSITIONAL=true
|
|
38
38
|
|
|
@@ -119,7 +119,7 @@ provider_version() {
|
|
|
119
119
|
provider_invoke() {
|
|
120
120
|
local prompt="$1"
|
|
121
121
|
shift
|
|
122
|
-
codex exec --full-auto "$prompt" "$@"
|
|
122
|
+
codex exec --full-auto --skip-git-repo-check "$prompt" "$@"
|
|
123
123
|
}
|
|
124
124
|
|
|
125
125
|
# Model tier to effort level parameter (Codex uses effort, not separate models)
|
|
@@ -210,6 +210,7 @@ provider_invoke_with_tier() {
|
|
|
210
210
|
codex exec \
|
|
211
211
|
--ask-for-approval never \
|
|
212
212
|
--sandbox danger-full-access \
|
|
213
|
+
--skip-git-repo-check \
|
|
213
214
|
"${extra_flags[@]}" \
|
|
214
215
|
"$prompt" "$@"
|
|
215
216
|
}
|
package/references/agent-types.md
CHANGED
|
@@ -6,11 +6,11 @@ Complete definitions and capabilities for all 41 specialized agent types.
|
|
|
6
6
|
|
|
7
7
|
## Overview
|
|
8
8
|
|
|
9
|
-
Loki Mode has 41
|
|
9
|
+
Loki Mode has 41 specialized agent roles across 8 domains (37 domain agents + 4 orchestration agents) - prompt-defined specifications the orchestrator adopts per phase, not separate processes. Parallelism on Claude Code comes from the blind review council, the adversarial reviewer, and optional git-worktree streams; on other providers everything runs sequentially. The orchestrator activates only the roles needed for your project -- typically 5-10 for simple projects, more for complex ones.
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
-
## Engineering
|
|
13
|
+
## Engineering Domain (8 types)
|
|
14
14
|
|
|
15
15
|
| Agent | Capabilities |
|
|
16
16
|
|-------|-------------|
|
|
@@ -25,7 +25,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
25
25
|
|
|
26
26
|
---
|
|
27
27
|
|
|
28
|
-
## Operations
|
|
28
|
+
## Operations Domain (8 types)
|
|
29
29
|
|
|
30
30
|
| Agent | Capabilities |
|
|
31
31
|
|-------|-------------|
|
|
@@ -40,7 +40,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
40
40
|
|
|
41
41
|
---
|
|
42
42
|
|
|
43
|
-
## Business
|
|
43
|
+
## Business Domain (8 types)
|
|
44
44
|
|
|
45
45
|
| Agent | Capabilities |
|
|
46
46
|
|-------|-------------|
|
|
@@ -55,7 +55,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
55
55
|
|
|
56
56
|
---
|
|
57
57
|
|
|
58
|
-
## Data
|
|
58
|
+
## Data Domain (3 types)
|
|
59
59
|
|
|
60
60
|
| Agent | Capabilities |
|
|
61
61
|
|-------|-------------|
|
|
@@ -65,7 +65,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
65
65
|
|
|
66
66
|
---
|
|
67
67
|
|
|
68
|
-
## Product
|
|
68
|
+
## Product Domain (3 types)
|
|
69
69
|
|
|
70
70
|
| Agent | Capabilities |
|
|
71
71
|
|-------|-------------|
|
|
@@ -75,7 +75,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
75
75
|
|
|
76
76
|
---
|
|
77
77
|
|
|
78
|
-
## Growth
|
|
78
|
+
## Growth Domain (4 types)
|
|
79
79
|
|
|
80
80
|
| Agent | Capabilities |
|
|
81
81
|
|-------|-------------|
|
|
@@ -86,7 +86,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
86
86
|
|
|
87
87
|
---
|
|
88
88
|
|
|
89
|
-
## Review
|
|
89
|
+
## Review Domain (3 types)
|
|
90
90
|
|
|
91
91
|
| Agent | Capabilities |
|
|
92
92
|
|-------|-------------|
|
|
@@ -96,7 +96,7 @@ Loki Mode has 41 predefined agent types organized into 8 specialized swarms (37
|
|
|
96
96
|
|
|
97
97
|
---
|
|
98
98
|
|
|
99
|
-
## Orchestration
|
|
99
|
+
## Orchestration Domain (4 types)
|
|
100
100
|
|
|
101
101
|
> **Source:** [Cursor Scaling Learnings](./cursor-learnings.md) - patterns proven at large agent scale
|
|
102
102
|
|
package/references/agents.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Agent Type Definitions
|
|
2
2
|
|
|
3
|
-
Complete specifications for all 41 specialized agent
|
|
3
|
+
Complete specifications for all 41 specialized agent roles in Loki Mode (37 domain agents + 4 orchestration agents). These are prompt-defined role specifications the orchestrator adopts per phase, not separate processes.
|
|
4
4
|
|
|
5
5
|
**Note:** These are agent TYPE definitions, not a fixed count. Loki Mode dynamically spawns agents based on project needs - a simple todo app might use 5-10 agents, while a complex startup spawns more as needed.
|
|
6
6
|
|
|
@@ -45,7 +45,7 @@ Update after every task completion.
|
|
|
45
45
|
|
|
46
46
|
---
|
|
47
47
|
|
|
48
|
-
## Engineering
|
|
48
|
+
## Engineering Domain (8 Agents)
|
|
49
49
|
|
|
50
50
|
### eng-frontend
|
|
51
51
|
**Capabilities:**
|
|
@@ -241,7 +241,7 @@ Update after every task completion.
|
|
|
241
241
|
|
|
242
242
|
---
|
|
243
243
|
|
|
244
|
-
## Operations
|
|
244
|
+
## Operations Domain (8 Agents)
|
|
245
245
|
|
|
246
246
|
### ops-devops
|
|
247
247
|
**Capabilities:**
|
|
@@ -437,7 +437,7 @@ Update after every task completion.
|
|
|
437
437
|
|
|
438
438
|
---
|
|
439
439
|
|
|
440
|
-
## Business
|
|
440
|
+
## Business Domain (8 Agents)
|
|
441
441
|
|
|
442
442
|
### biz-marketing
|
|
443
443
|
**Capabilities:**
|
|
@@ -634,7 +634,7 @@ Update after every task completion.
|
|
|
634
634
|
|
|
635
635
|
---
|
|
636
636
|
|
|
637
|
-
## Data
|
|
637
|
+
## Data Domain (3 Agents)
|
|
638
638
|
|
|
639
639
|
### data-ml
|
|
640
640
|
**Capabilities:**
|
|
@@ -710,7 +710,7 @@ Update after every task completion.
|
|
|
710
710
|
|
|
711
711
|
---
|
|
712
712
|
|
|
713
|
-
## Product
|
|
713
|
+
## Product Domain (3 Agents)
|
|
714
714
|
|
|
715
715
|
### prod-pm
|
|
716
716
|
**Capabilities:**
|
|
@@ -787,7 +787,7 @@ Update after every task completion.
|
|
|
787
787
|
|
|
788
788
|
---
|
|
789
789
|
|
|
790
|
-
## Review
|
|
790
|
+
## Review Domain (3 Agents)
|
|
791
791
|
|
|
792
792
|
### review-code
|
|
793
793
|
**Capabilities:**
|
|
@@ -875,7 +875,7 @@ Update after every task completion.
|
|
|
875
875
|
|
|
876
876
|
---
|
|
877
877
|
|
|
878
|
-
## Growth
|
|
878
|
+
## Growth Domain (4 Agents)
|
|
879
879
|
|
|
880
880
|
### growth-hacker
|
|
881
881
|
**Capabilities:**
|
package/references/competitive-analysis.md
CHANGED
|
@@ -182,7 +182,7 @@ Dexter shows value of domain specialization. Our 41 agent types follow this patt
|
|
|
182
182
|
- Most haven't scaled across enterprise
|
|
183
183
|
|
|
184
184
|
### Loki Mode Alignment
|
|
185
|
-
- Multi-agent architecture (41 types, 8
|
|
185
|
+
- Multi-agent architecture (41 role types, 8 domains)
|
|
186
186
|
- Plan Agents (orchestrator, planner)
|
|
187
187
|
- Execution Agents (eng-*, ops-*, biz-*)
|
|
188
188
|
- Security controls (LOKI_SANDBOX_MODE, LOKI_BLOCKED_COMMANDS)
|
package/skills/agents.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Agent Dispatch & Structured Prompting
|
|
2
2
|
|
|
3
|
-
> **Full agent type definitions:** See `references/agent-types.md` for complete 41 agent role specifications across 8
|
|
3
|
+
> **Full agent type definitions:** See `references/agent-types.md` for complete 41 agent role specifications across 8 domains (Engineering, Operations, Business, Data, Product, Growth, Review, Orchestration). These are prompt-defined specifications the orchestrator adopts per phase. On Claude Code, parallelism comes from the blind review council, the adversarial reviewer, and optional git-worktree streams; on other providers everything runs sequentially.
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -249,8 +249,8 @@ Priority order for context:
|
|
|
249
249
|
|
|
250
250
|
See `references/agent-types.md` for complete specifications. Summary:
|
|
251
251
|
|
|
252
|
-
|
|
|
253
|
-
|
|
252
|
+
| Domain | Agent Types | Count |
|
|
253
|
+
|--------|-------------|-------|
|
|
254
254
|
| Engineering | frontend, backend, database, mobile, api, qa, perf, infra | 8 |
|
|
255
255
|
| Operations | devops, sre, security, monitor, incident, release, cost, compliance | 8 |
|
|
256
256
|
| Business | marketing, sales, finance, legal, support, hr, investor, partnerships | 8 |
|
package/skills/providers.md
CHANGED
|
@@ -6,7 +6,7 @@ Loki Mode supports four AI providers for autonomous execution.
|
|
|
6
6
|
|
|
7
7
|
> **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
|
|
8
8
|
> - Claude: `--dangerously-skip-permissions` (verified)
|
|
9
|
-
> - Codex:
|
|
9
|
+
> - Codex: `exec --full-auto --skip-git-repo-check` (the harness invocation; `--skip-git-repo-check` is required in fresh, non-git directories) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
|
|
10
10
|
|
|
11
11
|
| Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
|
|
12
12
|
|---------|-------------|--------------|-----------|-------|
|
|
@@ -56,7 +56,7 @@ Task(model="haiku", ...) # Fast tier (parallelize)
|
|
|
56
56
|
|
|
57
57
|
---
|
|
58
58
|
|
|
59
|
-
## OpenAI Codex CLI (Degraded Mode)
|
|
59
|
+
## OpenAI Codex CLI (Experimental, Degraded Mode)
|
|
60
60
|
|
|
61
61
|
**Best for:** Teams standardized on OpenAI. Accepts feature tradeoffs.
|
|
62
62
|
|
|
@@ -70,7 +70,7 @@ Task(model="haiku", ...) # Fast tier (parallelize)
|
|
|
70
70
|
**Invocation:**
|
|
71
71
|
```bash
|
|
72
72
|
# Recommended (v0.98.0+)
|
|
73
|
-
codex --full-auto "$prompt"
|
|
73
|
+
codex exec --full-auto --skip-git-repo-check "$prompt"
|
|
74
74
|
|
|
75
75
|
# Legacy (still supported)
|
|
76
76
|
codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"
|