loki-mode 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +691 -0
  3. package/SKILL.md +191 -0
  4. package/VERSION +1 -0
  5. package/autonomy/.loki/dashboard/index.html +2634 -0
  6. package/autonomy/CONSTITUTION.md +508 -0
  7. package/autonomy/README.md +201 -0
  8. package/autonomy/config.example.yaml +152 -0
  9. package/autonomy/loki +526 -0
  10. package/autonomy/run.sh +3636 -0
  11. package/bin/loki-mode.js +26 -0
  12. package/bin/postinstall.js +60 -0
  13. package/docs/ACKNOWLEDGEMENTS.md +234 -0
  14. package/docs/COMPARISON.md +325 -0
  15. package/docs/COMPETITIVE-ANALYSIS.md +333 -0
  16. package/docs/INSTALLATION.md +547 -0
  17. package/docs/auto-claude-comparison.md +276 -0
  18. package/docs/cursor-comparison.md +225 -0
  19. package/docs/dashboard-guide.md +355 -0
  20. package/docs/screenshots/README.md +149 -0
  21. package/docs/screenshots/dashboard-agents.png +0 -0
  22. package/docs/screenshots/dashboard-tasks.png +0 -0
  23. package/docs/thick2thin.md +173 -0
  24. package/package.json +48 -0
  25. package/references/advanced-patterns.md +453 -0
  26. package/references/agent-types.md +243 -0
  27. package/references/agents.md +1043 -0
  28. package/references/business-ops.md +550 -0
  29. package/references/competitive-analysis.md +216 -0
  30. package/references/confidence-routing.md +371 -0
  31. package/references/core-workflow.md +275 -0
  32. package/references/cursor-learnings.md +207 -0
  33. package/references/deployment.md +604 -0
  34. package/references/lab-research-patterns.md +534 -0
  35. package/references/mcp-integration.md +186 -0
  36. package/references/memory-system.md +467 -0
  37. package/references/openai-patterns.md +647 -0
  38. package/references/production-patterns.md +568 -0
  39. package/references/prompt-repetition.md +192 -0
  40. package/references/quality-control.md +437 -0
  41. package/references/sdlc-phases.md +410 -0
  42. package/references/task-queue.md +361 -0
  43. package/references/tool-orchestration.md +691 -0
  44. package/skills/00-index.md +120 -0
  45. package/skills/agents.md +249 -0
  46. package/skills/artifacts.md +174 -0
  47. package/skills/github-integration.md +218 -0
  48. package/skills/model-selection.md +125 -0
  49. package/skills/parallel-workflows.md +526 -0
  50. package/skills/patterns-advanced.md +188 -0
  51. package/skills/production.md +292 -0
  52. package/skills/quality-gates.md +180 -0
  53. package/skills/testing.md +149 -0
  54. package/skills/troubleshooting.md +109 -0
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Loki Mode CLI wrapper for npm distribution
4
+ * Delegates to the bash CLI
5
+ */
6
+
7
const { spawn } = require('child_process');
const os = require('os');
const path = require('path');

// Location of the bundled bash CLI, resolved relative to this wrapper so it
// works no matter where npm placed the package.
const lokiScript = path.join(__dirname, '..', 'autonomy', 'loki');

// Everything after `node loki-mode.js` is forwarded to the bash CLI untouched.
const args = process.argv.slice(2);

// Invoke bash directly with an argument array instead of `shell: true`.
// With `shell: true` Node joins the command and arguments into one unescaped
// string, so an install path or argument containing spaces or shell
// metacharacters would be re-split or re-interpreted by the shell.
const child = spawn('bash', [lokiScript, ...args], {
  stdio: 'inherit',
});

// Mirror the child's outcome as this process's exit status.
child.on('close', (code, signal) => {
  if (signal) {
    // `code` is null when the child was terminated by a signal; report the
    // conventional 128 + signal number rather than a misleading success.
    const signalNumber = os.constants.signals[signal] || 0;
    process.exit(128 + signalNumber);
  }
  process.exit(code || 0);
});

// Emitted when the process could not be spawned at all (e.g. bash missing).
child.on('error', (err) => {
  console.error('Error running loki:', err.message);
  console.error('Make sure bash is available on your system');
  process.exit(1);
});
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Loki Mode postinstall script
4
+ * Sets up the Claude Code skill symlink
5
+ */
6
+
7
const fs = require('fs');
const path = require('path');
const os = require('os');

const homeDir = os.homedir();
// Where the skill link is created, under the user's home directory.
const skillDir = path.join(homeDir, '.claude', 'skills', 'loki-mode');
// Root of the installed npm package (this script lives in <package>/bin).
const packageDir = path.join(__dirname, '..');

/**
 * Reads the version from the package's own package.json so the banner always
 * matches the release being installed instead of a hard-coded string.
 *
 * @returns {string|null} The version string, or null if it cannot be read.
 */
function readPackageVersion() {
  try {
    const manifestPath = path.join(packageDir, 'package.json');
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    return typeof manifest.version === 'string' ? manifest.version : null;
  } catch (err) {
    // Deliberately best-effort: a cosmetic banner must never fail the install.
    return null;
  }
}

/**
 * Like fs.lstatSync, but returns null when nothing exists at the path.
 * lstat does not follow symlinks, so a dangling symlink is still reported,
 * unlike fs.existsSync, which returns false for it.
 *
 * @param {string} target - Path to inspect.
 * @returns {fs.Stats|null} Stats of the entry itself, or null if absent.
 * @throws {Error} Any lstat failure other than ENOENT.
 */
function lstatOrNull(target) {
  try {
    return fs.lstatSync(target);
  } catch (err) {
    if (err.code === 'ENOENT') {
      return null;
    }
    throw err;
  }
}

const version = readPackageVersion();

console.log('');
console.log(version ? `Loki Mode v${version} installed!` : 'Loki Mode installed!');
console.log('');

// Try to create the skill symlink; any failure falls back to printing manual
// instructions rather than failing the npm install.
try {
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(skillDir), { recursive: true });

  const existing = lstatOrNull(skillDir);

  if (existing !== null && !existing.isSymbolicLink()) {
    // A real file or directory is in the way: leave it alone and tell the user.
    console.log(`Existing installation found at ${skillDir}`);
    console.log('Please remove it manually if you want to use this npm installation.');
    console.log('');
  } else {
    if (existing !== null) {
      // Replace a previous symlink, including one whose target no longer
      // exists (e.g. left behind by an earlier, since-removed install).
      fs.unlinkSync(skillDir);
    }
    fs.symlinkSync(packageDir, skillDir);
    console.log(`Skill installed to: ${skillDir}`);
  }
} catch (err) {
  console.log(`Could not auto-install skill: ${err.message}`);
  console.log('');
  console.log('Manual installation:');
  console.log(` ln -sf "${packageDir}" "${skillDir}"`);
}

console.log('');
console.log('Usage:');
console.log(' loki start [PRD] - Start Loki Mode');
console.log(' loki status - Check status');
console.log(' loki --help - Show all commands');
console.log('');
console.log('Or in Claude Code:');
console.log(' claude --dangerously-skip-permissions');
console.log(' Then say: "Loki Mode"');
console.log('');
@@ -0,0 +1,234 @@
1
+ # Acknowledgements
2
+
3
+ Loki Mode stands on the shoulders of giants. This project incorporates research, patterns, and insights from the leading AI labs, academic institutions, and practitioners in the field.
4
+
5
+ ---
6
+
7
+ ## Research Labs
8
+
9
+ ### Anthropic
10
+
11
+ Loki Mode is built for Claude and incorporates Anthropic's cutting-edge research on AI safety and agent development.
12
+
13
+ | Paper/Resource | Contribution to Loki Mode |
14
+ |----------------|---------------------------|
15
+ | [Constitutional AI: Harmlessness from AI Feedback](https://www.anthropic.com/research/constitutional-ai-harmlessness-from-ai-feedback) | Self-critique against principles, revision workflow |
16
+ | [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) | Evaluator-optimizer pattern, parallelization, routing |
17
+ | [Claude Code Best Practices](https://www.anthropic.com/engineering/claude-code-best-practices) | Explore-Plan-Code workflow, context management |
18
+ | [Simple Probes Can Catch Sleeper Agents](https://www.anthropic.com/research/probes-catch-sleeper-agents) | Defection probes, anomaly detection patterns |
19
+ | [Alignment Faking in Large Language Models](https://www.anthropic.com/research/alignment-faking) | Monitoring for strategic compliance |
20
+ | [Visible Extended Thinking](https://www.anthropic.com/research/visible-extended-thinking) | Thinking levels (think, think hard, ultrathink) |
21
+ | [Computer Use Safety](https://www.anthropic.com/news/3-5-models-and-computer-use) | Safe autonomous operation patterns |
22
+ | [Sabotage Evaluations](https://www.anthropic.com/research/sabotage-evaluations-for-frontier-models) | Safety evaluation methodology |
23
+ | [Effective Harnesses for Long-Running Agents](https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents) | One-feature-at-a-time pattern, Playwright MCP for E2E |
24
+ | [Claude Agent SDK Overview](https://platform.claude.com/docs/en/agent-sdk/overview) | Task tool, subagents, resume parameter, hooks |
25
+
26
+ ### Google DeepMind
27
+
28
+ DeepMind's research on world models, hierarchical reasoning, and scalable oversight informs Loki Mode's architecture.
29
+
30
+ | Paper/Resource | Contribution to Loki Mode |
31
+ |----------------|---------------------------|
32
+ | [SIMA 2: Generalist AI Agent](https://deepmind.google/blog/sima-2-an-agent-that-plays-reasons-and-learns-with-you-in-virtual-3d-worlds/) | Self-improvement loop, reward model training |
33
+ | [Gemini Robotics 1.5](https://deepmind.google/blog/gemini-robotics-15-brings-ai-agents-into-the-physical-world/) | Hierarchical reasoning (planner + executor) |
34
+ | [Dreamer 4: World Model Training](https://danijar.com/project/dreamer4/) | Simulation-first testing, safe exploration |
35
+ | [Genie 3: World Models](https://deepmind.google/blog/genie-3-a-new-frontier-for-world-models/) | World model architecture patterns |
36
+ | [Scalable AI Safety via Doubly-Efficient Debate](https://deepmind.google/research/publications/34920/) | Debate-based verification for critical changes |
37
+ | [Human-AI Complementarity for Amplified Oversight](https://deepmindsafetyresearch.medium.com/human-ai-complementarity-a-goal-for-amplified-oversight-0ad8a44cae0a) | AI-assisted human supervision |
38
+ | [Technical AGI Safety Approach](https://arxiv.org/html/2504.01849v1) | Safety-first agent design |
39
+
40
+ ### OpenAI
41
+
42
+ OpenAI's Agents SDK and Deep Research work supply foundational patterns for agent orchestration.
43
+
44
+ | Paper/Resource | Contribution to Loki Mode |
45
+ |----------------|---------------------------|
46
+ | [Agents SDK Documentation](https://openai.github.io/openai-agents-python/) | Tracing spans, guardrails, tripwires |
47
+ | [A Practical Guide to Building Agents](https://cdn.openai.com/business-guides-and-resources/a-practical-guide-to-building-agents.pdf) | Agent architecture best practices |
48
+ | [Building Agents Track](https://developers.openai.com/tracks/building-agents/) | Development patterns, handoff callbacks |
49
+ | [AGENTS.md Specification](https://agents.md/) | Standardized agent instructions |
50
+ | [Introducing Deep Research](https://openai.com/index/introducing-deep-research/) | Adaptive planning, backtracking |
51
+ | [Deep Research System Card](https://cdn.openai.com/deep-research-system-card.pdf) | Safety considerations for research agents |
52
+ | [Introducing o3 and o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/) | Reasoning model guidance |
53
+ | [Reasoning Best Practices](https://platform.openai.com/docs/guides/reasoning-best-practices) | Extended thinking patterns |
54
+ | [Chain of Thought Monitoring](https://openai.com/index/chain-of-thought-monitoring/) | Reasoning trace monitoring |
55
+ | [Agent Builder Safety](https://platform.openai.com/docs/guides/agent-builder-safety) | Safety patterns for agent builders |
56
+ | [Computer-Using Agent](https://openai.com/index/computer-using-agent/) | Computer use patterns |
57
+ | [Agentic AI Foundation](https://openai.com/index/agentic-ai-foundation/) | Industry standards, interoperability |
58
+
59
+ ### Amazon Web Services (AWS)
60
+
61
+ AWS Bedrock's multi-agent collaboration patterns inform Loki Mode's routing and dispatch strategies.
62
+
63
+ | Paper/Resource | Contribution to Loki Mode |
64
+ |----------------|---------------------------|
65
+ | [Multi-Agent Orchestration Guidance](https://aws.amazon.com/solutions/guidance/multi-agent-orchestration-on-aws/) | Three coordination mechanisms, architectural patterns |
66
+ | [Bedrock Multi-Agent Collaboration](https://docs.aws.amazon.com/bedrock/latest/userguide/agents-multi-agent-collaboration.html) | Supervisor mode, routing mode, 10-agent limit |
67
+ | [Multi-Agent Collaboration Announcement](https://aws.amazon.com/blogs/aws/introducing-multi-agent-collaboration-capability-for-amazon-bedrock/) | Intent classification, selective context sharing |
68
+ | [AgentCore for SRE](https://aws.amazon.com/blogs/machine-learning/build-multi-agent-site-reliability-engineering-assistants-with-amazon-bedrock-agentcore/) | Gateway, Memory, Identity, Observability components |
69
+
70
+ **Key Pattern Adopted:** Routing Mode Optimization - Direct dispatch for simple tasks (lower latency), supervisor orchestration for complex tasks (full coordination).
71
+
72
+ ---
73
+
74
+ ## Academic Research
75
+
76
+ ### Multi-Agent Systems
77
+
78
+ | Paper | Authors/Source | Contribution |
79
+ |-------|----------------|--------------|
80
+ | [Multi-Agent Collaboration Mechanisms Survey](https://arxiv.org/abs/2501.06322) | arXiv 2501.06322 | Collaboration structures, coopetition |
81
+ | [CONSENSAGENT: Anti-Sycophancy Framework](https://aclanthology.org/2025.findings-acl.1141/) | ACL 2025 Findings | Blind review, devil's advocate |
82
+ | [GoalAct: Hierarchical Execution](https://arxiv.org/abs/2504.16563) | arXiv 2504.16563 | Global planning, skill decomposition |
83
+ | [A-Mem: Agentic Memory System](https://arxiv.org/html/2502.12110v11) | arXiv 2502.12110 | Zettelkasten-style memory linking |
84
+ | [Multi-Agent Reflexion (MAR)](https://arxiv.org/html/2512.20845) | arXiv 2512.20845 | Structured debate, persona-based critics |
85
+ | [Iter-VF: Iterative Verification-First](https://arxiv.org/html/2511.21734v1) | arXiv 2511.21734 | Answer-only verification, Markovian retry |
86
+
87
+ ### Evaluation & Safety
88
+
89
+ | Paper | Authors/Source | Contribution |
90
+ |-------|----------------|--------------|
91
+ | [Assessment Framework for Agentic AI](https://arxiv.org/html/2512.12791v1) | arXiv 2512.12791 | Four-pillar evaluation framework |
92
+ | [Measurement Imbalance in Agentic AI](https://arxiv.org/abs/2506.02064) | arXiv 2506.02064 | Multi-dimensional evaluation axes |
93
+ | [Demo-to-Deployment Gap](https://www.marktechpost.com/2025/12/24/) | Stanford/Harvard | Tool reliability vs tool selection |
94
+
95
+ ---
96
+
97
+ ## Industry Resources
98
+
99
+ ### Tools & Frameworks
100
+
101
+ | Resource | Contribution |
102
+ |----------|--------------|
103
+ | [Cursor - Scaling Agents](https://cursor.com/blog/scaling-agents) | Hierarchical planner-worker model, optimistic concurrency, recursive sub-planners, judge agents, scale-tested patterns (1M+ LoC projects) |
104
+ | [NVIDIA ToolOrchestra](https://github.com/NVlabs/ToolOrchestra) | Efficiency metrics, three-reward signal framework, dynamic agent selection |
105
+ | [LerianStudio/ring](https://github.com/LerianStudio/ring) | Subagent-driven-development pattern |
106
+ | [Awesome Agentic Patterns](https://github.com/nibzard/awesome-agentic-patterns) | 105+ production patterns catalog |
107
+
108
+ ### Best Practices Guides
109
+
110
+ | Resource | Contribution |
111
+ |----------|--------------|
112
+ | [Maxim AI: Production Multi-Agent Systems](https://www.getmaxim.ai/articles/best-practices-for-building-production-ready-multi-agent-systems/) | Correlation IDs, failure handling |
113
+ | [UiPath: Agent Builder Best Practices](https://www.uipath.com/blog/ai/agent-builder-best-practices) | Single-responsibility agents |
114
+ | [GitHub: Speed Without Control](https://github.blog/) | Static analysis + AI review, guardrails |
115
+
116
+ ---
117
+
118
+ ## Hacker News Community
119
+
120
+ Battle-tested insights from practitioners deploying agents in production.
121
+
122
+ ### Discussions
123
+
124
+ | Thread | Key Insight |
125
+ |--------|-------------|
126
+ | [What Actually Works in Production for Autonomous Agents](https://news.ycombinator.com/item?id=44623207) | "Zero companies without human in the loop" |
127
+ | [Coding with LLMs in Summer 2025](https://news.ycombinator.com/item?id=44623953) | Context curation beats automatic RAG |
128
+ | [Superpowers: How I'm Using Coding Agents](https://news.ycombinator.com/item?id=45547344) | Sub-agents for context isolation (Simon Willison) |
129
+ | [Claude Code Experience After Two Weeks](https://news.ycombinator.com/item?id=44596472) | Fresh contexts yield better results |
130
+ | [AI Agent Benchmarks Are Broken](https://news.ycombinator.com/item?id=44531697) | LLM-as-judge has shared blind spots |
131
+ | [How to Orchestrate Multi-Agent Workflows](https://news.ycombinator.com/item?id=45955997) | Event-driven, decoupled coordination |
132
+ | [Context Engineering vs Prompt Engineering](https://news.ycombinator.com/item?id=44427757) | Manual context selection principles |
133
+
134
+ ### Show HN Projects
135
+
136
+ | Project | Contribution |
137
+ |---------|--------------|
138
+ | [Self-Evolving Agents Repository](https://news.ycombinator.com/item?id=45099226) | Self-improvement patterns |
139
+ | [Package Manager for Agent Skills](https://news.ycombinator.com/item?id=46422264) | Skills architecture |
140
+ | [Wispbit - AI Code Review Agent](https://news.ycombinator.com/item?id=44722603) | Code review patterns |
141
+ | [Agtrace - Monitoring for AI Coding Agents](https://news.ycombinator.com/item?id=46425670) | Agent monitoring patterns |
142
+
143
+ ---
144
+
145
+ ## Individual Contributors
146
+
147
+ Special thanks to thought leaders whose patterns and insights shaped Loki Mode:
148
+
149
+ | Contributor | Contribution |
150
+ |-------------|--------------|
151
+ | **Boris Cherny** (Creator of Claude Code) | Self-verification loop (2-3x quality improvement), extended thinking mode, "Less prompting, more systems" philosophy |
152
+ | **Ivan Steshov** | Centralized constitution, agent lineage tracking, structured artifacts as contracts |
153
+ | **Addy Osmani** | Git checkpoint system, specification-first approach, visual aids (Mermaid diagrams) |
154
+ | **Simon Willison** | Sub-agents for context isolation, skills system, context curation patterns |
155
+
156
+ ---
157
+
158
+ ## Production Patterns Summary
159
+
160
+ Key patterns incorporated from practitioner experience:
161
+
162
+ | Pattern | Source | Implementation |
163
+ |---------|--------|----------------|
164
+ | Human-in-the-Loop (HITL) | HN Production Discussions | Confidence-based escalation thresholds |
165
+ | Narrow Scope (3-5 steps) | Multiple Practitioners | Task scope constraints |
166
+ | Deterministic Validation | Production Teams | Rule-based outer loops (not LLM-judged) |
167
+ | Context Curation | Simon Willison | Manual selection, focused context |
168
+ | Blind Review + Devil's Advocate | CONSENSAGENT | Anti-sycophancy protocol |
169
+ | Hierarchical Reasoning | DeepMind Gemini | Orchestrator + specialized executors |
170
+ | Constitutional Self-Critique | Anthropic | Principles-based revision |
171
+ | Debate Verification | DeepMind | Critical change verification |
172
+ | One Feature at a Time | Anthropic Harness | Single feature per iteration, full verification |
173
+ | E2E Browser Testing | Anthropic Harness | Playwright MCP for visual verification |
174
+
175
+ ---
176
+
177
+ ## v3.2.0 Additions
178
+
179
+ ### Parallel Workflows
180
+
181
+ | Resource | Contribution |
182
+ |----------|--------------|
183
+ | [Claude Code Git Worktrees](https://code.claude.com/docs/en/common-workflows#run-parallel-claude-code-sessions-with-git-worktrees) | Parallel Claude sessions, worktree isolation pattern |
184
+
185
+ ### Key Patterns Incorporated (v3.2.0)
186
+
187
+ | Pattern | Source | Implementation |
188
+ |---------|--------|----------------|
189
+ | Git Worktree Isolation | Claude Code Docs | `skills/parallel-workflows.md`, `run.sh --parallel` |
190
+ | Parallel Testing Stream | Claude Code Docs | Testing worktree tracks main, continuous validation |
191
+ | Inter-Stream Signals | Custom | `.loki/signals/` for feature/test/docs coordination |
192
+ | Auto-Merge Workflow | Custom | Completed features merge back automatically |
193
+
194
+ ---
195
+
196
+ ## v3.0.0 Additions
197
+
198
+ ### Agent Interoperability
199
+
200
+ | Resource | Contribution |
201
+ |----------|--------------|
202
+ | [Google A2A Protocol](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/) | Agent Cards, capability discovery, JSON-RPC 2.0 |
203
+ | [A2A Protocol v0.3](https://cloud.google.com/blog/products/ai-machine-learning/agent2agent-protocol-is-getting-an-upgrade) | gRPC support, security card signing, enterprise features |
204
+ | [A2A Project GitHub](https://github.com/a2aproject/A2A) | Open protocol specification, SDK implementations |
205
+
206
+ ### Agentic Patterns
207
+
208
+ | Resource | Contribution |
209
+ |----------|--------------|
210
+ | [Awesome Agentic Patterns](https://github.com/nibzard/awesome-agentic-patterns) | 105+ production patterns catalog, feedback loops, tool patterns |
211
+ | [Agent Orchestration Critique](https://moridinamael.github.io/agent-orchestration/) | "Ralph Wiggum Mode" - simpler orchestration beats complex frameworks |
212
+
213
+ ### Key Patterns Incorporated
214
+
215
+ | Pattern | Source | Implementation |
216
+ |---------|--------|----------------|
217
+ | Agent Cards | A2A Protocol | `.loki/state/agents/` capability discovery |
218
+ | Structured Handoffs | A2A Protocol | JSON message format for agent-to-agent communication |
219
+ | Sub-Agent Spawning | awesome-agentic-patterns | Task tool with focused prompts |
220
+ | Dual LLM Pattern | awesome-agentic-patterns | Opus for planning, Haiku for execution |
221
+ | CI Feedback Loop | awesome-agentic-patterns | Test results injected into retry prompts |
222
+ | Minimal Orchestration | moridinamael | Simple continuation over complex frameworks |
223
+
224
+ ---
225
+
226
+ ## License
227
+
228
+ This acknowledgements file documents the research and resources that influenced Loki Mode's design. All referenced works retain their original licenses and copyrights.
229
+
230
+ Loki Mode itself is released under the MIT License.
231
+
232
+ ---
233
+
234
+ *Last updated: v4.1.0*
@@ -0,0 +1,325 @@
1
+ # Autonomous Coding Agents Comparison (2025-2026)
2
+
3
+ > Last Updated: January 17, 2026 (v2.36.8)
4
+ >
5
+ > A comprehensive comparison of Loki Mode against major autonomous coding agents and AI IDEs in the market.
6
+ > Deep-dive comparisons validated by Opus feedback loops.
7
+
8
+ ---
9
+
10
+ ## Overview Comparison
11
+
12
+ | Feature | **Loki Mode** | **Zencoder** | **Devin** | **OpenAI Codex** | **Cursor** | **Claude Code** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
13
+ |---------|--------------|--------------|-----------|-----------------|------------|-----------------|----------|-----------------|--------------|--------------|
14
+ | **Type** | Skill/Framework | Enterprise Platform | Standalone Agent | Cloud Agent | AI IDE | CLI Agent | AI IDE | AI IDE | Cloud Agent | AI IDE (OSS) |
15
+ | **Autonomy Level** | Full (zero human) | High | Full | High | Medium-High | High | High | High | High | High |
16
+ | **Max Runtime** | Unlimited | Async/Scheduled | Hours | Per-task | Session | Session | Days | Async | Per-task | Session |
17
+ | **Pricing** | Free (OSS) | Enterprise | $20/mo | ChatGPT Plus | $20/mo | API costs | Free preview | Free preview | $19/mo | Free (OSS) |
18
+ | **Open Source** | Yes | No | No | No | No | No | No | No | No | Yes |
19
+ | **GitHub Stars** | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | 70.9k |
20
+
21
+ ---
22
+
23
+ ## Multi-Agent & Orchestration
24
+
25
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
26
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
27
+ | **Multi-Agent** | 37 agents in 7 swarms | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
28
+ | **Orchestration** | Full orchestrator | N/A | N/A | Git worktree | Hooks | Manager view | Workflow | Subagents |
29
+ | **Parallel Exec** | 10+ Haiku, 4 impl (worktree) | No | No | 8 max | Yes | Yes | Yes | Yes |
30
+ | **Agent Swarms** | Eng, Ops, Business, Data, Product, Growth, Review | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
31
+
32
+ ---
33
+
34
+ ## Quality Control & Code Review
35
+
36
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
37
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
38
+ | **Code Review** | 3 blind reviewers + devil's advocate | Basic | Basic | BugBot PR | Property-based | Artifacts | Doc/Review | Basic |
39
+ | **Anti-Sycophancy** | Yes (CONSENSAGENT) | No | No | No | No | No | No | No |
40
+ | **Quality Gates** | 7 gates + PBT | Basic | Sandbox | Tests | Spec validation | Artifact checks | Tests | Permissions |
41
+ | **Constitutional AI** | Yes (principles) | No | Refusal training | No | No | No | No | No |
42
+
43
+ ---
44
+
45
+ ## Spec-Driven Development
46
+
47
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
48
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
49
+ | **Spec-First** | OpenAPI-first | Natural lang | Natural lang | Natural lang | requirements.md, design.md, tasks.md | Natural lang | Natural lang | AGENTS.md |
50
+ | **PRD Support** | Native parsing | Ticket-based | Issue-based | No | Native specs | No | Issue-based | No |
51
+ | **Design Docs** | Auto-generates | No | No | No | Yes (design.md) | Artifacts | Yes | No |
52
+
53
+ ---
54
+
55
+ ## Memory & Context
56
+
57
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
58
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
59
+ | **Memory System** | Episodic + Semantic + Procedural | Session | Task-scoped | Memories (flat) | Steering files | Knowledge base | Customization | Session |
60
+ | **Cross-Session** | Yes (ledgers, handoffs) | Limited | No | Yes | Yes | Yes | Yes | No |
61
+ | **Cross-Project** | Yes (global DB) | No | No | No | No | Yes | Customization | No |
62
+ | **Review Learning** | Yes (anti-patterns) | No | No | No | Yes | No | No | No |
63
+
64
+ ---
65
+
66
+ ## Self-Verification & Testing
67
+
68
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
69
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
70
+ | **Verification Cycle** | RARV (Reason-Act-Reflect-Verify) | Plan-Execute | Plan-Execute | Execute | Spec-Design-Task | Plan-Verify | Execute | Plan-Build |
71
+ | **Property-Based Testing** | Yes (fast-check) | No | No | No | Yes | No | No | No |
72
+ | **Event Hooks** | Yes (file, task, phase) | No | No | No | Yes | No | No | Yes (plugins) |
73
+ | **Debate Verification** | Yes (DeepMind) | No | No | No | No | No | No | No |
74
+ | **Rollback** | Git worktree + stash | No | No | Git | No | Artifacts | No | Git |
75
+
76
+ ---
77
+
78
+ ## Model Selection & Routing
79
+
80
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
81
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
82
+ | **Model Strategy** | Opus=plan, Sonnet=dev, Haiku=ops | GPT-4 | codex-1 | Multi-model | Claude family | Gemini 3 + Claude + GPT | Bedrock | Multi-provider |
83
+ | **Confidence Routing** | 4-tier (auto/direct/supervisor/escalate) | No | No | No | No | No | No | No |
84
+ | **Dynamic Selection** | By complexity | Fixed | Fixed | User choice | User choice | User choice | Auto | User choice |
85
+
86
+ ---
87
+
88
+ ## Code Transformation & Migration
89
+
90
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
91
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
92
+ | **Language Upgrades** | Yes (Java, Python, Node) | No | No | No | No | No | Yes (/transform) | No |
93
+ | **DB Migrations** | Yes (Oracle->PG, MySQL->PG) | No | No | No | No | No | Yes | No |
94
+ | **Framework Modernization** | Yes (Angular->React, .NET) | No | No | No | No | No | Yes | No |
95
+
96
+ ---
97
+
98
+ ## Artifact Generation
99
+
100
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
101
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
102
+ | **Verification Reports** | Yes (on phase complete) | No | No | No | No | Yes | No | No |
103
+ | **Architecture Diagrams** | Yes (mermaid) | No | No | No | Yes | Yes | Yes | No |
104
+ | **Screenshots** | Yes (Playwright) | No | No | No | No | Yes (video) | No | No |
105
+ | **Browser Recording** | No (deterministic tests) | No | No | No | No | Yes | No | No |
106
+
107
+ ---
108
+
109
+ ## Skills & Extensibility
110
+
111
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
112
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
113
+ | **Skills System** | IS a SKILL.md | N/A | $skill-creator, $skill-installer | Rules | SKILL.md compatible | N/A | N/A | SKILL.md compatible |
114
+ | **Plugin System** | Wrapper script | N/A | N/A | Extensions | Hooks | N/A | MCP | JS/TS plugins |
115
+ | **MCP Support** | Playwright MCP | N/A | N/A | Yes | Yes | N/A | Yes | Yes |
116
+
117
+ ---
118
+
119
+ ## Research Foundation
120
+
121
+ | Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
122
+ |---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
123
+ | **Research Base** | OpenAI SDK, DeepMind, Anthropic, ToolOrchestra, CONSENSAGENT, MAR, GoalAct | Proprietary | RL on coding | Proprietary | AWS | DeepMind | AWS | N/A |
124
+ | **Papers Cited** | 10+ | None public | None public | None public | None public | Gemini papers | None public | None public |
125
+
126
+ ---
127
+
128
+ ## Benchmarks (SWE-bench Verified)
129
+
130
+ | Agent | Score | Notes |
131
+ |-------|-------|-------|
132
+ | **Google Antigravity** | 76.2% | With Gemini 3 Pro |
133
+ | **Claude Code** | ~75%+ | Claude Sonnet 4.5 |
134
+ | **OpenAI Codex** | ~70%+ | GPT-5.2-Codex |
135
+ | **Devin 2.0** | 67% | PR merge rate doubled |
136
+ | **Amazon Q Developer** | 66% | State-of-the-art claim |
137
+ | **Loki Mode** | Inherits Claude | Framework, not model |
138
+
139
+ ---
140
+
141
+ ## Zencoder/Zenflow Comparison (v2.36.7)
142
+
143
+ **Comprehensive analysis of the Zencoder.ai enterprise AI coding platform, including Zenflow (autonomous workflows), Zen Agents (specialized agents), and Zentester (QA automation).**
144
+
145
+ ### Feature Comparison
146
+
147
+ | Feature | **Zencoder** | **Loki Mode** | **Assessment** |
148
+ |---------|-------------|---------------|----------------|
149
+ | **Four Pillars** | Structured Workflows, SDD, Multi-Agent Verification, Parallel Execution | SDLC + RARV + 7 Gates + Worktrees | TIE |
150
+ | **Spec-Driven Dev** | Specs as first-class objects | OpenAPI-first | TIE |
151
+ | **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
152
+ | **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
153
+ | **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
154
+ | **Agent Specialization** | Custom Zen Agents | 37 pre-defined specialized agents | **Loki Mode** |
155
+ | **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
156
+ | **Review Comment Resolution** | Auto-apply simple changes | Manual review | **ADOPTED from Zencoder** |
157
+ | **Dependency Management** | Scheduled PRs, one group at a time | Mentioned only | **ADOPTED from Zencoder** |
158
+ | **Multi-Repo Support** | Full cross-repo workflows | Single repo | Zencoder (N/A for Claude Code context) |
159
+ | **IDE Plugins** | VS Code, JetBrains, GitHub App | CLI skill | Zencoder (different use case) |
160
+ | **Repo Grokking** | Proprietary semantic indexing | Claude native exploration | Different approach |
161
+
162
+ ### Patterns ADOPTED from Zencoder
163
+
164
+ | Pattern | Description | Priority |
165
+ |---------|-------------|----------|
166
+ | **CI Failure Analysis** | Classify failures (regression/flakiness/environment/dependency), auto-fix 90% of flaky tests | HIGH |
167
+ | **Review Comment Resolution** | Auto-apply simple review comments (validation, tests, error messages) | HIGH |
168
+ | **Dependency Management** | Weekly scans, one group at a time, security > major > minor > patch | MEDIUM |
169
+
170
+ ### Patterns NOT Adopted
171
+
172
+ | Pattern | Zencoder Feature | Why Not Adopted |
173
+ |---------|-----------------|-----------------|
174
+ | Model Diversity | Claude critiques OpenAI code (54% improvement) | Claude Code only has Claude models available |
175
+ | Multi-Repo Support | Cross-repo change coordination | Claude Code is single-context per session |
176
+ | IDE Plugins | VS Code, JetBrains integrations | Loki Mode is a skill, not a plugin |
177
+ | Repo Grokking | Proprietary semantic indexing | Claude Code has native codebase exploration |
178
+
179
+ ### Where Loki Mode EXCEEDS Zencoder
180
+
181
+ 1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
182
+ 2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
183
+ 3. **Agent Specialization**: 37 pre-defined specialized agents across 7 swarms
184
+ 4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
185
+ 5. **Autonomy Design**: Zero human intervention from PRD to production
186
+ 6. **Research Foundation**: 10+ academic papers integrated vs proprietary
187
+
188
+ ### Where Zencoder EXCEEDS Loki Mode
189
+
190
+ 1. **Multi-Repo**: Cross-repository change coordination (N/A for Claude Code)
191
+ 2. **Model Diversity**: Can use Claude to critique OpenAI-generated code (Claude Code limitation)
192
+ 3. **IDE Integration**: Native plugins for VS Code, JetBrains (Loki Mode is CLI-based)
193
+
194
+ ---
195
+
196
+ ## Deep-Dive Comparison Results
197
+
198
+ ### Patterns Adopted from Each Competitor
199
+
200
+ | Source | Pattern Adopted | Version |
201
+ |--------|----------------|---------|
202
+ | **OpenCode** | Proactive context management (compaction at 90%) | v2.36.2 |
203
+ | **Cursor** | Git worktree isolation for parallel agents | v2.36.3 |
204
+ | **Cursor** | Atomic checkpoint/rollback with git stash | v2.36.3 |
205
+ | **Kiro** | Property-based testing from specs | v2.36.4 |
206
+ | **Kiro** | Event-driven hooks (file, task, phase triggers) | v2.36.4 |
207
+ | **Kiro** | Review-to-memory learning (anti-patterns) | v2.36.4 |
208
+ | **Amazon Q** | Code transformation agent (migrations) | v2.36.5 |
209
+ | **Antigravity** | Artifact generation (reports, diagrams) | v2.36.5 |
210
+
211
+ ### Patterns NOT Adopted (with justification)
212
+
213
+ | Pattern | Source | Why Not Adopted |
214
+ |---------|--------|-----------------|
215
+ | LSP Integration | OpenCode | Violates deterministic validation principle |
216
+ | Plugin/Hook System | OpenCode | Adds complexity for human extensibility |
217
+ | Tool Call Limits (25 ops) | Cursor | Contradicts autonomous operation |
218
+ | BugBot GitHub Comments | Cursor | Pre-commit review is superior |
219
+ | Confidence-based Clarification | Devin | "NEVER ask questions" is core rule |
220
+ | Progressive Skill Disclosure | Codex | Already implicit in references/ structure |
221
+ | Agent Steering Files | Kiro | CLAUDE.md + memory already cover this |
222
+ | Manager Surface (interactive) | Antigravity | Requires human control |
223
+ | Video Recording | Antigravity | Requires human review |
224
+
225
+ ---
226
+
227
+ ## Unique Differentiators
228
+
229
+ | Agent | Killer Feature |
230
+ |-------|---------------|
231
+ | **Loki Mode** | Zero-human-intervention full SDLC, 37 agents in 7 swarms, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
232
+ | **Devin** | Full software engineer persona, Slack integration, 67% PR merge rate |
233
+ | **OpenAI Codex** | Skills marketplace, $skill-creator, GPT-5.2-Codex, secure sandbox |
234
+ | **Cursor** | 8 parallel agents, BugBot, Memories, $10B valuation, Composer model (250 tok/s) |
235
+ | **Kiro** | Spec-driven development (requirements.md/design.md/tasks.md), Property-based testing, Hooks |
236
+ | **Antigravity** | Manager Surface, Artifacts system (video), browser subagents, Gemini 3 (76.2% SWE-bench) |
237
+ | **Amazon Q** | Code transformation (/transform), 66% SWE-bench, deep AWS integration, MCP support |
238
+ | **OpenCode** | 70.9k stars, multi-provider, LSP integration (25+ languages), plugin system |
239
+
240
+ ---
241
+
242
+ ## Summary: Where Loki Mode Excels
243
+
244
+ | Dimension | Loki Mode Advantage |
245
+ |-----------|-------------------|
246
+ | **Autonomy** | Only agent designed for TRUE zero human intervention |
247
+ | **Multi-Agent** | 37 specialized agents in 7 swarms vs 1-8 in competitors |
248
+ | **Quality** | 7 gates + blind review + devil's advocate + property-based testing |
249
+ | **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
250
+ | **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |
251
+ | **Memory** | 3-tier memory (episodic/semantic/procedural) + review learning + cross-project |
252
+ | **Transformation** | Code migration workflows (language, database, framework) |
253
+ | **Cost** | Free (open source) vs $20-500/month |
254
+ | **Customization** | Full source access vs black box |
255
+
256
+ ---
257
+
258
+ ## Where Competitors Excel
259
+
260
+ | Competitor | Advantage Over Loki Mode |
261
+ |------------|-------------------------|
262
+ | **Kiro** | Native spec files (requirements.md, design.md, tasks.md) |
263
+ | **Antigravity** | Browser video recording, Manager Surface for human orchestration |
264
+ | **Cursor** | Polished IDE UX, $10B valuation, massive adoption ($500M ARR) |
265
+ | **Devin** | Slack-native workflow, team collaboration |
266
+ | **Codex** | Skills marketplace, GPT-5.2 model capabilities |
267
+ | **Amazon Q** | Deep AWS integration, enterprise support |
268
+ | **OpenCode** | Multi-provider support, LSP integration |
269
+
270
+ ---
271
+
272
+ ## Validation Methodology
273
+
274
+ Each comparison was validated through:
275
+
276
+ 1. **Deep documentation analysis** - Official docs, blogs, changelogs
277
+ 2. **Opus feedback loop** - Critical evaluation by Claude Opus 4.5
278
+ 3. **Pattern extraction** - Identify genuinely beneficial patterns
279
+ 4. **Autonomous fit assessment** - Does it serve zero-human-intervention?
280
+ 5. **Implementation** - Adopt patterns that pass validation
281
+
282
+ ### Validation Questions Asked
283
+ - Does this pattern require human intervention?
284
+ - Does it improve autonomous quality/verification?
285
+ - Does it align with Constitutional AI principles?
286
+ - Is it simpler than alternatives?
287
+
288
+ ---
289
+
290
+ ## Sources
291
+
292
+ ### Deep-Dive Analysis Sources
293
+ - [OpenCode GitHub](https://github.com/anomalyco/opencode) - 70.9k stars
294
+ - [OpenCode Internals Deep Dive](https://cefboud.com/posts/coding-agents-internals-opencode-deepdive/)
295
+ - [Cursor 2.0 Agent-First Architecture](https://www.digitalapplied.com/blog/cursor-2-0-agent-first-architecture-guide)
296
+ - [Devin 2025 Performance Review](https://cognition.ai/blog/devin-annual-performance-review-2025)
297
+ - [OpenAI Codex Skills](https://developers.openai.com/codex/skills/)
298
+ - [GPT-5.2-Codex System Card](https://openai.com/index/gpt-5-2-codex-system-card/)
299
+ - [Introducing Kiro (Kiro Blog)](https://kiro.dev/blog/introducing-kiro/)
300
+ - [Kiro Autonomous Agent](https://kiro.dev/blog/introducing-kiro-autonomous-agent/)
301
+ - [Google Antigravity Blog](https://developers.googleblog.com/build-with-google-antigravity-our-new-agentic-development-platform/)
302
+ - [Amazon Q Developer Features](https://aws.amazon.com/q/developer/features/)
303
+
304
+ ### Additional Sources
305
+ - [Faros AI - Best AI Coding Agents 2026](https://www.faros.ai/blog/best-ai-coding-agents-2026)
306
+ - [Artificial Analysis - Coding Agents Comparison](https://artificialanalysis.ai/insights/coding-agents-comparison)
307
+ - [Simon Willison on OpenAI Skills](https://simonwillison.net/2025/Dec/12/openai-skills/)
308
+ - [VentureBeat - Google Antigravity](https://venturebeat.com/ai/google-antigravity-introduces-agent-first-architecture-for-asynchronous)
309
+
310
+ ---
311
+
312
+ ## Version History
313
+
314
+ | Version | Date | Comparisons Added |
315
+ |---------|------|-------------------|
316
+ | v2.36.2 | 2026-01-15 | OpenCode |
317
+ | v2.36.3 | 2026-01-15 | Cursor, Devin |
318
+ | v2.36.4 | 2026-01-15 | Codex, Kiro |
319
+ | v2.36.5 | 2026-01-15 | Antigravity, Amazon Q |
320
+ | v2.36.7 | 2026-01-17 | Zencoder/Zenflow |
321
+ | v2.36.8 | 2026-01-17 | Model assignment update (Opus for SDLC phases) |
322
+
323
+ ---
324
+
325
+ **Note:** Features and pricing may change. Always verify with official sources. This comparison focuses on architectural patterns for autonomous operation, not subjective quality assessments.