npm - loki-mode - Versions diffs - 4.2.0 - Mend

loki-mode 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/LICENSE +21 -0
package/README.md +691 -0
package/SKILL.md +191 -0
package/VERSION +1 -0
package/autonomy/.loki/dashboard/index.html +2634 -0
package/autonomy/CONSTITUTION.md +508 -0
package/autonomy/README.md +201 -0
package/autonomy/config.example.yaml +152 -0
package/autonomy/loki +526 -0
package/autonomy/run.sh +3636 -0
package/bin/loki-mode.js +26 -0
package/bin/postinstall.js +60 -0
package/docs/ACKNOWLEDGEMENTS.md +234 -0
package/docs/COMPARISON.md +325 -0
package/docs/COMPETITIVE-ANALYSIS.md +333 -0
package/docs/INSTALLATION.md +547 -0
package/docs/auto-claude-comparison.md +276 -0
package/docs/cursor-comparison.md +225 -0
package/docs/dashboard-guide.md +355 -0
package/docs/screenshots/README.md +149 -0
package/docs/screenshots/dashboard-agents.png +0 -0
package/docs/screenshots/dashboard-tasks.png +0 -0
package/docs/thick2thin.md +173 -0
package/package.json +48 -0
package/references/advanced-patterns.md +453 -0
package/references/agent-types.md +243 -0
package/references/agents.md +1043 -0
package/references/business-ops.md +550 -0
package/references/competitive-analysis.md +216 -0
package/references/confidence-routing.md +371 -0
package/references/core-workflow.md +275 -0
package/references/cursor-learnings.md +207 -0
package/references/deployment.md +604 -0
package/references/lab-research-patterns.md +534 -0
package/references/mcp-integration.md +186 -0
package/references/memory-system.md +467 -0
package/references/openai-patterns.md +647 -0
package/references/production-patterns.md +568 -0
package/references/prompt-repetition.md +192 -0
package/references/quality-control.md +437 -0
package/references/sdlc-phases.md +410 -0
package/references/task-queue.md +361 -0
package/references/tool-orchestration.md +691 -0
package/skills/00-index.md +120 -0
package/skills/agents.md +249 -0
package/skills/artifacts.md +174 -0
package/skills/github-integration.md +218 -0
package/skills/model-selection.md +125 -0
package/skills/parallel-workflows.md +526 -0
package/skills/patterns-advanced.md +188 -0
package/skills/production.md +292 -0
package/skills/quality-gates.md +180 -0
package/skills/testing.md +149 -0
package/skills/troubleshooting.md +109 -0

package/bin/loki-mode.js ADDED Viewed

@@ -0,0 +1,26 @@
+#!/usr/bin/env node
+/**
+ * Loki Mode CLI wrapper for npm distribution
+ * Delegates to the bash CLI
+ */
+const { spawn } = require('child_process');
+const path = require('path');
+const lokiScript = path.join(__dirname, '..', 'autonomy', 'loki');
+const args = process.argv.slice(2);
+const child = spawn(lokiScript, args, {
+  stdio: 'inherit',
+  shell: true
+});
+child.on('close', (code) => {
+  process.exit(code || 0);
+});
+child.on('error', (err) => {
+  console.error('Error running loki:', err.message);
+  console.error('Make sure bash is available on your system');
+  process.exit(1);
+});

package/bin/postinstall.js ADDED Viewed

@@ -0,0 +1,60 @@
+#!/usr/bin/env node
+/**
+ * Loki Mode postinstall script
+ * Sets up the Claude Code skill symlink
+ */
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const homeDir = os.homedir();
+const skillDir = path.join(homeDir, '.claude', 'skills', 'loki-mode');
+const packageDir = path.join(__dirname, '..');
+console.log('');
+console.log('Loki Mode v4.1.0 installed!');
+console.log('');
+// Try to create skill symlink
+try {
+  const skillParent = path.dirname(skillDir);
+  if (!fs.existsSync(skillParent)) {
+    fs.mkdirSync(skillParent, { recursive: true });
+  }
+  // Remove existing symlink/directory
+  if (fs.existsSync(skillDir)) {
+    const stats = fs.lstatSync(skillDir);
+    if (stats.isSymbolicLink()) {
+      fs.unlinkSync(skillDir);
+    } else {
+      console.log(`Existing installation found at ${skillDir}`);
+      console.log('Please remove it manually if you want to use this npm installation.');
+      console.log('');
+    }
+  }
+  // Create symlink
+  if (!fs.existsSync(skillDir)) {
+    fs.symlinkSync(packageDir, skillDir);
+    console.log(`Skill installed to: ${skillDir}`);
+  }
+} catch (err) {
+  console.log(`Could not auto-install skill: ${err.message}`);
+  console.log('');
+  console.log('Manual installation:');
+  console.log(`  ln -sf "${packageDir}" "${skillDir}"`);
+}
+console.log('');
+console.log('Usage:');
+console.log('  loki start [PRD]    - Start Loki Mode');
+console.log('  loki status         - Check status');
+console.log('  loki --help         - Show all commands');
+console.log('');
+console.log('Or in Claude Code:');
+console.log('  claude --dangerously-skip-permissions');
+console.log('  Then say: "Loki Mode"');
+console.log('');

package/docs/ACKNOWLEDGEMENTS.md ADDED Viewed

@@ -0,0 +1,234 @@
+# Acknowledgements
+Loki Mode stands on the shoulders of giants. This project incorporates research, patterns, and insights from the leading AI labs, academic institutions, and practitioners in the field.
+---
+## Research Labs
+### Anthropic
+Loki Mode is built for Claude and incorporates Anthropic's cutting-edge research on AI safety and agent development.
+| Paper/Resource | Contribution to Loki Mode |
+|----------------|---------------------------|
+| [Constitutional AI: Harmlessness from AI Feedback](https://www.anthropic.com/research/constitutional-ai-harmlessness-from-ai-feedback) | Self-critique against principles, revision workflow |
+| [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) | Evaluator-optimizer pattern, parallelization, routing |
+| [Claude Code Best Practices](https://www.anthropic.com/engineering/claude-code-best-practices) | Explore-Plan-Code workflow, context management |
+| [Simple Probes Can Catch Sleeper Agents](https://www.anthropic.com/research/probes-catch-sleeper-agents) | Defection probes, anomaly detection patterns |
+| [Alignment Faking in Large Language Models](https://www.anthropic.com/research/alignment-faking) | Monitoring for strategic compliance |
+| [Visible Extended Thinking](https://www.anthropic.com/research/visible-extended-thinking) | Thinking levels (think, think hard, ultrathink) |
+| [Computer Use Safety](https://www.anthropic.com/news/3-5-models-and-computer-use) | Safe autonomous operation patterns |
+| [Sabotage Evaluations](https://www.anthropic.com/research/sabotage-evaluations-for-frontier-models) | Safety evaluation methodology |
+| [Effective Harnesses for Long-Running Agents](https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents) | One-feature-at-a-time pattern, Playwright MCP for E2E |
+| [Claude Agent SDK Overview](https://platform.claude.com/docs/en/agent-sdk/overview) | Task tool, subagents, resume parameter, hooks |
+### Google DeepMind
+DeepMind's research on world models, hierarchical reasoning, and scalable oversight informs Loki Mode's architecture.
+| Paper/Resource | Contribution to Loki Mode |
+|----------------|---------------------------|
+| [SIMA 2: Generalist AI Agent](https://deepmind.google/blog/sima-2-an-agent-that-plays-reasons-and-learns-with-you-in-virtual-3d-worlds/) | Self-improvement loop, reward model training |
+| [Gemini Robotics 1.5](https://deepmind.google/blog/gemini-robotics-15-brings-ai-agents-into-the-physical-world/) | Hierarchical reasoning (planner + executor) |
+| [Dreamer 4: World Model Training](https://danijar.com/project/dreamer4/) | Simulation-first testing, safe exploration |
+| [Genie 3: World Models](https://deepmind.google/blog/genie-3-a-new-frontier-for-world-models/) | World model architecture patterns |
+| [Scalable AI Safety via Doubly-Efficient Debate](https://deepmind.google/research/publications/34920/) | Debate-based verification for critical changes |
+| [Human-AI Complementarity for Amplified Oversight](https://deepmindsafetyresearch.medium.com/human-ai-complementarity-a-goal-for-amplified-oversight-0ad8a44cae0a) | AI-assisted human supervision |
+| [Technical AGI Safety Approach](https://arxiv.org/html/2504.01849v1) | Safety-first agent design |
+### OpenAI
+OpenAI's Agents SDK and deep research work provide foundational patterns for agent orchestration.
+| Paper/Resource | Contribution to Loki Mode |
+|----------------|---------------------------|
+| [Agents SDK Documentation](https://openai.github.io/openai-agents-python/) | Tracing spans, guardrails, tripwires |
+| [A Practical Guide to Building Agents](https://cdn.openai.com/business-guides-and-resources/a-practical-guide-to-building-agents.pdf) | Agent architecture best practices |
+| [Building Agents Track](https://developers.openai.com/tracks/building-agents/) | Development patterns, handoff callbacks |
+| [AGENTS.md Specification](https://agents.md/) | Standardized agent instructions |
+| [Introducing Deep Research](https://openai.com/index/introducing-deep-research/) | Adaptive planning, backtracking |
+| [Deep Research System Card](https://cdn.openai.com/deep-research-system-card.pdf) | Safety considerations for research agents |
+| [Introducing o3 and o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/) | Reasoning model guidance |
+| [Reasoning Best Practices](https://platform.openai.com/docs/guides/reasoning-best-practices) | Extended thinking patterns |
+| [Chain of Thought Monitoring](https://openai.com/index/chain-of-thought-monitoring/) | Reasoning trace monitoring |
+| [Agent Builder Safety](https://platform.openai.com/docs/guides/agent-builder-safety) | Safety patterns for agent builders |
+| [Computer-Using Agent](https://openai.com/index/computer-using-agent/) | Computer use patterns |
+| [Agentic AI Foundation](https://openai.com/index/agentic-ai-foundation/) | Industry standards, interoperability |
+### Amazon Web Services (AWS)
+AWS Bedrock's multi-agent collaboration patterns inform Loki Mode's routing and dispatch strategies.
+| Paper/Resource | Contribution to Loki Mode |
+|----------------|---------------------------|
+| [Multi-Agent Orchestration Guidance](https://aws.amazon.com/solutions/guidance/multi-agent-orchestration-on-aws/) | Three coordination mechanisms, architectural patterns |
+| [Bedrock Multi-Agent Collaboration](https://docs.aws.amazon.com/bedrock/latest/userguide/agents-multi-agent-collaboration.html) | Supervisor mode, routing mode, 10-agent limit |
+| [Multi-Agent Collaboration Announcement](https://aws.amazon.com/blogs/aws/introducing-multi-agent-collaboration-capability-for-amazon-bedrock/) | Intent classification, selective context sharing |
+| [AgentCore for SRE](https://aws.amazon.com/blogs/machine-learning/build-multi-agent-site-reliability-engineering-assistants-with-amazon-bedrock-agentcore/) | Gateway, Memory, Identity, Observability components |
+**Key Pattern Adopted:** Routing Mode Optimization - Direct dispatch for simple tasks (lower latency), supervisor orchestration for complex tasks (full coordination).
+---
+## Academic Research
+### Multi-Agent Systems
+| Paper | Authors/Source | Contribution |
+|-------|----------------|--------------|
+| [Multi-Agent Collaboration Mechanisms Survey](https://arxiv.org/abs/2501.06322) | arXiv 2501.06322 | Collaboration structures, coopetition |
+| [CONSENSAGENT: Anti-Sycophancy Framework](https://aclanthology.org/2025.findings-acl.1141/) | ACL 2025 Findings | Blind review, devil's advocate |
+| [GoalAct: Hierarchical Execution](https://arxiv.org/abs/2504.16563) | arXiv 2504.16563 | Global planning, skill decomposition |
+| [A-Mem: Agentic Memory System](https://arxiv.org/html/2502.12110v11) | arXiv 2502.12110 | Zettelkasten-style memory linking |
+| [Multi-Agent Reflexion (MAR)](https://arxiv.org/html/2512.20845) | arXiv 2512.20845 | Structured debate, persona-based critics |
+| [Iter-VF: Iterative Verification-First](https://arxiv.org/html/2511.21734v1) | arXiv 2511.21734 | Answer-only verification, Markovian retry |
+### Evaluation & Safety
+| Paper | Authors/Source | Contribution |
+|-------|----------------|--------------|
+| [Assessment Framework for Agentic AI](https://arxiv.org/html/2512.12791v1) | arXiv 2512.12791 | Four-pillar evaluation framework |
+| [Measurement Imbalance in Agentic AI](https://arxiv.org/abs/2506.02064) | arXiv 2506.02064 | Multi-dimensional evaluation axes |
+| [Demo-to-Deployment Gap](https://www.marktechpost.com/2025/12/24/) | Stanford/Harvard | Tool reliability vs tool selection |
+---
+## Industry Resources
+### Tools & Frameworks
+| Resource | Contribution |
+|----------|--------------|
+| [Cursor - Scaling Agents](https://cursor.com/blog/scaling-agents) | Hierarchical planner-worker model, optimistic concurrency, recursive sub-planners, judge agents, scale-tested patterns (1M+ LoC projects) |
+| [NVIDIA ToolOrchestra](https://github.com/NVlabs/ToolOrchestra) | Efficiency metrics, three-reward signal framework, dynamic agent selection |
+| [LerianStudio/ring](https://github.com/LerianStudio/ring) | Subagent-driven-development pattern |
+| [Awesome Agentic Patterns](https://github.com/nibzard/awesome-agentic-patterns) | 105+ production patterns catalog |
+### Best Practices Guides
+| Resource | Contribution |
+|----------|--------------|
+| [Maxim AI: Production Multi-Agent Systems](https://www.getmaxim.ai/articles/best-practices-for-building-production-ready-multi-agent-systems/) | Correlation IDs, failure handling |
+| [UiPath: Agent Builder Best Practices](https://www.uipath.com/blog/ai/agent-builder-best-practices) | Single-responsibility agents |
+| [GitHub: Speed Without Control](https://github.blog/) | Static analysis + AI review, guardrails |
+---
+## Hacker News Community
+Battle-tested insights from practitioners deploying agents in production.
+### Discussions
+| Thread | Key Insight |
+|--------|-------------|
+| [What Actually Works in Production for Autonomous Agents](https://news.ycombinator.com/item?id=44623207) | "Zero companies without human in the loop" |
+| [Coding with LLMs in Summer 2025](https://news.ycombinator.com/item?id=44623953) | Context curation beats automatic RAG |
+| [Superpowers: How I'm Using Coding Agents](https://news.ycombinator.com/item?id=45547344) | Sub-agents for context isolation (Simon Willison) |
+| [Claude Code Experience After Two Weeks](https://news.ycombinator.com/item?id=44596472) | Fresh contexts yield better results |
+| [AI Agent Benchmarks Are Broken](https://news.ycombinator.com/item?id=44531697) | LLM-as-judge has shared blind spots |
+| [How to Orchestrate Multi-Agent Workflows](https://news.ycombinator.com/item?id=45955997) | Event-driven, decoupled coordination |
+| [Context Engineering vs Prompt Engineering](https://news.ycombinator.com/item?id=44427757) | Manual context selection principles |
+### Show HN Projects
+| Project | Contribution |
+|---------|--------------|
+| [Self-Evolving Agents Repository](https://news.ycombinator.com/item?id=45099226) | Self-improvement patterns |
+| [Package Manager for Agent Skills](https://news.ycombinator.com/item?id=46422264) | Skills architecture |
+| [Wispbit - AI Code Review Agent](https://news.ycombinator.com/item?id=44722603) | Code review patterns |
+| [Agtrace - Monitoring for AI Coding Agents](https://news.ycombinator.com/item?id=46425670) | Agent monitoring patterns |
+---
+## Individual Contributors
+Special thanks to thought leaders whose patterns and insights shaped Loki Mode:
+| Contributor | Contribution |
+|-------------|--------------|
+| **Boris Cherny** (Creator of Claude Code) | Self-verification loop (2-3x quality improvement), extended thinking mode, "Less prompting, more systems" philosophy |
+| **Ivan Steshov** | Centralized constitution, agent lineage tracking, structured artifacts as contracts |
+| **Addy Osmani** | Git checkpoint system, specification-first approach, visual aids (Mermaid diagrams) |
+| **Simon Willison** | Sub-agents for context isolation, skills system, context curation patterns |
+---
+## Production Patterns Summary
+Key patterns incorporated from practitioner experience:
+| Pattern | Source | Implementation |
+|---------|--------|----------------|
+| Human-in-the-Loop (HITL) | HN Production Discussions | Confidence-based escalation thresholds |
+| Narrow Scope (3-5 steps) | Multiple Practitioners | Task scope constraints |
+| Deterministic Validation | Production Teams | Rule-based outer loops (not LLM-judged) |
+| Context Curation | Simon Willison | Manual selection, focused context |
+| Blind Review + Devil's Advocate | CONSENSAGENT | Anti-sycophancy protocol |
+| Hierarchical Reasoning | DeepMind Gemini | Orchestrator + specialized executors |
+| Constitutional Self-Critique | Anthropic | Principles-based revision |
+| Debate Verification | DeepMind | Critical change verification |
+| One Feature at a Time | Anthropic Harness | Single feature per iteration, full verification |
+| E2E Browser Testing | Anthropic Harness | Playwright MCP for visual verification |
+---
+## v3.2.0 Additions
+### Parallel Workflows
+| Resource | Contribution |
+|----------|--------------|
+| [Claude Code Git Worktrees](https://code.claude.com/docs/en/common-workflows#run-parallel-claude-code-sessions-with-git-worktrees) | Parallel Claude sessions, worktree isolation pattern |
+### Key Patterns Incorporated (v3.2.0)
+| Pattern | Source | Implementation |
+|---------|--------|----------------|
+| Git Worktree Isolation | Claude Code Docs | `skills/parallel-workflows.md`, `run.sh --parallel` |
+| Parallel Testing Stream | Claude Code Docs | Testing worktree tracks main, continuous validation |
+| Inter-Stream Signals | Custom | `.loki/signals/` for feature/test/docs coordination |
+| Auto-Merge Workflow | Custom | Completed features merge back automatically |
+---
+## v3.0.0 Additions
+### Agent Interoperability
+| Resource | Contribution |
+|----------|--------------|
+| [Google A2A Protocol](https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/) | Agent Cards, capability discovery, JSON-RPC 2.0 |
+| [A2A Protocol v0.3](https://cloud.google.com/blog/products/ai-machine-learning/agent2agent-protocol-is-getting-an-upgrade) | gRPC support, security card signing, enterprise features |
+| [A2A Project GitHub](https://github.com/a2aproject/A2A) | Open protocol specification, SDK implementations |
+### Agentic Patterns
+| Resource | Contribution |
+|----------|--------------|
+| [Awesome Agentic Patterns](https://github.com/nibzard/awesome-agentic-patterns) | 105+ production patterns catalog, feedback loops, tool patterns |
+| [Agent Orchestration Critique](https://moridinamael.github.io/agent-orchestration/) | "Ralph Wiggum Mode" - simpler orchestration beats complex frameworks |
+### Key Patterns Incorporated
+| Pattern | Source | Implementation |
+|---------|--------|----------------|
+| Agent Cards | A2A Protocol | `.loki/state/agents/` capability discovery |
+| Structured Handoffs | A2A Protocol | JSON message format for agent-to-agent communication |
+| Sub-Agent Spawning | awesome-agentic-patterns | Task tool with focused prompts |
+| Dual LLM Pattern | awesome-agentic-patterns | Opus for planning, Haiku for execution |
+| CI Feedback Loop | awesome-agentic-patterns | Test results injected into retry prompts |
+| Minimal Orchestration | moridinamael | Simple continuation over complex frameworks |
+---
+## License
+This acknowledgements file documents the research and resources that influenced Loki Mode's design. All referenced works retain their original licenses and copyrights.
+Loki Mode itself is released under the MIT License.
+---
+*Last updated: v4.1.0*

package/docs/COMPARISON.md ADDED Viewed

@@ -0,0 +1,325 @@
+# Autonomous Coding Agents Comparison (2025-2026)
+> Last Updated: January 17, 2026 (v2.36.8)
+>
+> A comprehensive comparison of Loki Mode against major autonomous coding agents and AI IDEs in the market.
+> Deep-dive comparisons validated by Opus feedback loops.
+---
+## Overview Comparison
+| Feature | **Loki Mode** | **Zencoder** | **Devin** | **OpenAI Codex** | **Cursor** | **Claude Code** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|--------------|-----------|-----------------|------------|-----------------|----------|-----------------|--------------|--------------|
+| **Type** | Skill/Framework | Enterprise Platform | Standalone Agent | Cloud Agent | AI IDE | CLI Agent | AI IDE | AI IDE | Cloud Agent | AI IDE (OSS) |
+| **Autonomy Level** | Full (zero human) | High | Full | High | Medium-High | High | High | High | High | High |
+| **Max Runtime** | Unlimited | Async/Scheduled | Hours | Per-task | Session | Session | Days | Async | Per-task | Session |
+| **Pricing** | Free (OSS) | Enterprise | $20/mo | ChatGPT Plus | $20/mo | API costs | Free preview | Free preview | $19/mo | Free (OSS) |
+| **Open Source** | Yes | No | No | No | No | No | No | No | No | Yes |
+| **GitHub Stars** | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | 70.9k |
+---
+## Multi-Agent & Orchestration
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Multi-Agent** | 37 agents in 7 swarms | Single | Single | Up to 8 parallel | Background | Manager Surface | Multiple types | 4 built-in |
+| **Orchestration** | Full orchestrator | N/A | N/A | Git worktree | Hooks | Manager view | Workflow | Subagents |
+| **Parallel Exec** | 10+ Haiku, 4 impl (worktree) | No | No | 8 max | Yes | Yes | Yes | Yes |
+| **Agent Swarms** | Eng, Ops, Business, Data, Product, Growth, Review | N/A | N/A | N/A | N/A | N/A | 3 types | N/A |
+---
+## Quality Control & Code Review
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Code Review** | 3 blind reviewers + devil's advocate | Basic | Basic | BugBot PR | Property-based | Artifacts | Doc/Review | Basic |
+| **Anti-Sycophancy** | Yes (CONSENSAGENT) | No | No | No | No | No | No | No |
+| **Quality Gates** | 7 gates + PBT | Basic | Sandbox | Tests | Spec validation | Artifact checks | Tests | Permissions |
+| **Constitutional AI** | Yes (principles) | No | Refusal training | No | No | No | No | No |
+---
+## Spec-Driven Development
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Spec-First** | OpenAPI-first | Natural lang | Natural lang | Natural lang | requirements.md, design.md, tasks.md | Natural lang | Natural lang | AGENTS.md |
+| **PRD Support** | Native parsing | Ticket-based | Issue-based | No | Native specs | No | Issue-based | No |
+| **Design Docs** | Auto-generates | No | No | No | Yes (design.md) | Artifacts | Yes | No |
+---
+## Memory & Context
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Memory System** | Episodic + Semantic + Procedural | Session | Task-scoped | Memories (flat) | Steering files | Knowledge base | Customization | Session |
+| **Cross-Session** | Yes (ledgers, handoffs) | Limited | No | Yes | Yes | Yes | Yes | No |
+| **Cross-Project** | Yes (global DB) | No | No | No | No | Yes | Customization | No |
+| **Review Learning** | Yes (anti-patterns) | No | No | No | Yes | No | No | No |
+---
+## Self-Verification & Testing
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Verification Cycle** | RARV (Reason-Act-Reflect-Verify) | Plan-Execute | Plan-Execute | Execute | Spec-Design-Task | Plan-Verify | Execute | Plan-Build |
+| **Property-Based Testing** | Yes (fast-check) | No | No | No | Yes | No | No | No |
+| **Event Hooks** | Yes (file, task, phase) | No | No | No | Yes | No | No | Yes (plugins) |
+| **Debate Verification** | Yes (DeepMind) | No | No | No | No | No | No | No |
+| **Rollback** | Git worktree + stash | No | No | Git | No | Artifacts | No | Git |
+---
+## Model Selection & Routing
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Model Strategy** | Opus=plan, Sonnet=dev, Haiku=ops | GPT-4 | codex-1 | Multi-model | Claude family | Gemini 3 + Claude + GPT | Bedrock | Multi-provider |
+| **Confidence Routing** | 4-tier (auto/direct/supervisor/escalate) | No | No | No | No | No | No | No |
+| **Dynamic Selection** | By complexity | Fixed | Fixed | User choice | User choice | User choice | Auto | User choice |
+---
+## Code Transformation & Migration
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Language Upgrades** | Yes (Java, Python, Node) | No | No | No | No | No | Yes (/transform) | No |
+| **DB Migrations** | Yes (Oracle->PG, MySQL->PG) | No | No | No | No | No | Yes | No |
+| **Framework Modernization** | Yes (Angular->React, .NET) | No | No | No | No | No | Yes | No |
+---
+## Artifact Generation
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Verification Reports** | Yes (on phase complete) | No | No | No | No | Yes | No | No |
+| **Architecture Diagrams** | Yes (mermaid) | No | No | No | Yes | Yes | Yes | No |
+| **Screenshots** | Yes (Playwright) | No | No | No | No | Yes (video) | No | No |
+| **Browser Recording** | No (deterministic tests) | No | No | No | No | Yes | No | No |
+---
+## Skills & Extensibility
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Skills System** | IS a SKILL.md | N/A | $skill-creator, $skill-installer | Rules | SKILL.md compatible | N/A | N/A | SKILL.md compatible |
+| **Plugin System** | Wrapper script | N/A | N/A | Extensions | Hooks | N/A | MCP | JS/TS plugins |
+| **MCP Support** | Playwright MCP | N/A | N/A | Yes | Yes | N/A | Yes | Yes |
+---
+## Research Foundation
+| Feature | **Loki Mode** | **Devin** | **Codex** | **Cursor** | **Kiro** | **Antigravity** | **Amazon Q** | **OpenCode** |
+|---------|--------------|-----------|-----------|------------|----------|-----------------|--------------|--------------|
+| **Research Base** | OpenAI SDK, DeepMind, Anthropic, ToolOrchestra, CONSENSAGENT, MAR, GoalAct | Proprietary | RL on coding | Proprietary | AWS | DeepMind | AWS | N/A |
+| **Papers Cited** | 10+ | None public | None public | None public | None public | Gemini papers | None public | None public |
+---
+## Benchmarks (SWE-bench Verified)
+| Agent | Score | Notes |
+|-------|-------|-------|
+| **Google Antigravity** | 76.2% | With Gemini 3 Pro |
+| **Claude Code** | ~75%+ | Claude Sonnet 4.5 |
+| **OpenAI Codex** | ~70%+ | GPT-5.2-Codex |
+| **Devin 2.0** | 67% | PR merge rate doubled |
+| **Amazon Q Developer** | 66% | State-of-the-art claim |
+| **Loki Mode** | Inherits Claude | Framework, not model |
+---
+## Zencoder/Zenflow Comparison (v2.36.7)
+**Comprehensive analysis of Zencoder.ai enterprise AI coding platform, including Zenflow (autonomous workflows), Zen Agents (specialized agents), and Zentester (QA automation).**
+### Feature Comparison
+| Feature | **Zencoder** | **Loki Mode** | **Assessment** |
+|---------|-------------|---------------|----------------|
+| **Four Pillars** | Structured Workflows, SDD, Multi-Agent Verification, Parallel Execution | SDLC + RARV + 7 Gates + Worktrees | TIE |
+| **Spec-Driven Dev** | Specs as first-class objects | OpenAPI-first | TIE |
+| **Multi-Agent Verification** | Model diversity (Claude vs OpenAI, 54% improvement) | 3 blind reviewers + devil's advocate | Different approach (N/A for Claude Code - only Claude models) |
+| **Quality Gates** | Built-in verification loops | 7 explicit gates + anti-sycophancy | **Loki Mode** |
+| **Memory System** | Not documented | 3-tier episodic/semantic/procedural | **Loki Mode** |
+| **Agent Specialization** | Custom Zen Agents | 37 pre-defined specialized agents | **Loki Mode** |
+| **CI Failure Analysis** | Explicit pattern with auto-fix | DevOps agent only | **ADOPTED from Zencoder** |
+| **Review Comment Resolution** | Auto-apply simple changes | Manual review | **ADOPTED from Zencoder** |
+| **Dependency Management** | Scheduled PRs, one group at a time | Mentioned only | **ADOPTED from Zencoder** |
+| **Multi-Repo Support** | Full cross-repo workflows | Single repo | Zencoder (N/A for Claude Code context) |
+| **IDE Plugins** | VS Code, JetBrains, GitHub App | CLI skill | Zencoder (different use case) |
+| **Repo Grokking** | Proprietary semantic indexing | Claude native exploration | Different approach |
+### Patterns ADOPTED from Zencoder
+| Pattern | Description | Priority |
+|---------|-------------|----------|
+| **CI Failure Analysis** | Classify failures (regression/flakiness/environment/dependency), auto-fix 90% of flaky tests | HIGH |
+| **Review Comment Resolution** | Auto-apply simple review comments (validation, tests, error messages) | HIGH |
+| **Dependency Management** | Weekly scans, one group at a time, security > major > minor > patch | MEDIUM |
+### Patterns NOT Adopted
+| Pattern | Zencoder Feature | Why Not Adopted |
+|---------|-----------------|-----------------|
+| Model Diversity | Claude critiques OpenAI code (54% improvement) | Claude Code only has Claude models available |
+| Multi-Repo Support | Cross-repo change coordination | Claude Code is single-context per session |
+| IDE Plugins | VS Code, JetBrains integrations | Loki Mode is a skill, not a plugin |
+| Repo Grokking | Proprietary semantic indexing | Claude Code has native codebase exploration |
+### Where Loki Mode EXCEEDS Zencoder
+1. **Quality Control**: 7 explicit gates + blind review + devil's advocate vs built-in loops
+2. **Memory System**: 3-tier (episodic/semantic/procedural) with cross-project learning
+3. **Agent Specialization**: 37 pre-defined specialized agents across 7 swarms
+4. **Anti-Sycophancy**: CONSENSAGENT patterns prevent reviewer groupthink
+5. **Autonomy Design**: Zero human intervention from PRD to production
+6. **Research Foundation**: 10+ academic papers integrated vs proprietary
+### Where Zencoder EXCEEDS Loki Mode
+1. **Multi-Repo**: Cross-repository change coordination (N/A for Claude Code)
+2. **Model Diversity**: Can use Claude to critique OpenAI-generated code (Claude Code limitation)
+3. **IDE Integration**: Native plugins for VS Code, JetBrains (Loki Mode is CLI-based)
+---
+## Deep-Dive Comparison Results
+### Patterns Adopted from Each Competitor
+| Source | Pattern Adopted | Version |
+|--------|----------------|---------|
+| **OpenCode** | Proactive context management (compaction at 90%) | v2.36.2 |
+| **Cursor** | Git worktree isolation for parallel agents | v2.36.3 |
+| **Cursor** | Atomic checkpoint/rollback with git stash | v2.36.3 |
+| **Kiro** | Property-based testing from specs | v2.36.4 |
+| **Kiro** | Event-driven hooks (file, task, phase triggers) | v2.36.4 |
+| **Kiro** | Review-to-memory learning (anti-patterns) | v2.36.4 |
+| **Amazon Q** | Code transformation agent (migrations) | v2.36.5 |
+| **Antigravity** | Artifact generation (reports, diagrams) | v2.36.5 |
+### Patterns NOT Adopted (with justification)
+| Pattern | Source | Why Not Adopted |
+|---------|--------|-----------------|
+| LSP Integration | OpenCode | Violates deterministic validation principle |
+| Plugin/Hook System | OpenCode | Adds complexity for human extensibility |
+| Tool Call Limits (25 ops) | Cursor | Contradicts autonomous operation |
+| BugBot GitHub Comments | Cursor | Pre-commit review is superior |
+| Confidence-based Clarification | Devin | "NEVER ask questions" is a core rule |
+| Progressive Skill Disclosure | Codex | Already implicit in references/ structure |
+| Agent Steering Files | Kiro | CLAUDE.md + memory already cover this |
+| Manager Surface (interactive) | Antigravity | Requires human control |
+| Video Recording | Antigravity | Requires human review |
+---
+## Unique Differentiators
+| Agent | Killer Feature |
+|-------|---------------|
+| **Loki Mode** | Zero-human-intervention full SDLC, 37 agents in 7 swarms, Constitutional AI, anti-sycophancy, cross-project learning, code transformation, property-based testing |
+| **Devin** | Full software engineer persona, Slack integration, 67% PR merge rate |
+| **OpenAI Codex** | Skills marketplace, $skill-creator, GPT-5.2-Codex, secure sandbox |
+| **Cursor** | 8 parallel agents, BugBot, Memories, $10B valuation, Composer model (250 tok/s) |
+| **Kiro** | Spec-driven development (requirements.md/design.md/tasks.md), Property-based testing, Hooks |
+| **Antigravity** | Manager Surface, Artifacts system (video), browser subagents, Gemini 3 (76.2% SWE-bench) |
+| **Amazon Q** | Code transformation (/transform), 66% SWE-bench, deep AWS integration, MCP support |
+| **OpenCode** | 70.9k stars, multi-provider, LSP integration (25+ languages), plugin system |
+---
+## Summary: Where Loki Mode Excels
+| Dimension | Loki Mode Advantage |
+|-----------|-------------------|
+| **Autonomy** | Only agent designed for TRUE zero human intervention |
+| **Multi-Agent** | 37 specialized agents in 7 swarms vs 1-8 in competitors |
+| **Quality** | 7 gates + blind review + devil's advocate + property-based testing |
+| **Research** | 10+ academic papers integrated vs proprietary/undisclosed |
+| **Anti-Sycophancy** | Only agent with CONSENSAGENT-based blind review |
+| **Memory** | 3-tier memory (episodic/semantic/procedural) + review learning + cross-project |
+| **Transformation** | Code migration workflows (language, database, framework) |
+| **Cost** | Free (open source) vs $20-500/month |
+| **Customization** | Full source access vs black box |
+---
+## Where Competitors Excel
+| Competitor | Advantage Over Loki Mode |
+|------------|-------------------------|
+| **Kiro** | Native spec files (requirements.md, design.md, tasks.md) |
+| **Antigravity** | Browser video recording, Manager Surface for human orchestration |
+| **Cursor** | Polished IDE UX, $10B valuation, massive adoption ($500M ARR) |
+| **Devin** | Slack-native workflow, team collaboration |
+| **Codex** | Skills marketplace, GPT-5.2 model capabilities |
+| **Amazon Q** | Deep AWS integration, enterprise support |
+| **OpenCode** | Multi-provider support, LSP integration |
+---
+## Validation Methodology
+Each comparison was validated through:
+1. **Deep documentation analysis** - Official docs, blogs, changelogs
+2. **Opus feedback loop** - Critical evaluation by Claude Opus 4.5
+3. **Pattern extraction** - Identify genuinely beneficial patterns
+4. **Autonomous fit assessment** - Does it serve zero-human-intervention operation?
+5. **Implementation** - Adopt patterns that pass validation
+### Validation Questions Asked
+- Does this pattern require human intervention?
+- Does it improve autonomous quality/verification?
+- Does it align with Constitutional AI principles?
+- Is it simpler than alternatives?
+---
+## Sources
+### Deep-Dive Analysis Sources
+- [OpenCode GitHub](https://github.com/anomalyco/opencode) - 70.9k stars
+- [OpenCode Internals Deep Dive](https://cefboud.com/posts/coding-agents-internals-opencode-deepdive/)
+- [Cursor 2.0 Agent-First Architecture](https://www.digitalapplied.com/blog/cursor-2-0-agent-first-architecture-guide)
+- [Devin 2025 Performance Review](https://cognition.ai/blog/devin-annual-performance-review-2025)
+- [OpenAI Codex Skills](https://developers.openai.com/codex/skills/)
+- [GPT-5.2-Codex System Card](https://openai.com/index/gpt-5-2-codex-system-card/)
+- [Introducing Kiro (Kiro Blog)](https://kiro.dev/blog/introducing-kiro/)
+- [Kiro Autonomous Agent](https://kiro.dev/blog/introducing-kiro-autonomous-agent/)
+- [Google Antigravity Blog](https://developers.googleblog.com/build-with-google-antigravity-our-new-agentic-development-platform/)
+- [Amazon Q Developer Features](https://aws.amazon.com/q/developer/features/)
+### Additional Sources
+- [Faros AI - Best AI Coding Agents 2026](https://www.faros.ai/blog/best-ai-coding-agents-2026)
+- [Artificial Analysis - Coding Agents Comparison](https://artificialanalysis.ai/insights/coding-agents-comparison)
+- [Simon Willison on OpenAI Skills](https://simonwillison.net/2025/Dec/12/openai-skills/)
+- [VentureBeat - Google Antigravity](https://venturebeat.com/ai/google-antigravity-introduces-agent-first-architecture-for-asynchronous)
+---
+## Version History
+| Version | Date | Comparisons Added |
+|---------|------|-------------------|
+| v2.36.2 | 2026-01-15 | OpenCode |
+| v2.36.3 | 2026-01-15 | Cursor, Devin |
+| v2.36.4 | 2026-01-15 | Codex, Kiro |
+| v2.36.5 | 2026-01-15 | Antigravity, Amazon Q |
+| v2.36.7 | 2026-01-17 | Zencoder/Zenflow |
+| v2.36.8 | 2026-01-17 | Model assignment update (Opus for SDLC phases) |
+---
+**Note:** Features and pricing may change. Always verify with official sources. This comparison focuses on architectural patterns for autonomous operation, not subjective quality assessments.