ultimate-pi 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/ck-search/SKILL.md +99 -0
- package/.agents/skills/defuddle/SKILL.md +90 -0
- package/.agents/skills/find-skills/SKILL.md +142 -0
- package/.agents/skills/firecrawl/SKILL.md +150 -0
- package/.agents/skills/firecrawl/rules/install.md +82 -0
- package/.agents/skills/firecrawl/rules/security.md +26 -0
- package/.agents/skills/firecrawl-agent/SKILL.md +57 -0
- package/.agents/skills/firecrawl-build-interact/SKILL.md +67 -0
- package/.agents/skills/firecrawl-build-onboarding/SKILL.md +102 -0
- package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +39 -0
- package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +20 -0
- package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +17 -0
- package/.agents/skills/firecrawl-build-scrape/SKILL.md +68 -0
- package/.agents/skills/firecrawl-build-search/SKILL.md +68 -0
- package/.agents/skills/firecrawl-crawl/SKILL.md +58 -0
- package/.agents/skills/firecrawl-download/SKILL.md +69 -0
- package/.agents/skills/firecrawl-interact/SKILL.md +83 -0
- package/.agents/skills/firecrawl-map/SKILL.md +50 -0
- package/.agents/skills/firecrawl-parse/SKILL.md +61 -0
- package/.agents/skills/firecrawl-scrape/SKILL.md +68 -0
- package/.agents/skills/firecrawl-search/SKILL.md +59 -0
- package/.agents/skills/obsidian-bases/SKILL.md +299 -0
- package/.agents/skills/obsidian-markdown/SKILL.md +237 -0
- package/.agents/skills/posthog-analyst/SKILL.md +306 -0
- package/.agents/skills/posthog-analyst/evals/evals.json +23 -0
- package/.agents/skills/wiki/SKILL.md +215 -0
- package/.agents/skills/wiki/references/css-snippets.md +122 -0
- package/.agents/skills/wiki/references/frontmatter.md +107 -0
- package/.agents/skills/wiki/references/git-setup.md +58 -0
- package/.agents/skills/wiki/references/mcp-setup.md +149 -0
- package/.agents/skills/wiki/references/modes.md +259 -0
- package/.agents/skills/wiki/references/plugins.md +96 -0
- package/.agents/skills/wiki/references/rest-api.md +124 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +211 -0
- package/.agents/skills/wiki-autoresearch/references/program.md +75 -0
- package/.agents/skills/wiki-fold/SKILL.md +204 -0
- package/.agents/skills/wiki-fold/references/fold-template.md +133 -0
- package/.agents/skills/wiki-ingest/SKILL.md +288 -0
- package/.agents/skills/wiki-lint/SKILL.md +183 -0
- package/.agents/skills/wiki-query/SKILL.md +176 -0
- package/.agents/skills/wiki-save/SKILL.md +128 -0
- package/.ckignore +41 -0
- package/.env.example +9 -0
- package/.github/workflows/lint.yml +33 -0
- package/.github/workflows/publish-github-packages.yml +35 -0
- package/.github/workflows/publish-npm.yml +1 -1
- package/.pi/SYSTEM.md +107 -40
- package/.pi/agents/pi-pi/agent-expert.md +205 -0
- package/.pi/agents/pi-pi/cli-expert.md +47 -0
- package/.pi/agents/pi-pi/config-expert.md +67 -0
- package/.pi/agents/pi-pi/ext-expert.md +53 -0
- package/.pi/agents/pi-pi/keybinding-expert.md +123 -0
- package/.pi/agents/pi-pi/pi-orchestrator.md +103 -0
- package/.pi/agents/pi-pi/prompt-expert.md +83 -0
- package/.pi/agents/pi-pi/skill-expert.md +52 -0
- package/.pi/agents/pi-pi/theme-expert.md +46 -0
- package/.pi/agents/pi-pi/tui-expert.md +100 -0
- package/.pi/agents/rethink.md +140 -0
- package/.pi/agents/wiki-ingest.md +67 -0
- package/.pi/agents/wiki-lint.md +75 -0
- package/.pi/auto-commit.json +20 -0
- package/.pi/extensions/banner.png +0 -0
- package/.pi/extensions/ck-enforce.ts +216 -0
- package/.pi/extensions/custom-footer.ts +308 -0
- package/.pi/extensions/custom-header.ts +116 -0
- package/.pi/extensions/dotenv-loader.ts +170 -0
- package/.pi/internal/cursor-sdk-transcript-parser.ts +59 -0
- package/.pi/model-router.json +95 -0
- package/.pi/npm/.gitignore +2 -0
- package/.pi/prompts/git-sync.md +124 -0
- package/.pi/prompts/harness-setup.md +509 -0
- package/.pi/prompts/save.md +16 -0
- package/.pi/prompts/wiki-autoresearch.md +19 -0
- package/.pi/prompts/wiki.md +23 -0
- package/.pi/providers/cursor-sdk-provider.test.mjs +476 -0
- package/.pi/providers/cursor-sdk-provider.ts +1085 -0
- package/.pi/settings.json +14 -4
- package/.pi/skills/agent-router/SKILL.md +174 -0
- package/.pi/sounds/alert/1-kaching-track.mp3 +0 -0
- package/.pi/sounds/error/1-ksi-wth-track.mp3 +0 -0
- package/.pi/sounds/error/2-smash-track.mp3 +0 -0
- package/.pi/sounds/error/3-buzzer-track.mp3 +0 -0
- package/.pi/sounds/notification/1-soft-notification-track.mp3 +0 -0
- package/.pi/sounds/project-sounds.json +25 -0
- package/.pi/sounds/reminder/1-soft-notification-track.mp3 +0 -0
- package/.pi/sounds/success/1-tada-track.mp3 +0 -0
- package/.pi/sounds/success/2-jobs-done-track.mp3 +0 -0
- package/.pi/sounds/success/3-yay-track.mp3 +0 -0
- package/CONTRIBUTING.md +116 -0
- package/README.md +32 -39
- package/biome.json +34 -0
- package/firecrawl/.env.template +58 -0
- package/firecrawl/README.md +49 -0
- package/firecrawl/docker-compose.yaml +201 -0
- package/firecrawl/searxng/searxng.env +3 -0
- package/firecrawl/searxng/settings.yml +85 -0
- package/lefthook.yml +8 -0
- package/package.json +55 -24
- package/vault/AGENTS.md +37 -0
- package/vault/wiki/_templates/comparison.md +39 -0
- package/vault/wiki/_templates/concept.md +40 -0
- package/vault/wiki/_templates/decision.md +21 -0
- package/vault/wiki/_templates/entity.md +32 -0
- package/vault/wiki/_templates/flow.md +14 -0
- package/vault/wiki/_templates/module.md +18 -0
- package/vault/wiki/_templates/question.md +31 -0
- package/vault/wiki/_templates/source.md +39 -0
- package/vault/wiki/concepts/AST-Aware Code Chunking.md +44 -0
- package/vault/wiki/concepts/Build-Time Prompt Compilation.md +107 -0
- package/vault/wiki/concepts/Context Engine (AI Coding).md +47 -0
- package/vault/wiki/concepts/Context-Aware System Reminders.md +61 -0
- package/vault/wiki/concepts/Contextualized Text Embedding.md +42 -0
- package/vault/wiki/concepts/Contractor vs Employee AI Model.md +55 -0
- package/vault/wiki/concepts/Dual-Model Agent Architecture.md +65 -0
- package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +43 -0
- package/vault/wiki/concepts/Majority Vote Ensembling.md +68 -0
- package/vault/wiki/concepts/Meta-Harness.md +16 -0
- package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +75 -0
- package/vault/wiki/concepts/Prompt Enhancement.md +90 -0
- package/vault/wiki/concepts/Prompt Renderer.md +89 -0
- package/vault/wiki/concepts/Semantic Codebase Indexing.md +67 -0
- package/vault/wiki/concepts/additive-config-hierarchy.md +16 -0
- package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +71 -0
- package/vault/wiki/concepts/agent-browser-browser-automation.md +99 -0
- package/vault/wiki/concepts/agent-codebase-interface.md +43 -0
- package/vault/wiki/concepts/agent-harness-architecture.md +67 -0
- package/vault/wiki/concepts/agent-loop-detection-patterns.md +133 -0
- package/vault/wiki/concepts/agent-search-enforcement.md +126 -0
- package/vault/wiki/concepts/agent-skills-ecosystem.md +74 -0
- package/vault/wiki/concepts/agent-skills-pattern.md +68 -0
- package/vault/wiki/concepts/agentic-harness-context-enforcement.md +91 -0
- package/vault/wiki/concepts/agentic-harness.md +34 -0
- package/vault/wiki/concepts/agentic-orchestration-pipeline.md +56 -0
- package/vault/wiki/concepts/agentic-search-no-embeddings.md +18 -0
- package/vault/wiki/concepts/anthropic-context-engineering.md +13 -0
- package/vault/wiki/concepts/antigravity-agent-first-architecture.md +61 -0
- package/vault/wiki/concepts/ast-compression.md +19 -0
- package/vault/wiki/concepts/ast-truncation.md +66 -0
- package/vault/wiki/concepts/barrel-files.md +37 -0
- package/vault/wiki/concepts/browser-harness-agent.md +41 -0
- package/vault/wiki/concepts/browser-subagent-visual-verification.md +82 -0
- package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +192 -0
- package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +161 -0
- package/vault/wiki/concepts/codebase-to-context-ingestion.md +46 -0
- package/vault/wiki/concepts/codex-harness-innovations.md +147 -0
- package/vault/wiki/concepts/consensus-debate-flow.md +17 -0
- package/vault/wiki/concepts/consensus-debate.md +206 -0
- package/vault/wiki/concepts/content-addressed-spec-identity.md +166 -0
- package/vault/wiki/concepts/context-anxiety.md +57 -0
- package/vault/wiki/concepts/context-compression-techniques.md +19 -0
- package/vault/wiki/concepts/context-continuity.md +22 -0
- package/vault/wiki/concepts/context-drift-in-agents.md +106 -0
- package/vault/wiki/concepts/context-engineering.md +62 -0
- package/vault/wiki/concepts/context-folding.md +67 -0
- package/vault/wiki/concepts/context-mode.md +38 -0
- package/vault/wiki/concepts/cursor-harness-innovations.md +107 -0
- package/vault/wiki/concepts/deterministic-session-compaction.md +79 -0
- package/vault/wiki/concepts/drift-detection-unified.md +296 -0
- package/vault/wiki/concepts/execution-feedback-loop.md +46 -0
- package/vault/wiki/concepts/feedforward-feedback-harness.md +60 -0
- package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +40 -0
- package/vault/wiki/concepts/fork-safe-spec-storage.md +89 -0
- package/vault/wiki/concepts/fts5-sandbox.md +19 -0
- package/vault/wiki/concepts/fuzzy-edit-matching.md +71 -0
- package/vault/wiki/concepts/gemini-cli-architecture.md +104 -0
- package/vault/wiki/concepts/generator-evaluator-architecture.md +64 -0
- package/vault/wiki/concepts/guardian-agent-pattern.md +67 -0
- package/vault/wiki/concepts/harness-configuration-layers.md +89 -0
- package/vault/wiki/concepts/harness-control-frameworks.md +155 -0
- package/vault/wiki/concepts/harness-engineering-first-principles.md +90 -0
- package/vault/wiki/concepts/harness-h-formalism.md +53 -0
- package/vault/wiki/concepts/hybrid-code-search.md +61 -0
- package/vault/wiki/concepts/inline-post-edit-validation.md +112 -0
- package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +110 -0
- package/vault/wiki/concepts/lifecycle-hooks.md +94 -0
- package/vault/wiki/concepts/mcp-tool-routing.md +102 -0
- package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +47 -0
- package/vault/wiki/concepts/meta-agent-context-pruning.md +151 -0
- package/vault/wiki/concepts/model-adaptive-harness.md +122 -0
- package/vault/wiki/concepts/model-routing-agents.md +101 -0
- package/vault/wiki/concepts/monorepo-architecture.md +45 -0
- package/vault/wiki/concepts/multi-agent-specialization.md +61 -0
- package/vault/wiki/concepts/permission-subsystem.md +16 -0
- package/vault/wiki/concepts/pi-messenger-analysis.md +243 -0
- package/vault/wiki/concepts/pi-vscode-extension-landscape.md +37 -0
- package/vault/wiki/concepts/policy-engine-pattern.md +78 -0
- package/vault/wiki/concepts/progressive-disclosure-agents.md +53 -0
- package/vault/wiki/concepts/progressive-skill-disclosure.md +17 -0
- package/vault/wiki/concepts/provider-native-prompting.md +203 -0
- package/vault/wiki/concepts/quality-signal-sentrux.md +37 -0
- package/vault/wiki/concepts/repo-map-ranking.md +42 -0
- package/vault/wiki/concepts/result-monad-error-handling.md +47 -0
- package/vault/wiki/concepts/safety-defense-in-depth.md +83 -0
- package/vault/wiki/concepts/sandbox-os-enforcement.md +18 -0
- package/vault/wiki/concepts/selective-debate-routing.md +70 -0
- package/vault/wiki/concepts/self-evolving-harness.md +60 -0
- package/vault/wiki/concepts/sentrux-mcp-integration.md +36 -0
- package/vault/wiki/concepts/sentrux-rules-engine.md +49 -0
- package/vault/wiki/concepts/shell-pattern-compression.md +24 -0
- package/vault/wiki/concepts/skill-first-architecture.md +166 -0
- package/vault/wiki/concepts/structured-compaction.md +78 -0
- package/vault/wiki/concepts/subagent-orchestration.md +17 -0
- package/vault/wiki/concepts/subagent-worktree-isolation.md +68 -0
- package/vault/wiki/concepts/superpowers-methodology.md +78 -0
- package/vault/wiki/concepts/think-in-code.md +73 -0
- package/vault/wiki/concepts/ts-execution-layer.md +100 -0
- package/vault/wiki/concepts/typescript-strict-mode.md +37 -0
- package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +51 -0
- package/vault/wiki/concepts/verification-drift-detection.md +19 -0
- package/vault/wiki/consensus/consensus-records.md +58 -0
- package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +122 -0
- package/vault/wiki/decisions/adr-008.md +40 -0
- package/vault/wiki/decisions/adr-009.md +46 -0
- package/vault/wiki/decisions/adr-010.md +55 -0
- package/vault/wiki/decisions/adr-011.md +165 -0
- package/vault/wiki/decisions/adr-012.md +102 -0
- package/vault/wiki/decisions/adr-013.md +59 -0
- package/vault/wiki/decisions/adr-014.md +73 -0
- package/vault/wiki/decisions/adr-015.md +81 -0
- package/vault/wiki/decisions/adr-016.md +91 -0
- package/vault/wiki/decisions/adr-017.md +79 -0
- package/vault/wiki/decisions/adr-018.md +100 -0
- package/vault/wiki/decisions/adr-019.md +75 -0
- package/vault/wiki/decisions/adr-020.md +106 -0
- package/vault/wiki/decisions/adr-021.md +86 -0
- package/vault/wiki/decisions/adr-022.md +113 -0
- package/vault/wiki/decisions/adr-023.md +113 -0
- package/vault/wiki/decisions/adr-024.md +73 -0
- package/vault/wiki/decisions/adr-025.md +130 -0
- package/vault/wiki/decisions/adr-026.md +56 -0
- package/vault/wiki/decisions/colocate-wiki.md +34 -0
- package/vault/wiki/entities/Anders Hejlsberg.md +29 -0
- package/vault/wiki/entities/Anthropic.md +17 -0
- package/vault/wiki/entities/Augment Code.md +49 -0
- package/vault/wiki/entities/Bjarne Stroustrup.md +26 -0
- package/vault/wiki/entities/Bolt.new (StackBlitz).md +39 -0
- package/vault/wiki/entities/Boris Cherny.md +11 -0
- package/vault/wiki/entities/Claude Code.md +19 -0
- package/vault/wiki/entities/Dennis Ritchie.md +26 -0
- package/vault/wiki/entities/Emergent Labs.md +32 -0
- package/vault/wiki/entities/Google Cloud.md +16 -0
- package/vault/wiki/entities/Guido van Rossum.md +28 -0
- package/vault/wiki/entities/Ken Thompson.md +28 -0
- package/vault/wiki/entities/Lee et al.md +16 -0
- package/vault/wiki/entities/Linus Torvalds.md +28 -0
- package/vault/wiki/entities/Lovable (company).md +40 -0
- package/vault/wiki/entities/Martin Fowler.md +16 -0
- package/vault/wiki/entities/Meng et al.md +16 -0
- package/vault/wiki/entities/OpenAI.md +16 -0
- package/vault/wiki/entities/Rocket.new.md +38 -0
- package/vault/wiki/entities/VILA-Lab.md +15 -0
- package/vault/wiki/entities/autodev-codebase.md +18 -0
- package/vault/wiki/entities/ck-tool.md +59 -0
- package/vault/wiki/entities/codesearch.md +18 -0
- package/vault/wiki/entities/disler-indydevdan.md +33 -0
- package/vault/wiki/entities/gsd-get-shit-done.md +56 -0
- package/vault/wiki/entities/javascript-runtimes.md +48 -0
- package/vault/wiki/entities/jesse-vincent.md +38 -0
- package/vault/wiki/entities/lean-ctx.md +32 -0
- package/vault/wiki/entities/opendev.md +41 -0
- package/vault/wiki/entities/ops-codegraph-tool.md +18 -0
- package/vault/wiki/entities/pi-coding-agent.md +53 -0
- package/vault/wiki/entities/sentrux.md +54 -0
- package/vault/wiki/entities/vgrep-tool.md +57 -0
- package/vault/wiki/entities/vitest.md +41 -0
- package/vault/wiki/flows/harness-wiki-pipeline.md +204 -0
- package/vault/wiki/hot.md +932 -0
- package/vault/wiki/index.md +437 -0
- package/vault/wiki/log.md +418 -0
- package/vault/wiki/meta/dashboard.md +30 -0
- package/vault/wiki/meta/lint-report-2026-04-30.md +86 -0
- package/vault/wiki/meta/lint-report-2026-05-02.md +251 -0
- package/vault/wiki/meta/overview.canvas +43 -0
- package/vault/wiki/modules/adversarial-verification.md +57 -0
- package/vault/wiki/modules/automated-observability.md +54 -0
- package/vault/wiki/modules/bench.md +20 -0
- package/vault/wiki/modules/extensions.md +23 -0
- package/vault/wiki/modules/grounding-checkpoints.md +62 -0
- package/vault/wiki/modules/harness-implementation-plan.md +345 -0
- package/vault/wiki/modules/harness-wiki-skill-mapping.md +135 -0
- package/vault/wiki/modules/harness.md +86 -0
- package/vault/wiki/modules/persistent-memory.md +85 -0
- package/vault/wiki/modules/schema-orchestration.md +68 -0
- package/vault/wiki/modules/skills.md +27 -0
- package/vault/wiki/modules/spec-hardening.md +58 -0
- package/vault/wiki/modules/structured-planning.md +53 -0
- package/vault/wiki/modules/think-in-code-enforcement.md +153 -0
- package/vault/wiki/modules/wiki-query-interface.md +64 -0
- package/vault/wiki/overview.md +51 -0
- package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +87 -0
- package/vault/wiki/questions/Research-sentrux-dev.md +123 -0
- package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +164 -0
- package/vault/wiki/questions/Research: Augment Code Context Engine.md +244 -0
- package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +112 -0
- package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +209 -0
- package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +99 -0
- package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +107 -0
- package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +72 -0
- package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +166 -0
- package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +188 -0
- package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +120 -0
- package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +236 -0
- package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +95 -0
- package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +165 -0
- package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +216 -0
- package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +91 -0
- package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +88 -0
- package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +81 -0
- package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +71 -0
- package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +80 -0
- package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +72 -0
- package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +92 -0
- package/vault/wiki/questions/Research: executor.sh Harness Integration.md +170 -0
- package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +97 -0
- package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +80 -0
- package/vault/wiki/questions/Research: pi-vcc.md +113 -0
- package/vault/wiki/questions/Research: semantic code search tools.md +69 -0
- package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +73 -0
- package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +111 -0
- package/vault/wiki/questions/mvp-implementation-blueprint.md +552 -0
- package/vault/wiki/questions/research-agent-first-codebase-exploration.md +199 -0
- package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +142 -0
- package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +100 -0
- package/vault/wiki/questions/research-wozcode-token-reduction.md +67 -0
- package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +95 -0
- package/vault/wiki/questions/resolved-context-window-economics.md +167 -0
- package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +126 -0
- package/vault/wiki/questions/resolved-mcp-tool-preference.md +112 -0
- package/vault/wiki/questions/resolved-small-model-meta-agents.md +107 -0
- package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +95 -0
- package/vault/wiki/sources/Auggie Context MCP Server.md +63 -0
- package/vault/wiki/sources/Augment Code Codacy AI Giants.md +61 -0
- package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +49 -0
- package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +55 -0
- package/vault/wiki/sources/Augment Context Engine Official.md +71 -0
- package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +74 -0
- package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +58 -0
- package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +75 -0
- package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +85 -0
- package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +58 -0
- package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +100 -0
- package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +75 -0
- package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +107 -0
- package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +70 -0
- package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +88 -0
- package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +57 -0
- package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +53 -0
- package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +65 -0
- package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +83 -0
- package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +70 -0
- package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +58 -0
- package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +101 -0
- package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +100 -0
- package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +53 -0
- package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +70 -0
- package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +71 -0
- package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +89 -0
- package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +155 -0
- package/vault/wiki/sources/Source: browser-harness CDP Harness.md +126 -0
- package/vault/wiki/sources/agent-drift-academic-paper.md +79 -0
- package/vault/wiki/sources/aider-repomap-tree-sitter.md +42 -0
- package/vault/wiki/sources/anthropic-compaction-api.md +58 -0
- package/vault/wiki/sources/anthropic-effective-harnesses.md +42 -0
- package/vault/wiki/sources/anthropic-prompt-best-practices.md +100 -0
- package/vault/wiki/sources/anthropic2026-harness-design.md +63 -0
- package/vault/wiki/sources/barrel-files-tkdodo.md +38 -0
- package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +57 -0
- package/vault/wiki/sources/bockeler2026-harness-engineering.md +69 -0
- package/vault/wiki/sources/cast-code-chunking-paper.md +50 -0
- package/vault/wiki/sources/ck-semantic-search.md +78 -0
- package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +71 -0
- package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +50 -0
- package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +64 -0
- package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +70 -0
- package/vault/wiki/sources/claude-context-editing-docs.md +13 -0
- package/vault/wiki/sources/cloudflare-codemode.md +63 -0
- package/vault/wiki/sources/code-chunk-library-supermemory.md +63 -0
- package/vault/wiki/sources/codeact-apple-2024.md +62 -0
- package/vault/wiki/sources/codex-dsc-rfc-8573.md +41 -0
- package/vault/wiki/sources/codex-open-source-agent-2026.md +110 -0
- package/vault/wiki/sources/coir-code-retrieval-benchmark.md +51 -0
- package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +48 -0
- package/vault/wiki/sources/context-folding-paper.md +61 -0
- package/vault/wiki/sources/context-mode-website.md +63 -0
- package/vault/wiki/sources/cursor-agent-best-practices-2026.md +62 -0
- package/vault/wiki/sources/cursor-fork-29b-2025.md +50 -0
- package/vault/wiki/sources/cursor-harness-april-2026.md +76 -0
- package/vault/wiki/sources/cursor-instant-apply-2024.md +45 -0
- package/vault/wiki/sources/cursor-shadow-workspace-2024.md +52 -0
- package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +53 -0
- package/vault/wiki/sources/cursor-vs-antigravity-2026.md +51 -0
- package/vault/wiki/sources/disler-pi-vs-claude-code.md +69 -0
- package/vault/wiki/sources/distill-deterministic-context-compression.md +53 -0
- package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +48 -0
- package/vault/wiki/sources/executor-rhyssullivan.md +122 -0
- package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +125 -0
- package/vault/wiki/sources/fan2025-imad.md +60 -0
- package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +63 -0
- package/vault/wiki/sources/gemini-3-prompting-guide.md +78 -0
- package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +50 -0
- package/vault/wiki/sources/gh-sub-issue-extension.md +72 -0
- package/vault/wiki/sources/github-fork-issues-discussion.md +44 -0
- package/vault/wiki/sources/github-issue-dependencies-docs.md +49 -0
- package/vault/wiki/sources/github-sub-issues-docs.md +51 -0
- package/vault/wiki/sources/gitingest.md +91 -0
- package/vault/wiki/sources/gitreverse.md +63 -0
- package/vault/wiki/sources/google-antigravity-official-blog.md +47 -0
- package/vault/wiki/sources/google-antigravity-wikipedia.md +53 -0
- package/vault/wiki/sources/gsd-codecentric-deep-dive.md +57 -0
- package/vault/wiki/sources/gsd-github-repo.md +51 -0
- package/vault/wiki/sources/gsd-hn-discussion.md +59 -0
- package/vault/wiki/sources/guido-python-design-philosophy.md +56 -0
- package/vault/wiki/sources/hejlsberg-7-learnings.md +48 -0
- package/vault/wiki/sources/ironclaw-drift-monitor.md +80 -0
- package/vault/wiki/sources/langsight-loop-detection.md +80 -0
- package/vault/wiki/sources/leanctx-website.md +69 -0
- package/vault/wiki/sources/lee2026-meta-harness.md +59 -0
- package/vault/wiki/sources/linux-kernel-coding-workflow.md +50 -0
- package/vault/wiki/sources/lou2026-autoharness.md +53 -0
- package/vault/wiki/sources/martin-fowler-harness-engineering.md +73 -0
- package/vault/wiki/sources/mcp-architecture-docs.md +13 -0
- package/vault/wiki/sources/meng2026-agent-harness-survey.md +79 -0
- package/vault/wiki/sources/mindstudio-four-agent-types.md +68 -0
- package/vault/wiki/sources/ms-chat-history-management.md +13 -0
- package/vault/wiki/sources/openai-prompt-guidance.md +104 -0
- package/vault/wiki/sources/openclaw-session-pruning.md +13 -0
- package/vault/wiki/sources/opencode-dcp.md +13 -0
- package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +79 -0
- package/vault/wiki/sources/openhands-platform.md +39 -0
- package/vault/wiki/sources/oss-guide-codebase-exploration.md +53 -0
- package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +102 -0
- package/vault/wiki/sources/pi-context-prune-github-repo.md +38 -0
- package/vault/wiki/sources/pi-mono-compaction-docs.md +38 -0
- package/vault/wiki/sources/pi-omni-compact-github-repo.md +50 -0
- package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +45 -0
- package/vault/wiki/sources/pi-vcc-github-repo.md +69 -0
- package/vault/wiki/sources/pi-vscode-marketplace.md +41 -0
- package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +39 -0
- package/vault/wiki/sources/py-tree-sitter.md +13 -0
- package/vault/wiki/sources/sentrux-dev-landing.md +40 -0
- package/vault/wiki/sources/sentrux-docs-pro-architecture.md +75 -0
- package/vault/wiki/sources/sentrux-docs-quality-signal.md +46 -0
- package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +57 -0
- package/vault/wiki/sources/sentrux-docs-rules-engine.md +58 -0
- package/vault/wiki/sources/sentrux-github-repo.md +56 -0
- package/vault/wiki/sources/superpowers-github-repo.md +56 -0
- package/vault/wiki/sources/superpowers-release-blog.md +54 -0
- package/vault/wiki/sources/superpowers-termdock-analysis.md +45 -0
- package/vault/wiki/sources/swe-agent-aci.md +42 -0
- package/vault/wiki/sources/swe-bench.md +45 -0
- package/vault/wiki/sources/swe-pruner-context-pruning.md +13 -0
- package/vault/wiki/sources/think-in-code-blog.md +48 -0
- package/vault/wiki/sources/tree-sitter-docs.md +13 -0
- package/vault/wiki/sources/ts-best-practices-2025-devto.md +42 -0
- package/vault/wiki/sources/ts-folder-structure-mingyang.md +58 -0
- package/vault/wiki/sources/ts-monorepo-koerselman.md +44 -0
- package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +52 -0
- package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +42 -0
- package/vault/wiki/sources/ts-strict-mode-rishikc.md +43 -0
- package/vault/wiki/sources/unix-philosophy.md +48 -0
- package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +39 -0
- package/vault/wiki/sources/vectara-guardian-agents.md +79 -0
- package/vault/wiki/sources/vgrep-semantic-search.md +76 -0
- package/vault/wiki/sources/vitest-official.md +41 -0
- package/vault/wiki/sources/vscode-pi-community-extension.md +40 -0
- package/vault/wiki/sources/wozcode.md +79 -0
- package/.agents/skills/compress/SKILL.md +0 -111
- package/.agents/skills/compress/scripts/__init__.py +0 -9
- package/.agents/skills/compress/scripts/__main__.py +0 -3
- package/.agents/skills/compress/scripts/benchmark.py +0 -78
- package/.agents/skills/compress/scripts/cli.py +0 -73
- package/.agents/skills/compress/scripts/compress.py +0 -227
- package/.agents/skills/compress/scripts/detect.py +0 -121
- package/.agents/skills/compress/scripts/validate.py +0 -189
- package/.agents/skills/emil-design-eng/SKILL.md +0 -679
- package/.agents/skills/lean-ctx/SKILL.md +0 -149
- package/.agents/skills/lean-ctx/scripts/install.sh +0 -95
- package/.agents/skills/scrapling-official/LICENSE.txt +0 -28
- package/.agents/skills/scrapling-official/SKILL.md +0 -390
- package/.agents/skills/scrapling-official/examples/01_fetcher_session.py +0 -26
- package/.agents/skills/scrapling-official/examples/02_dynamic_session.py +0 -26
- package/.agents/skills/scrapling-official/examples/03_stealthy_session.py +0 -26
- package/.agents/skills/scrapling-official/examples/04_spider.py +0 -58
- package/.agents/skills/scrapling-official/examples/README.md +0 -45
- package/.agents/skills/scrapling-official/references/fetching/choosing.md +0 -78
- package/.agents/skills/scrapling-official/references/fetching/dynamic.md +0 -352
- package/.agents/skills/scrapling-official/references/fetching/static.md +0 -432
- package/.agents/skills/scrapling-official/references/fetching/stealthy.md +0 -255
- package/.agents/skills/scrapling-official/references/mcp-server.md +0 -214
- package/.agents/skills/scrapling-official/references/migrating_from_beautifulsoup.md +0 -86
- package/.agents/skills/scrapling-official/references/parsing/adaptive.md +0 -212
- package/.agents/skills/scrapling-official/references/parsing/main_classes.md +0 -586
- package/.agents/skills/scrapling-official/references/parsing/selection.md +0 -494
- package/.agents/skills/scrapling-official/references/spiders/advanced.md +0 -344
- package/.agents/skills/scrapling-official/references/spiders/architecture.md +0 -94
- package/.agents/skills/scrapling-official/references/spiders/getting-started.md +0 -164
- package/.agents/skills/scrapling-official/references/spiders/proxy-blocking.md +0 -235
- package/.agents/skills/scrapling-official/references/spiders/requests-responses.md +0 -196
- package/.agents/skills/scrapling-official/references/spiders/sessions.md +0 -205
- package/PLAN.md +0 -11
- package/extensions/lean-ctx-enforce.ts +0 -166
- package/skills-lock.json +0 -35
- package/wiki/README.md +0 -19
- package/wiki/decisions/0001-establish-project-wiki-and-decision-record-format.md +0 -25
- package/wiki/decisions/0002-add-project-banner-to-readme.md +0 -26
- package/wiki/decisions/0003-remove-redundant-readme-title-heading.md +0 -26
- package/wiki/decisions/0004-publish-package-to-npm-as-ultimate-pi.md +0 -26
- package/wiki/decisions/0005-automate-npm-publish-with-github-actions.md +0 -27
- package/wiki/decisions/0006-switch-to-npm-trusted-publishing.md +0 -26
- package/wiki/decisions/0007-use-absolute-banner-url-for-npm-readme-rendering.md +0 -26
- package/wiki/decisions/0008-rename-banner-asset-for-cache-busting.md +0 -26
- package/wiki/decisions/0009-force-oidc-path-by-clearing-node-auth-token-in-publish-step.md +0 -25
- package/wiki/decisions/0010-simplify-setup-node-for-npm-trusted-publishing.md +0 -26
- package/wiki/decisions/0011-add-noop-workflow-change-to-force-fresh-publish-run.md +0 -25
- package/wiki/decisions/0012-align-workflow-runtime-with-npm-trusted-publishing-requirements.md +0 -26
- package/wiki/decisions/0013-add-package-repository-url-for-provenance-validation.md +0 -25
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
title: "Prompt Enhancement"
|
|
4
|
+
created: 2026-04-30
|
|
5
|
+
status: developing
|
|
6
|
+
tags:
|
|
7
|
+
- prompt-engineering
|
|
8
|
+
- context
|
|
9
|
+
- retrieval
|
|
10
|
+
aliases:
|
|
11
|
+
- Prompt Enrichment
|
|
12
|
+
- Context Injection
|
|
13
|
+
related:
|
|
14
|
+
- "[[Context Engine (AI Coding)]]"
|
|
15
|
+
- "[[Semantic Codebase Indexing]]"
|
|
16
|
+
sources:
|
|
17
|
+
- "[[Augment Code WorkOS ERC 2025]]"
|
|
18
|
+
- "[[Augment Code Codacy AI Giants]]"
|
|
19
|
+
updated: 2026-05-02
|
|
20
|
+
|
|
21
|
+
---
# Prompt Enhancement
|
|
22
|
+
|
|
23
|
+
The process of automatically enriching a user's query with relevant codebase context before it reaches the LLM. The goal is to give the LLM the same understanding a senior engineer would have when approaching a task.
|
|
24
|
+
|
|
25
|
+
## How Augment's Prompt Enhancer Works
|
|
26
|
+
|
|
27
|
+
1. User types a query: "add logging to payment API."
|
|
28
|
+
2. Context Engine semantically searches the codebase for relevant code.
|
|
29
|
+
3. Enhancer constructs an augmented prompt containing:
|
|
30
|
+
- The original query.
|
|
31
|
+
- Relevant source files and their paths.
|
|
32
|
+
- Existing patterns (how logging is done elsewhere).
|
|
33
|
+
- Related utilities and libraries already in the codebase.
|
|
34
|
+
- Team conventions and coding standards.
|
|
35
|
+
4. The augmented prompt is sent to the LLM.
|
|
36
|
+
|
|
37
|
+
## Key Design Principles
|
|
38
|
+
|
|
39
|
+
### Reuse Over Reinvention
|
|
40
|
+
The enhancer actively detects existing utilities and libraries. In Augment's demo, when asked to add Git branch info to a status bar, the enhancer detected an existing internal Git library and guided the agent to use it instead of shelling out to git.
|
|
41
|
+
|
|
42
|
+
### Context Budget Management
|
|
43
|
+
The enhancer must balance context richness with token budget:
|
|
44
|
+
- Retrieve only what's relevant (semantic search).
|
|
45
|
+
- Compress retrieved context (summarize large files).
|
|
46
|
+
- Rank by relevance, not just similarity.
|
|
47
|
+
- Respect the model's context window.
|
|
48
|
+
|
|
49
|
+
### Pattern Recognition
|
|
50
|
+
The enhancer learns from the codebase:
|
|
51
|
+
- Naming conventions.
|
|
52
|
+
- Error handling patterns.
|
|
53
|
+
- Import structure.
|
|
54
|
+
- Testing patterns.
|
|
55
|
+
- Architectural layering.
|
|
56
|
+
|
|
57
|
+
## Implementation for Our Harness
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
def enhance_prompt(query: str, workspace: str) -> str:
|
|
61
|
+
# 1. Semantic search for relevant code
|
|
62
|
+
relevant_files = semantic_search(query, workspace, top_k=10)
|
|
63
|
+
|
|
64
|
+
# 2. Extract patterns from relevant files
|
|
65
|
+
patterns = extract_patterns(relevant_files)
|
|
66
|
+
|
|
67
|
+
# 3. Find existing utilities/libraries
|
|
68
|
+
utilities = find_related_utilities(query, workspace)
|
|
69
|
+
|
|
70
|
+
# 4. Fetch wiki knowledge (our existing knowledge base)
|
|
71
|
+
wiki_context = query_wiki(query)
|
|
72
|
+
|
|
73
|
+
# 5. Build augmented prompt
|
|
74
|
+
return build_prompt(
|
|
75
|
+
query=query,
|
|
76
|
+
relevant_code=relevant_files,
|
|
77
|
+
patterns=patterns,
|
|
78
|
+
utilities=utilities,
|
|
79
|
+
wiki=wiki_context
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Integration with Existing Harness
|
|
84
|
+
|
|
85
|
+
Our harness already has several context sources:
|
|
86
|
+
- **lean-ctx**: Exact file retrieval (grep, find, read).
|
|
87
|
+
- **wiki**: Architectural knowledge, research, patterns.
|
|
88
|
+
- **ctx_knowledge**: Persistent project conventions and gotchas.
|
|
89
|
+
|
|
90
|
+
Prompt enhancement would unify these into a preprocessing step before the main agent loop.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
title: "Prompt Renderer"
|
|
4
|
+
created: 2026-05-02
|
|
5
|
+
updated: 2026-05-02
|
|
6
|
+
tags:
|
|
7
|
+
- prompt-renderer
|
|
8
|
+
- multi-model
|
|
9
|
+
- build-time-compilation
|
|
10
|
+
- harness
|
|
11
|
+
status: developing
|
|
12
|
+
related:
|
|
13
|
+
- "[[provider-native-prompting]]"
|
|
14
|
+
- "[[model-adaptive-harness]]"
|
|
15
|
+
- "[[research: Prompt Renderer for Multi-Model Agent Harness]]"
|
|
16
|
+
sources:
|
|
17
|
+
- "[[Source: Build-Time Prompt Compilation Architecture]]"
|
|
18
|
+
- "[[Source: AgentBus Jinja2 Prompt Pipelines]]"
|
|
19
|
+
|
|
20
|
+
---
# Prompt Renderer
|
|
21
|
+
|
|
22
|
+
A build-time prompt compilation system that takes a **base prompt spec** (model-agnostic) and renders **per-model optimized prompts** by applying each model's official prompting conventions, substituting variables, and caching compiled output.
|
|
23
|
+
|
|
24
|
+
## Architecture
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
Base Prompt Spec (JSON/YAML)
|
|
28
|
+
↓
|
|
29
|
+
[Compile-time Renderer]
|
|
30
|
+
↓
|
|
31
|
+
┌───────┼───────┬─────────┐
|
|
32
|
+
│ GPT │Claude │Gemini │ ← Per-model compiled prompts
|
|
33
|
+
│.json │.json │.json │
|
|
34
|
+
└───────┴───────┴─────────┘
|
|
35
|
+
↓
|
|
36
|
+
[npm package] ← Shipped in lib
|
|
37
|
+
↓
|
|
38
|
+
[Runtime] → load pre-compiled prompt → substitute runtime vars → send to LLM
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Key Properties
|
|
42
|
+
|
|
43
|
+
- **Build-time, not runtime**: Compiler runs during `npm run build`, output shipped as JSON in npm package
|
|
44
|
+
- **Base spec is model-agnostic**: Single source of truth that describes WHAT the prompt should do, not HOW
|
|
45
|
+
- **Per-model renderers**: Each model gets a plugin that knows its official prompting conventions
|
|
46
|
+
- **Variable system**: Two-phase — compile-time variables (resolved at build) vs runtime variables (resolved at call time)
|
|
47
|
+
- **Caching layer**: Pre-compiled prompts are the cache — no runtime compilation, no warmup needed
|
|
48
|
+
- **Deterministic**: Same spec + same renderer version → identical output (hash-verifiable)
|
|
49
|
+
|
|
50
|
+
## Rendering Pipeline
|
|
51
|
+
|
|
52
|
+
1. **Parse base spec**: Validate structure, required fields, variable declarations
|
|
53
|
+
2. **Select model renderer**: Load per-model plugin (GPT, Claude, Gemini, etc.)
|
|
54
|
+
3. **Apply model conventions**: XML tags for Claude, constraints-first for GPT, constraints-last for Gemini
|
|
55
|
+
4. **Substitute compile-time variables**: Resolve all vars marked `compile: true`
|
|
56
|
+
5. **Validate output**: Check token count, syntax, caching thresholds
|
|
57
|
+
6. **Serialize**: Write compiled prompt to JSON with hash + metadata
|
|
58
|
+
7. **Cache**: Store hash → compiled output for incremental builds
|
|
59
|
+
|
|
60
|
+
## Model-Specific Rendering Rules
|
|
61
|
+
|
|
62
|
+
| Convention | GPT (OpenAI) | Claude (Anthropic) | Gemini (Google) |
|
|
63
|
+
|-----------|-------------|-------------------|-----------------|
|
|
64
|
+
| System prompt | `system` role message | `system` parameter | `systemInstruction` |
|
|
65
|
+
| Structure | Constraints-first, flat | XML tags, nesting OK | Constraints-last, plain text |
|
|
66
|
+
| Instruction style | Outcome-first, shorter | Long-form, detailed | Multimodal-friendly |
|
|
67
|
+
| Cache control | Auto (no code) | `cache_control: {type: "ephemeral"}` | Explicit context cache |
|
|
68
|
+
| Output format | Function calling | Structured output API | Controlled generation |
|
|
69
|
+
| Best practice source | platform.openai.com/docs/guides/prompt-engineering | docs.anthropic.com + interactive tutorial | cloud.google.com/vertex-ai/docs |
|
|
70
|
+
|
|
71
|
+
## Variable Substitution
|
|
72
|
+
|
|
73
|
+
Two-phase variable system:
|
|
74
|
+
|
|
75
|
+
```yaml
|
|
76
|
+
variables:
|
|
77
|
+
model_name: { type: string, compile: true } # Resolved at build
|
|
78
|
+
user_query: { type: string, compile: false } # Resolved at runtime
|
|
79
|
+
max_tokens: { type: number, compile: true, default: 4096 }
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Compile-time variables produce multiple compiled variants if multiple values are specified (e.g., `model_name: [gpt-5.2, claude-sonnet-4.5]`).
|
|
83
|
+
|
|
84
|
+
## Caching Strategy
|
|
85
|
+
|
|
86
|
+
- **Build cache**: Incremental — only recompile prompts whose spec hash changed
|
|
87
|
+
- **Output cache**: Compiled prompts stored by `{spec_hash}-{model}-{var_hash}.json`
|
|
88
|
+
- **Runtime**: Zero cost — load pre-compiled JSON, substitute runtime vars, send
|
|
89
|
+
- **npm distribution**: Compiled prompts are regular files in the package — no compilation code shipped
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
title: "Semantic Codebase Indexing"
|
|
4
|
+
created: 2026-04-30
|
|
5
|
+
status: developing
|
|
6
|
+
tags:
|
|
7
|
+
- code-indexing
|
|
8
|
+
- embeddings
|
|
9
|
+
- vector-search
|
|
10
|
+
- ast
|
|
11
|
+
aliases:
|
|
12
|
+
- Code Embedding
|
|
13
|
+
related:
|
|
14
|
+
- "[[Context Engine (AI Coding)]]"
|
|
15
|
+
- "[[Prompt Enhancement]]"
|
|
16
|
+
sources:
|
|
17
|
+
- "[[Augment Context Engine Official]]"
|
|
18
|
+
- "[[Augment Code Codacy AI Giants]]"
|
|
19
|
+
updated: 2026-05-02
|
|
20
|
+
|
|
21
|
+
---
# Semantic Codebase Indexing
|
|
22
|
+
|
|
23
|
+
The process of converting source code into vector embeddings that capture semantic meaning, enabling similarity search across a codebase without relying on exact keyword matching.
|
|
24
|
+
|
|
25
|
+
## How It Works
|
|
26
|
+
|
|
27
|
+
### 1. Code Chunking
|
|
28
|
+
- Split source files into logical units: functions, classes, methods, modules.
|
|
29
|
+
- Use tree-sitter AST parsing for language-aware chunk boundaries.
|
|
30
|
+
- Typical chunk size: 200-500 tokens for optimal embedding quality.
|
|
31
|
+
|
|
32
|
+
### 2. Embedding Generation
|
|
33
|
+
- Pass each chunk through an embedding model.
|
|
34
|
+
- Options: all-MiniLM-L6-v2 (384-dim, local), CodeBERT, or Voyage AI code embeddings.
|
|
35
|
+
- Augment Code uses custom embedding models trained in pairs for maximum retrieval quality.
|
|
36
|
+
|
|
37
|
+
### 3. Vector Database Storage
|
|
38
|
+
- Store embeddings in LanceDB, ChromaDB, or Qdrant.
|
|
39
|
+
- Index for fast approximate nearest neighbor (ANN) search.
|
|
40
|
+
- Attach metadata: file path, line range, function/class name, dependencies.
|
|
41
|
+
|
|
42
|
+
### 4. Real-time Sync
|
|
43
|
+
- Watch filesystem for changes using watchdog/inotify.
|
|
44
|
+
- Re-embed changed files incrementally.
|
|
45
|
+
- Augment claims "millisecond-level sync."
|
|
46
|
+
|
|
47
|
+
### 5. Hybrid Search
|
|
48
|
+
- Combine vector similarity (semantic) + BM25/keyword (lexical).
|
|
49
|
+
- Re-rank results by relevance, recency, and relationship proximity.
|
|
50
|
+
|
|
51
|
+
## Why Semantic > Grep
|
|
52
|
+
|
|
53
|
+
| Aspect | Grep/Keyword | Semantic Indexing |
|
|
54
|
+
|--------|-------------|-------------------|
|
|
55
|
+
| Finds related code | Only exact matches | Finds semantically similar code |
|
|
56
|
+
| Understands intent | No | Yes — "payment logging" finds telemetry, billing, audit |
|
|
57
|
+
| Cross-language | No | Partially — embeddings capture patterns |
|
|
58
|
+
| Relationship aware | No | Yes — understands call graphs and imports |
|
|
59
|
+
| Noise filtering | Manual | Automatic relevance ranking |
|
|
60
|
+
|
|
61
|
+
## Implementation Stack (for our harness)
|
|
62
|
+
|
|
63
|
+
- **Parser**: tree-sitter (18 languages via lean-ctx).
|
|
64
|
+
- **Embeddings**: sentence-transformers (all-MiniLM-L6-v2) or voyage-code-2.
|
|
65
|
+
- **Vector DB**: LanceDB (embedded, zero-config) or ChromaDB.
|
|
66
|
+
- **Sync**: watchdog (Python).
|
|
67
|
+
- **Search**: hybrid BM25 + cosine similarity with re-ranking.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
status: stub
|
|
4
|
+
created: 2026-05-02
|
|
5
|
+
updated: 2026-05-02
|
|
6
|
+
tags: [concept, configuration, claude-code]
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Additive Config Hierarchy
|
|
10
|
+
|
|
11
|
+
Configuration pattern from Claude Code: config layers stack additively (CLAUDE.md → project-level → user-level → system-level) rather than overriding. Each layer adds context rather than replacing previous layers.
|
|
12
|
+
|
|
13
|
+
## References
|
|
14
|
+
|
|
15
|
+
- [[claude-code-architecture-karaxai-2026]]
|
|
16
|
+
- [[harness-configuration-layers]]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
title: "Agent Artifacts (Trust via Verifiable Deliverables)"
|
|
4
|
+
status: developing
|
|
5
|
+
created: 2026-05-01
|
|
6
|
+
updated: 2026-05-01
|
|
7
|
+
tags:
|
|
8
|
+
- antigravity
|
|
9
|
+
- verification
|
|
10
|
+
- trust
|
|
11
|
+
- harness-design
|
|
12
|
+
aliases: ["Artifact system", "verifiable artifacts"]
|
|
13
|
+
related:
|
|
14
|
+
- "[[adversarial-verification]]"
|
|
15
|
+
- "[[automated-observability]]"
|
|
16
|
+
- "[[harness-implementation-plan]]"
|
|
17
|
+
- "[[antigravity-agent-first-architecture]]"
|
|
18
|
+
sources:
|
|
19
|
+
- "[[google-antigravity-official-blog]]"
|
|
20
|
+
- "[[cursor-vs-antigravity-2026]]"
|
|
21
|
+
|
|
22
|
+
---
# Agent Artifacts: Trust via Verifiable Deliverables
|
|
23
|
+
|
|
24
|
+
Google Antigravity's Artifact system replaces raw tool-call logs with human-readable, verifiable deliverables that agents generate as they work.
|
|
25
|
+
|
|
26
|
+
## What Are Artifacts?
|
|
27
|
+
|
|
28
|
+
Structured, verifiable outputs agents produce during execution:
|
|
29
|
+
- Task lists and implementation plans
|
|
30
|
+
- Screenshots and browser recordings
|
|
31
|
+
- Walkthrough documents
|
|
32
|
+
- Test result summaries
|
|
33
|
+
- Architecture diagrams
|
|
34
|
+
|
|
35
|
+
Artifacts represent work at a **task level**, not an API-call level. They are designed to be audited by humans, not parsed by machines.
|
|
36
|
+
|
|
37
|
+
## How Artifacts Build Trust
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
Raw tool logs: "execute_command: npm install" → "exit 0" → "write_file: src/auth.ts" → ...
|
|
41
|
+
Artifact: "Authentication migration plan" → "Screenshot: login page working" → "Test results: 23/23 pass"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
The second format is reviewable in seconds. The first requires scrolling through hundreds of lines.
|
|
45
|
+
|
|
46
|
+
## Feedback on Artifacts
|
|
47
|
+
|
|
48
|
+
- Developers comment on artifacts (Google Docs-style commenting)
|
|
49
|
+
- Agents incorporate feedback **without stopping execution**
|
|
50
|
+
- Feedback is asynchronous: you comment, the agent picks it up at the next checkpoint
|
|
51
|
+
- No need to restart tasks for mid-course corrections
|
|
52
|
+
|
|
53
|
+
## Comparison with Our Harness
|
|
54
|
+
|
|
55
|
+
| Dimension | Our Harness (L4 + L5) | Antigravity Artifacts |
|
|
56
|
+
|-----------|----------------------|----------------------|
|
|
57
|
+
| Verification type | Adversarial critic agents | Human-reviewable deliverables |
|
|
58
|
+
| Feedback loop | Multi-round debate (selective) | Async comments on artifacts |
|
|
59
|
+
| Trust mechanism | Critic proves work wrong | Agent proves work right |
|
|
60
|
+
| Cost | LLM tokens (critic rounds) | Human attention (review artifacts) |
|
|
61
|
+
|
|
62
|
+
## Gap Analysis
|
|
63
|
+
|
|
64
|
+
Our L4 adversarial verification asks: "Is this correct?" (critic finds flaws).
|
|
65
|
+
Antigravity's Artifacts ask: "Here's proof this is correct" (agent demonstrates success).
|
|
66
|
+
|
|
67
|
+
These are **complementary**. The critic catches what the agent missed. The artifact proves what the agent got right. Both should exist in the harness.
|
|
68
|
+
|
|
69
|
+
## Proposed Integration: Phase P31
|
|
70
|
+
|
|
71
|
+
Add an **Artifact Generation Layer** after L4 verification. Agents generate screenshots, browser recordings, and test result summaries as verifiable proof of work. These artifacts feed into L5 observability and serve as the human-reviewable interface.
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
title: "agent-browser — Rust-Native Browser Automation for AI Agents"
|
|
4
|
+
status: developing
|
|
5
|
+
created: 2026-05-02
|
|
6
|
+
updated: 2026-05-02
|
|
7
|
+
tags:
|
|
8
|
+
- browser-automation
|
|
9
|
+
- ai-agents
|
|
10
|
+
- vercel-labs
|
|
11
|
+
- rust
|
|
12
|
+
- cdp
|
|
13
|
+
- headless-browser
|
|
14
|
+
aliases: ["agent-browser", "Vercel Labs agent-browser"]
|
|
15
|
+
related:
|
|
16
|
+
- "[[browser-subagent-visual-verification]]"
|
|
17
|
+
- "[[harness-implementation-plan]]"
|
|
18
|
+
- "[[Source: Vercel Labs agent-browser]]"
|
|
19
|
+
sources:
|
|
20
|
+
- "[[Source: Vercel Labs agent-browser]]"
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
# agent-browser — Rust-Native Browser Automation for AI Agents
|
|
24
|
+
|
|
25
|
+
Vercel Labs agent-browser (31.4K GitHub stars, Apache 2.0, v0.26.0) is the leading open-source browser automation CLI built specifically for AI agents. Rust-native single binary, 112 contributors, 81 releases, 568 commits.
|
|
26
|
+
|
|
27
|
+
**Supersedes**: [[browser-harness-agent]] (9.4K stars, MIT, Python) — replaced May 2026 for P30. agent-browser has roughly 3.3× as many stars, richer AI agent integration, and Rust-native performance.
|
|
28
|
+
|
|
29
|
+
## Core Design
|
|
30
|
+
|
|
31
|
+
Unlike Puppeteer/Playwright (human scripting APIs) and browser-harness (raw CDP with self-healing), agent-browser provides an **agent-native interface**: snapshot-based element refs (`@e1`, `@e2`), JSON output, annotated screenshots, structured diff, and a built-in skills system. The AI agent thinks in terms of refs from snapshots — not CSS selectors, not CDP method calls.
|
|
32
|
+
|
|
33
|
+
## Key Innovations for AI Agents
|
|
34
|
+
|
|
35
|
+
### 1. Snapshot + Refs Workflow
|
|
36
|
+
```
|
|
37
|
+
agent-browser snapshot -i --json
|
|
38
|
+
→ Returns: {"refs": {"e1": {"role":"button","name":"Submit"}, "e2": {"role":"textbox","name":"Email"}}}
|
|
39
|
+
agent-browser click @e1 # deterministic, no DOM re-query
|
|
40
|
+
agent-browser fill @e2 "text" # refs survive page changes until re-snapshot
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### 2. Annotated Screenshots
|
|
44
|
+
```
|
|
45
|
+
agent-browser screenshot --annotate
|
|
46
|
+
→ Screenshot with numbered labels [1], [2], [3] matching @e1, @e2, @e3 refs
|
|
47
|
+
→ Multimodal models can reason about visual layout + refs simultaneously
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 3. Structured Diff
|
|
51
|
+
```
|
|
52
|
+
agent-browser diff screenshot --baseline before.png -o diff.png
|
|
53
|
+
agent-browser diff snapshot --baseline before-snapshot.txt
|
|
54
|
+
→ Structural + visual diff for verifying UI changes
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### 4. React Introspection
|
|
58
|
+
```
|
|
59
|
+
agent-browser open --enable react-devtools <url>
|
|
60
|
+
agent-browser react tree # full component tree
|
|
61
|
+
agent-browser react suspense # suspense boundaries + classifier
|
|
62
|
+
agent-browser vitals # LCP/CLS/TTFB/FCP/INP + React hydration
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### 5. Batch Mode
|
|
66
|
+
```
|
|
67
|
+
agent-browser batch "open url" "snapshot -i" "click @e1" "screenshot"
|
|
68
|
+
→ Multiple commands in single CLI invocation, reduces process startup overhead
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### 6. Built-in Skills
|
|
72
|
+
```
|
|
73
|
+
agent-browser skills get core # 420-line usage guide for agents
|
|
74
|
+
npx skills add vercel-labs/agent-browser # install skill stub
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Architecture
|
|
78
|
+
|
|
79
|
+
- **Rust CLI** + **Rust Daemon**: Single binary. Daemon auto-starts, persists between commands
|
|
80
|
+
- **Client-daemon**: Fast subsequent commands (no browser restart)
|
|
81
|
+
- **Direct CDP**: Like browser-harness — raw DevTools Protocol, no Puppeteer wrappers
|
|
82
|
+
- **Multi-provider**: Local Chrome + 6 cloud providers (Browserless, Browserbase, Browser Use, Kernel, AgentCore, iOS)
|
|
83
|
+
|
|
84
|
+
## Integration with P30
|
|
85
|
+
|
|
86
|
+
P30 Browser Subagent dispatches via P25 router for UI tasks. Harness invokes `agent-browser` CLI as a subprocess (or via batch mode for multi-step workflows). Config at `.pi/harness/browser.json`.
|
|
87
|
+
|
|
88
|
+
**What we use**:
|
|
89
|
+
- Snapshot + refs for element interaction
|
|
90
|
+
- Annotated screenshots for visual verification
|
|
91
|
+
- Diff for before/after comparison
|
|
92
|
+
- Batch mode for multi-step agent workflows
|
|
93
|
+
- `--json` for structured output parsing
|
|
94
|
+
|
|
95
|
+
**What we skip**:
|
|
96
|
+
- Dashboard (CLI harness only)
|
|
97
|
+
- AI Chat (our agent IS the chat)
|
|
98
|
+
- Cloud providers (local Chrome only; opt-in for serverless)
|
|
99
|
+
- iOS Simulator (web-focused; opt-in)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
title: "Agent-Codebase Interface (ACI)"
|
|
4
|
+
created: 2026-04-30
|
|
5
|
+
updated: 2026-04-30
|
|
6
|
+
tags:
|
|
7
|
+
- agent-architecture
|
|
8
|
+
- codebase-exploration
|
|
9
|
+
- interface-design
|
|
10
|
+
related:
|
|
11
|
+
- "[[swe-agent-aci]]"
|
|
12
|
+
- "[[research-agent-first-codebase-exploration]]"
|
|
13
|
+
status: developing
|
|
14
|
+
|
|
15
|
+
---
# Agent-Codebase Interface (ACI)
|
|
16
|
+
|
|
17
|
+
The design of tool interfaces built for AI agents — not humans — to interact with codebases. It extends the SWE-agent concept of Agent-Computer Interfaces to codebase exploration.
|
|
18
|
+
|
|
19
|
+
## Core Principle
|
|
20
|
+
|
|
21
|
+
Agents process information differently from humans. They have:
|
|
22
|
+
- **Fixed context windows** (not infinite working memory)
|
|
23
|
+
- **Token-based costs** (every byte of context has a cost)
|
|
24
|
+
- **No visual cortex** (can't "see" code structure, need explicit representations)
|
|
25
|
+
- **No intuition** (can't form mental models from partial exposure)
|
|
26
|
+
- **Perfect recall within context** (but zero recall outside it)
|
|
27
|
+
|
|
28
|
+
Therefore, the interface must:
|
|
29
|
+
1. Maximize information density per token
|
|
30
|
+
2. Present structured, machine-parseable representations
|
|
31
|
+
3. Support progressive disclosure (drill down on demand)
|
|
32
|
+
4. Enable autonomous navigation decisions
|
|
33
|
+
|
|
34
|
+
## Contrast with Human Interfaces
|
|
35
|
+
|
|
36
|
+
| Human Interface | Agent Interface |
|
|
37
|
+
|----------------|-----------------|
|
|
38
|
+
| Syntax highlighting, file trees | AST symbol maps, dependency graphs |
|
|
39
|
+
| Scroll through files | Fetch specific symbol definitions |
|
|
40
|
+
| Visual pattern recognition | Semantic search + structured queries |
|
|
41
|
+
| Gradual immersion ("Paper Cuts") | Bulk ingestion + ranking algorithms |
|
|
42
|
+
| IDE debugging (step-through) | Execution feedback loops (run tests, check output) |
|
|
43
|
+
| "Use the project" to learn | "Map the project" to learn |
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: concept
|
|
3
|
+
tags:
|
|
4
|
+
- harness
|
|
5
|
+
- architecture
|
|
6
|
+
- context-engineering
|
|
7
|
+
- safety
|
|
8
|
+
related:
|
|
9
|
+
- "[[Agentic Orchestration Pipeline]]"
|
|
10
|
+
- "[[Context Engineering]]"
|
|
11
|
+
- "[[Safety Defense-in-Depth]]"
|
|
12
|
+
- "[[sources/martin-fowler-harness-engineering]]"
|
|
13
|
+
- "[[sources/opendev-arxiv-2603.05344v1]]"
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# Agent Harness Architecture
|
|
17
|
+
|
|
18
|
+
The harness is everything in an AI coding agent except the model itself: the runtime orchestration layer that wraps the reasoning loop and coordinates tool dispatch, context management, safety enforcement, and session persistence. Defined as: **Agent = Model + Harness**.
|
|
19
|
+
|
|
20
|
+
## Two-Phase Model
|
|
21
|
+
|
|
22
|
+
### Scaffolding (Pre-Runtime)
|
|
23
|
+
Runs once before the first prompt. Assembles the agent:
|
|
24
|
+
- System prompt compilation (conditional, priority-ordered sections)
|
|
25
|
+
- Tool schema building (from registry, MCP discovery, subagent schemas)
|
|
26
|
+
- Subagent registration and initialization
|
|
27
|
+
|
|
28
|
+
### Harness (Runtime)
|
|
29
|
+
Operates continuously during execution:
|
|
30
|
+
- Tool dispatch with safety gating
|
|
31
|
+
- Context lifecycle management (compaction, reminders, memory)
|
|
32
|
+
- Approval workflows (Manual/Semi-Auto/Auto)
|
|
33
|
+
- Session persistence and undo tracking
|
|
34
|
+
|
|
35
|
+
## Feedforward + Feedback Model
|
|
36
|
+
|
|
37
|
+
| Direction | Type | Examples |
|
|
38
|
+
|-----------|------|----------|
|
|
39
|
+
| **Feedforward (Guides)** | Steer before action | System prompts, AGENTS.md, Skills, coding conventions, architecture docs |
|
|
40
|
+
| **Feedback (Sensors)** | Observe after action | Linters, tests, review agents, type checkers, structural analysis |
|
|
41
|
+
|
|
42
|
+
Two execution modes:
|
|
43
|
+
- **Computational**: Deterministic, fast — tests, linters, type checkers
|
|
44
|
+
- **Inferential**: LLM-based, semantic — AI code reviews, "LLM as judge"
|
|
45
|
+
|
|
46
|
+
## The Steering Loop
|
|
47
|
+
|
|
48
|
+
Human developers iterate on the harness: whenever an issue occurs repeatedly, improve feedforward guides or feedback sensors. Agents can help build harness components (write tests, generate linter rules, create documentation).
|
|
49
|
+
|
|
50
|
+
## Harness Layers (OpenDev Reference)
|
|
51
|
+
|
|
52
|
+
1. **Prompt Composition**: Conditional sections sorted by priority, provider-specific variants, ${VAR} substitution, two-part caching
|
|
53
|
+
2. **Context Engineering**: Staged compaction, event-driven reminders, dual-memory architecture, tool result optimization
|
|
54
|
+
3. **Tool System**: Registry with handler categories, lazy MCP discovery, batch execution, 9-pass fuzzy edit matching
|
|
55
|
+
4. **Safety System**: 5-layer defense-in-depth (prompt → schema → approval → validation → hooks)
|
|
56
|
+
5. **Persistence**: Session storage, operation log/undo, configuration hierarchy, provider cache
|
|
57
|
+
|
|
58
|
+
## Harness Templates
|
|
59
|
+
|
|
60
|
+
For common topologies (CRUD APIs, event processors, dashboards), a harness template bundles guides + sensors as a reusable package. Teams select tech stacks partly based on available harnesses.
|
|
61
|
+
|
|
62
|
+
## Relevance to Our Harness
|
|
63
|
+
|
|
64
|
+
Our current harness architecture:
|
|
65
|
+
- **Scaffolding**: `.pi/skills/` system, agent prompt engineering, wiki as knowledge base
|
|
66
|
+
- **Runtime**: `lean-ctx` for tool routing, `Agent` for subagent spawning, `wiki-autoresearch` for research
|
|
67
|
+
- **Gaps**: No safety defense-in-depth, no staged compaction, no event-driven reminders, no team dispatch, no sequential chaining
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
---
|
|
2
|
+
aliases: ["agent loop patterns", "stuck agent detection", "tool call loops"]
|
|
3
|
+
type: concept
|
|
4
|
+
title: "Agent Loop Detection Patterns"
|
|
5
|
+
created: 2026-04-30
|
|
6
|
+
status: developing
|
|
7
|
+
tags:
|
|
8
|
+
- concept
|
|
9
|
+
- loop-detection
|
|
10
|
+
- agent-reliability
|
|
11
|
+
- production
|
|
12
|
+
related:
|
|
13
|
+
- "[[Research: Meta-Agent Context Drift Detection]]"
|
|
14
|
+
- "[[context-drift-in-agents]]"
|
|
15
|
+
- "[[meta-agent-context-pruning]]"
|
|
16
|
+
- "[[langsight-loop-detection]]"
|
|
17
|
+
- "[[ironclaw-drift-monitor]]"
|
|
18
|
+
updated: 2026-05-02
|
|
19
|
+
|
|
20
|
+
---
# Agent Loop Detection Patterns
|
|
21
|
+
|
|
22
|
+
Production-grade detection patterns for identifying when an AI agent is stuck in a non-productive loop. Based on LangSight's production experience and ironclaw's DriftMonitor proposal.
|
|
23
|
+
|
|
24
|
+
## Three Loop Types
|
|
25
|
+
|
|
26
|
+
### 1. Direct Repetition
|
|
27
|
+
|
|
28
|
+
Same tool called with identical arguments multiple times in a row. Most common pattern.
|
|
29
|
+
|
|
30
|
+
**Cause**: Tool returns error or unexpected result. LLM's retry logic doesn't distinguish "transient failure, retry" from "structural failure, give up."
|
|
31
|
+
|
|
32
|
+
**Real-world example**: Support agent called `crm-mcp/lookup_customer` 89 times with identical arguments. CRM returned slightly malformed response. Agent decided it needed more data, called same tool, got same malformed response, repeated. Cost: $214.
|
|
33
|
+
|
|
34
|
+
**Detection**: `SHA256(tool_name + normalized_args)[:16]`. If same hash appears ≥3 times in session window, flag as loop.
|
|
35
|
+
|
|
36
|
+
### 2. Ping-Pong Between Tools
|
|
37
|
+
|
|
38
|
+
Two tools called alternately without state change between calls.
|
|
39
|
+
|
|
40
|
+
**Example**: Agent calls CRM → gets customer → calls Billing → gets invoices → calls CRM again with same args → calls Billing again.
|
|
41
|
+
|
|
42
|
+
**Detection**: Sequence pattern matching on last 6 calls. A-B-A-B-A-B pattern triggers detection.
|
|
43
|
+
|
|
44
|
+
### 3. Retry-Without-Progress
|
|
45
|
+
|
|
46
|
+
Tool call succeeds (no error) but response doesn't satisfy agent's internal goal. Agent keeps calling with minor argument variations.
|
|
47
|
+
|
|
48
|
+
**Detection**: Semantic similarity of consecutive reasoning outputs >0.95 cosine across multiple steps. Computationally expensive.
|
|
49
|
+
|
|
50
|
+
## Detection Approaches
|
|
51
|
+
|
|
52
|
+
### Approach 1: Argument Hash (Recommended)
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
import hashlib, json
|
|
56
|
+
from collections import Counter
|
|
57
|
+
|
|
58
|
+
def compute_call_hash(tool_name: str, args: dict) -> str:
|
|
59
|
+
payload = f"{tool_name}:{json.dumps(args, sort_keys=True)}"
|
|
60
|
+
return hashlib.sha256(payload.encode()).hexdigest()[:16]
|
|
61
|
+
|
|
62
|
+
class LoopDetector:
|
|
63
|
+
def __init__(self, threshold: int = 3):
|
|
64
|
+
self.threshold = threshold
|
|
65
|
+
self.call_counts = Counter()
|
|
66
|
+
|
|
67
|
+
def record_call(self, tool_name: str, args: dict) -> bool:
|
|
68
|
+
call_hash = compute_call_hash(tool_name, args)
|
|
69
|
+
self.call_counts[call_hash] += 1
|
|
70
|
+
return self.call_counts[call_hash] >= self.threshold
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Catches >90% of real-world loops with zero false positives at threshold 3.
|
|
74
|
+
|
|
75
|
+
### Approach 2: Sliding Window Rate
|
|
76
|
+
|
|
77
|
+
Count tool calls regardless of argument variation. If a tool is called at least N times within M seconds, flag it.
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from collections import deque
|
|
81
|
+
from datetime import datetime, timedelta
|
|
82
|
+
|
|
83
|
+
class RateLoopDetector:
    """Flag a tool that is called too often within a sliding time window.

    Calls are counted per tool name only; arguments are ignored. Timestamps
    come from a monotonic clock, so system clock adjustments cannot distort
    the window.
    """

    def __init__(self, max_calls: int = 10, window_seconds: int = 60):
        """
        Args:
            max_calls: Number of calls inside the window at which a tool is
                flagged.
            window_seconds: Length of the sliding window, in seconds.
        """
        self.max_calls = max_calls
        self.window = timedelta(seconds=window_seconds)
        # tool name -> monotonic timestamps (seconds) of the calls still
        # inside the window, oldest first
        self.call_times: dict[str, deque] = {}

    def record_call(self, tool_name: str) -> bool:
        """Record one call to ``tool_name`` and report whether it is looping.

        Args:
            tool_name: Name of the tool being invoked.

        Returns:
            True when the number of calls to this tool inside the window,
            including this one, is at least ``max_calls``.
        """
        # Function-scope import keeps the snippet self-contained.
        # time.monotonic() replaces datetime.utcnow(), which is deprecated
        # since Python 3.12 and follows the (adjustable) wall clock.
        import time

        now = time.monotonic()
        window_seconds = self.window.total_seconds()
        times = self.call_times.setdefault(tool_name, deque())
        # Drop timestamps that have fallen out of the window.
        while times and now - times[0] > window_seconds:
            times.popleft()
        times.append(now)
        return len(times) >= self.max_calls
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Approach 3: LLM Similarity
|
|
101
|
+
|
|
102
|
+
Compare semantic similarity between consecutive reasoning outputs. Most sophisticated but computationally expensive. Usually overkill — Approaches 1+2 catch >90%.
|
|
103
|
+
|
|
104
|
+
## Intervention Strategies
|
|
105
|
+
|
|
106
|
+
| Strategy | When | Risk |
|
|
107
|
+
|----------|------|------|
|
|
108
|
+
| **Warn + continue** | Early monitoring, unsure about thresholds | No false-termination risk, but loops continue |
|
|
109
|
+
| **Terminate session** | Production, confident in thresholds | False termination loses partial work |
|
|
110
|
+
| **Inject recovery** | Want agent to self-correct | Agent may ignore or loop again |
|
|
111
|
+
| **Prune + restart** | Proposed meta-agent pattern | Pruning may remove useful context |
|
|
112
|
+
|
|
113
|
+
## Threshold Tuning
|
|
114
|
+
|
|
115
|
+
- **Default**: 3 identical calls. Works for most agents.
|
|
116
|
+
- **Polling agents**: Use time-based windows (Approach 2), not count-based.
|
|
117
|
+
- **Retry-heavy workflows**: Increase to 5-7.
|
|
118
|
+
- **Sub-agents**: Each sub-agent gets own detector. Parent calling same sub-agent multiple times is not a loop.
|
|
119
|
+
- **Start with warn, switch to terminate**: Monitor for a week, then enforce.
|
|
120
|
+
|
|
121
|
+
## Always Combine With Budget Guardrails
|
|
122
|
+
|
|
123
|
+
Loop detection catches known patterns. Budget guardrails catch unknown patterns:
|
|
124
|
+
- Max cost per session ($1 default)
|
|
125
|
+
- Max steps (25 default)
|
|
126
|
+
- Max wall time (120s default)
|
|
127
|
+
- Soft alert at 80% of budget
|
|
128
|
+
|
|
129
|
+
## See Also
|
|
130
|
+
|
|
131
|
+
- [[meta-agent-context-pruning]] — Extends detection with pruning + restart
|
|
132
|
+
- [[langsight-loop-detection]] — Source: production deployment guide
|
|
133
|
+
- [[ironclaw-drift-monitor]] — Source: 5-rule DriftMonitor proposal
|