ultimate-pi 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (516) hide show
  1. package/.agents/skills/ck-search/SKILL.md +99 -0
  2. package/.agents/skills/defuddle/SKILL.md +90 -0
  3. package/.agents/skills/find-skills/SKILL.md +142 -0
  4. package/.agents/skills/firecrawl/SKILL.md +150 -0
  5. package/.agents/skills/firecrawl/rules/install.md +82 -0
  6. package/.agents/skills/firecrawl/rules/security.md +26 -0
  7. package/.agents/skills/firecrawl-agent/SKILL.md +57 -0
  8. package/.agents/skills/firecrawl-build-interact/SKILL.md +67 -0
  9. package/.agents/skills/firecrawl-build-onboarding/SKILL.md +102 -0
  10. package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +39 -0
  11. package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +20 -0
  12. package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +17 -0
  13. package/.agents/skills/firecrawl-build-scrape/SKILL.md +68 -0
  14. package/.agents/skills/firecrawl-build-search/SKILL.md +68 -0
  15. package/.agents/skills/firecrawl-crawl/SKILL.md +58 -0
  16. package/.agents/skills/firecrawl-download/SKILL.md +69 -0
  17. package/.agents/skills/firecrawl-interact/SKILL.md +83 -0
  18. package/.agents/skills/firecrawl-map/SKILL.md +50 -0
  19. package/.agents/skills/firecrawl-parse/SKILL.md +61 -0
  20. package/.agents/skills/firecrawl-scrape/SKILL.md +68 -0
  21. package/.agents/skills/firecrawl-search/SKILL.md +59 -0
  22. package/.agents/skills/obsidian-bases/SKILL.md +299 -0
  23. package/.agents/skills/obsidian-markdown/SKILL.md +237 -0
  24. package/.agents/skills/posthog-analyst/SKILL.md +306 -0
  25. package/.agents/skills/posthog-analyst/evals/evals.json +23 -0
  26. package/.agents/skills/wiki/SKILL.md +215 -0
  27. package/.agents/skills/wiki/references/css-snippets.md +122 -0
  28. package/.agents/skills/wiki/references/frontmatter.md +107 -0
  29. package/.agents/skills/wiki/references/git-setup.md +58 -0
  30. package/.agents/skills/wiki/references/mcp-setup.md +149 -0
  31. package/.agents/skills/wiki/references/modes.md +259 -0
  32. package/.agents/skills/wiki/references/plugins.md +96 -0
  33. package/.agents/skills/wiki/references/rest-api.md +124 -0
  34. package/.agents/skills/wiki-autoresearch/SKILL.md +211 -0
  35. package/.agents/skills/wiki-autoresearch/references/program.md +75 -0
  36. package/.agents/skills/wiki-fold/SKILL.md +204 -0
  37. package/.agents/skills/wiki-fold/references/fold-template.md +133 -0
  38. package/.agents/skills/wiki-ingest/SKILL.md +288 -0
  39. package/.agents/skills/wiki-lint/SKILL.md +183 -0
  40. package/.agents/skills/wiki-query/SKILL.md +176 -0
  41. package/.agents/skills/wiki-save/SKILL.md +128 -0
  42. package/.ckignore +41 -0
  43. package/.env.example +9 -0
  44. package/.github/workflows/lint.yml +33 -0
  45. package/.github/workflows/publish-github-packages.yml +35 -0
  46. package/.github/workflows/publish-npm.yml +1 -1
  47. package/.pi/SYSTEM.md +107 -40
  48. package/.pi/agents/pi-pi/agent-expert.md +205 -0
  49. package/.pi/agents/pi-pi/cli-expert.md +47 -0
  50. package/.pi/agents/pi-pi/config-expert.md +67 -0
  51. package/.pi/agents/pi-pi/ext-expert.md +53 -0
  52. package/.pi/agents/pi-pi/keybinding-expert.md +123 -0
  53. package/.pi/agents/pi-pi/pi-orchestrator.md +103 -0
  54. package/.pi/agents/pi-pi/prompt-expert.md +83 -0
  55. package/.pi/agents/pi-pi/skill-expert.md +52 -0
  56. package/.pi/agents/pi-pi/theme-expert.md +46 -0
  57. package/.pi/agents/pi-pi/tui-expert.md +100 -0
  58. package/.pi/agents/rethink.md +140 -0
  59. package/.pi/agents/wiki-ingest.md +67 -0
  60. package/.pi/agents/wiki-lint.md +75 -0
  61. package/.pi/auto-commit.json +20 -0
  62. package/.pi/extensions/banner.png +0 -0
  63. package/.pi/extensions/ck-enforce.ts +216 -0
  64. package/.pi/extensions/custom-footer.ts +308 -0
  65. package/.pi/extensions/custom-header.ts +116 -0
  66. package/.pi/extensions/dotenv-loader.ts +170 -0
  67. package/.pi/internal/cursor-sdk-transcript-parser.ts +59 -0
  68. package/.pi/model-router.json +95 -0
  69. package/.pi/npm/.gitignore +2 -0
  70. package/.pi/prompts/git-sync.md +124 -0
  71. package/.pi/prompts/harness-setup.md +509 -0
  72. package/.pi/prompts/save.md +16 -0
  73. package/.pi/prompts/wiki-autoresearch.md +19 -0
  74. package/.pi/prompts/wiki.md +23 -0
  75. package/.pi/providers/cursor-sdk-provider.test.mjs +476 -0
  76. package/.pi/providers/cursor-sdk-provider.ts +1085 -0
  77. package/.pi/settings.json +14 -4
  78. package/.pi/skills/agent-router/SKILL.md +174 -0
  79. package/.pi/sounds/alert/1-kaching-track.mp3 +0 -0
  80. package/.pi/sounds/error/1-ksi-wth-track.mp3 +0 -0
  81. package/.pi/sounds/error/2-smash-track.mp3 +0 -0
  82. package/.pi/sounds/error/3-buzzer-track.mp3 +0 -0
  83. package/.pi/sounds/notification/1-soft-notification-track.mp3 +0 -0
  84. package/.pi/sounds/project-sounds.json +25 -0
  85. package/.pi/sounds/reminder/1-soft-notification-track.mp3 +0 -0
  86. package/.pi/sounds/success/1-tada-track.mp3 +0 -0
  87. package/.pi/sounds/success/2-jobs-done-track.mp3 +0 -0
  88. package/.pi/sounds/success/3-yay-track.mp3 +0 -0
  89. package/CONTRIBUTING.md +116 -0
  90. package/README.md +32 -39
  91. package/biome.json +34 -0
  92. package/firecrawl/.env.template +58 -0
  93. package/firecrawl/README.md +49 -0
  94. package/firecrawl/docker-compose.yaml +201 -0
  95. package/firecrawl/searxng/searxng.env +3 -0
  96. package/firecrawl/searxng/settings.yml +85 -0
  97. package/lefthook.yml +8 -0
  98. package/package.json +55 -24
  99. package/vault/AGENTS.md +37 -0
  100. package/vault/wiki/_templates/comparison.md +39 -0
  101. package/vault/wiki/_templates/concept.md +40 -0
  102. package/vault/wiki/_templates/decision.md +21 -0
  103. package/vault/wiki/_templates/entity.md +32 -0
  104. package/vault/wiki/_templates/flow.md +14 -0
  105. package/vault/wiki/_templates/module.md +18 -0
  106. package/vault/wiki/_templates/question.md +31 -0
  107. package/vault/wiki/_templates/source.md +39 -0
  108. package/vault/wiki/concepts/AST-Aware Code Chunking.md +44 -0
  109. package/vault/wiki/concepts/Build-Time Prompt Compilation.md +107 -0
  110. package/vault/wiki/concepts/Context Engine (AI Coding).md +47 -0
  111. package/vault/wiki/concepts/Context-Aware System Reminders.md +61 -0
  112. package/vault/wiki/concepts/Contextualized Text Embedding.md +42 -0
  113. package/vault/wiki/concepts/Contractor vs Employee AI Model.md +55 -0
  114. package/vault/wiki/concepts/Dual-Model Agent Architecture.md +65 -0
  115. package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +43 -0
  116. package/vault/wiki/concepts/Majority Vote Ensembling.md +68 -0
  117. package/vault/wiki/concepts/Meta-Harness.md +16 -0
  118. package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +75 -0
  119. package/vault/wiki/concepts/Prompt Enhancement.md +90 -0
  120. package/vault/wiki/concepts/Prompt Renderer.md +89 -0
  121. package/vault/wiki/concepts/Semantic Codebase Indexing.md +67 -0
  122. package/vault/wiki/concepts/additive-config-hierarchy.md +16 -0
  123. package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +71 -0
  124. package/vault/wiki/concepts/agent-browser-browser-automation.md +99 -0
  125. package/vault/wiki/concepts/agent-codebase-interface.md +43 -0
  126. package/vault/wiki/concepts/agent-harness-architecture.md +67 -0
  127. package/vault/wiki/concepts/agent-loop-detection-patterns.md +133 -0
  128. package/vault/wiki/concepts/agent-search-enforcement.md +126 -0
  129. package/vault/wiki/concepts/agent-skills-ecosystem.md +74 -0
  130. package/vault/wiki/concepts/agent-skills-pattern.md +68 -0
  131. package/vault/wiki/concepts/agentic-harness-context-enforcement.md +91 -0
  132. package/vault/wiki/concepts/agentic-harness.md +34 -0
  133. package/vault/wiki/concepts/agentic-orchestration-pipeline.md +56 -0
  134. package/vault/wiki/concepts/agentic-search-no-embeddings.md +18 -0
  135. package/vault/wiki/concepts/anthropic-context-engineering.md +13 -0
  136. package/vault/wiki/concepts/antigravity-agent-first-architecture.md +61 -0
  137. package/vault/wiki/concepts/ast-compression.md +19 -0
  138. package/vault/wiki/concepts/ast-truncation.md +66 -0
  139. package/vault/wiki/concepts/barrel-files.md +37 -0
  140. package/vault/wiki/concepts/browser-harness-agent.md +41 -0
  141. package/vault/wiki/concepts/browser-subagent-visual-verification.md +82 -0
  142. package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +192 -0
  143. package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +161 -0
  144. package/vault/wiki/concepts/codebase-to-context-ingestion.md +46 -0
  145. package/vault/wiki/concepts/codex-harness-innovations.md +147 -0
  146. package/vault/wiki/concepts/consensus-debate-flow.md +17 -0
  147. package/vault/wiki/concepts/consensus-debate.md +206 -0
  148. package/vault/wiki/concepts/content-addressed-spec-identity.md +166 -0
  149. package/vault/wiki/concepts/context-anxiety.md +57 -0
  150. package/vault/wiki/concepts/context-compression-techniques.md +19 -0
  151. package/vault/wiki/concepts/context-continuity.md +22 -0
  152. package/vault/wiki/concepts/context-drift-in-agents.md +106 -0
  153. package/vault/wiki/concepts/context-engineering.md +62 -0
  154. package/vault/wiki/concepts/context-folding.md +67 -0
  155. package/vault/wiki/concepts/context-mode.md +38 -0
  156. package/vault/wiki/concepts/cursor-harness-innovations.md +107 -0
  157. package/vault/wiki/concepts/deterministic-session-compaction.md +79 -0
  158. package/vault/wiki/concepts/drift-detection-unified.md +296 -0
  159. package/vault/wiki/concepts/execution-feedback-loop.md +46 -0
  160. package/vault/wiki/concepts/feedforward-feedback-harness.md +60 -0
  161. package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +40 -0
  162. package/vault/wiki/concepts/fork-safe-spec-storage.md +89 -0
  163. package/vault/wiki/concepts/fts5-sandbox.md +19 -0
  164. package/vault/wiki/concepts/fuzzy-edit-matching.md +71 -0
  165. package/vault/wiki/concepts/gemini-cli-architecture.md +104 -0
  166. package/vault/wiki/concepts/generator-evaluator-architecture.md +64 -0
  167. package/vault/wiki/concepts/guardian-agent-pattern.md +67 -0
  168. package/vault/wiki/concepts/harness-configuration-layers.md +89 -0
  169. package/vault/wiki/concepts/harness-control-frameworks.md +155 -0
  170. package/vault/wiki/concepts/harness-engineering-first-principles.md +90 -0
  171. package/vault/wiki/concepts/harness-h-formalism.md +53 -0
  172. package/vault/wiki/concepts/hybrid-code-search.md +61 -0
  173. package/vault/wiki/concepts/inline-post-edit-validation.md +112 -0
  174. package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +110 -0
  175. package/vault/wiki/concepts/lifecycle-hooks.md +94 -0
  176. package/vault/wiki/concepts/mcp-tool-routing.md +102 -0
  177. package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +47 -0
  178. package/vault/wiki/concepts/meta-agent-context-pruning.md +151 -0
  179. package/vault/wiki/concepts/model-adaptive-harness.md +122 -0
  180. package/vault/wiki/concepts/model-routing-agents.md +101 -0
  181. package/vault/wiki/concepts/monorepo-architecture.md +45 -0
  182. package/vault/wiki/concepts/multi-agent-specialization.md +61 -0
  183. package/vault/wiki/concepts/permission-subsystem.md +16 -0
  184. package/vault/wiki/concepts/pi-messenger-analysis.md +243 -0
  185. package/vault/wiki/concepts/pi-vscode-extension-landscape.md +37 -0
  186. package/vault/wiki/concepts/policy-engine-pattern.md +78 -0
  187. package/vault/wiki/concepts/progressive-disclosure-agents.md +53 -0
  188. package/vault/wiki/concepts/progressive-skill-disclosure.md +17 -0
  189. package/vault/wiki/concepts/provider-native-prompting.md +203 -0
  190. package/vault/wiki/concepts/quality-signal-sentrux.md +37 -0
  191. package/vault/wiki/concepts/repo-map-ranking.md +42 -0
  192. package/vault/wiki/concepts/result-monad-error-handling.md +47 -0
  193. package/vault/wiki/concepts/safety-defense-in-depth.md +83 -0
  194. package/vault/wiki/concepts/sandbox-os-enforcement.md +18 -0
  195. package/vault/wiki/concepts/selective-debate-routing.md +70 -0
  196. package/vault/wiki/concepts/self-evolving-harness.md +60 -0
  197. package/vault/wiki/concepts/sentrux-mcp-integration.md +36 -0
  198. package/vault/wiki/concepts/sentrux-rules-engine.md +49 -0
  199. package/vault/wiki/concepts/shell-pattern-compression.md +24 -0
  200. package/vault/wiki/concepts/skill-first-architecture.md +166 -0
  201. package/vault/wiki/concepts/structured-compaction.md +78 -0
  202. package/vault/wiki/concepts/subagent-orchestration.md +17 -0
  203. package/vault/wiki/concepts/subagent-worktree-isolation.md +68 -0
  204. package/vault/wiki/concepts/superpowers-methodology.md +78 -0
  205. package/vault/wiki/concepts/think-in-code.md +73 -0
  206. package/vault/wiki/concepts/ts-execution-layer.md +100 -0
  207. package/vault/wiki/concepts/typescript-strict-mode.md +37 -0
  208. package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +51 -0
  209. package/vault/wiki/concepts/verification-drift-detection.md +19 -0
  210. package/vault/wiki/consensus/consensus-records.md +58 -0
  211. package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +122 -0
  212. package/vault/wiki/decisions/adr-008.md +40 -0
  213. package/vault/wiki/decisions/adr-009.md +46 -0
  214. package/vault/wiki/decisions/adr-010.md +55 -0
  215. package/vault/wiki/decisions/adr-011.md +165 -0
  216. package/vault/wiki/decisions/adr-012.md +102 -0
  217. package/vault/wiki/decisions/adr-013.md +59 -0
  218. package/vault/wiki/decisions/adr-014.md +73 -0
  219. package/vault/wiki/decisions/adr-015.md +81 -0
  220. package/vault/wiki/decisions/adr-016.md +91 -0
  221. package/vault/wiki/decisions/adr-017.md +79 -0
  222. package/vault/wiki/decisions/adr-018.md +100 -0
  223. package/vault/wiki/decisions/adr-019.md +75 -0
  224. package/vault/wiki/decisions/adr-020.md +106 -0
  225. package/vault/wiki/decisions/adr-021.md +86 -0
  226. package/vault/wiki/decisions/adr-022.md +113 -0
  227. package/vault/wiki/decisions/adr-023.md +113 -0
  228. package/vault/wiki/decisions/adr-024.md +73 -0
  229. package/vault/wiki/decisions/adr-025.md +130 -0
  230. package/vault/wiki/decisions/adr-026.md +56 -0
  231. package/vault/wiki/decisions/colocate-wiki.md +34 -0
  232. package/vault/wiki/entities/Anders Hejlsberg.md +29 -0
  233. package/vault/wiki/entities/Anthropic.md +17 -0
  234. package/vault/wiki/entities/Augment Code.md +49 -0
  235. package/vault/wiki/entities/Bjarne Stroustrup.md +26 -0
  236. package/vault/wiki/entities/Bolt.new (StackBlitz).md +39 -0
  237. package/vault/wiki/entities/Boris Cherny.md +11 -0
  238. package/vault/wiki/entities/Claude Code.md +19 -0
  239. package/vault/wiki/entities/Dennis Ritchie.md +26 -0
  240. package/vault/wiki/entities/Emergent Labs.md +32 -0
  241. package/vault/wiki/entities/Google Cloud.md +16 -0
  242. package/vault/wiki/entities/Guido van Rossum.md +28 -0
  243. package/vault/wiki/entities/Ken Thompson.md +28 -0
  244. package/vault/wiki/entities/Lee et al.md +16 -0
  245. package/vault/wiki/entities/Linus Torvalds.md +28 -0
  246. package/vault/wiki/entities/Lovable (company).md +40 -0
  247. package/vault/wiki/entities/Martin Fowler.md +16 -0
  248. package/vault/wiki/entities/Meng et al.md +16 -0
  249. package/vault/wiki/entities/OpenAI.md +16 -0
  250. package/vault/wiki/entities/Rocket.new.md +38 -0
  251. package/vault/wiki/entities/VILA-Lab.md +15 -0
  252. package/vault/wiki/entities/autodev-codebase.md +18 -0
  253. package/vault/wiki/entities/ck-tool.md +59 -0
  254. package/vault/wiki/entities/codesearch.md +18 -0
  255. package/vault/wiki/entities/disler-indydevdan.md +33 -0
  256. package/vault/wiki/entities/gsd-get-shit-done.md +56 -0
  257. package/vault/wiki/entities/javascript-runtimes.md +48 -0
  258. package/vault/wiki/entities/jesse-vincent.md +38 -0
  259. package/vault/wiki/entities/lean-ctx.md +32 -0
  260. package/vault/wiki/entities/opendev.md +41 -0
  261. package/vault/wiki/entities/ops-codegraph-tool.md +18 -0
  262. package/vault/wiki/entities/pi-coding-agent.md +53 -0
  263. package/vault/wiki/entities/sentrux.md +54 -0
  264. package/vault/wiki/entities/vgrep-tool.md +57 -0
  265. package/vault/wiki/entities/vitest.md +41 -0
  266. package/vault/wiki/flows/harness-wiki-pipeline.md +204 -0
  267. package/vault/wiki/hot.md +932 -0
  268. package/vault/wiki/index.md +437 -0
  269. package/vault/wiki/log.md +418 -0
  270. package/vault/wiki/meta/dashboard.md +30 -0
  271. package/vault/wiki/meta/lint-report-2026-04-30.md +86 -0
  272. package/vault/wiki/meta/lint-report-2026-05-02.md +251 -0
  273. package/vault/wiki/meta/overview.canvas +43 -0
  274. package/vault/wiki/modules/adversarial-verification.md +57 -0
  275. package/vault/wiki/modules/automated-observability.md +54 -0
  276. package/vault/wiki/modules/bench.md +20 -0
  277. package/vault/wiki/modules/extensions.md +23 -0
  278. package/vault/wiki/modules/grounding-checkpoints.md +62 -0
  279. package/vault/wiki/modules/harness-implementation-plan.md +345 -0
  280. package/vault/wiki/modules/harness-wiki-skill-mapping.md +135 -0
  281. package/vault/wiki/modules/harness.md +86 -0
  282. package/vault/wiki/modules/persistent-memory.md +85 -0
  283. package/vault/wiki/modules/schema-orchestration.md +68 -0
  284. package/vault/wiki/modules/skills.md +27 -0
  285. package/vault/wiki/modules/spec-hardening.md +58 -0
  286. package/vault/wiki/modules/structured-planning.md +53 -0
  287. package/vault/wiki/modules/think-in-code-enforcement.md +153 -0
  288. package/vault/wiki/modules/wiki-query-interface.md +64 -0
  289. package/vault/wiki/overview.md +51 -0
  290. package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +87 -0
  291. package/vault/wiki/questions/Research-sentrux-dev.md +123 -0
  292. package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +164 -0
  293. package/vault/wiki/questions/Research: Augment Code Context Engine.md +244 -0
  294. package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +112 -0
  295. package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +209 -0
  296. package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +99 -0
  297. package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +107 -0
  298. package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +72 -0
  299. package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +166 -0
  300. package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +188 -0
  301. package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +120 -0
  302. package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +236 -0
  303. package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +95 -0
  304. package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +165 -0
  305. package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +216 -0
  306. package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +91 -0
  307. package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +88 -0
  308. package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +81 -0
  309. package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +71 -0
  310. package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +80 -0
  311. package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +72 -0
  312. package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +92 -0
  313. package/vault/wiki/questions/Research: executor.sh Harness Integration.md +170 -0
  314. package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +97 -0
  315. package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +80 -0
  316. package/vault/wiki/questions/Research: pi-vcc.md +113 -0
  317. package/vault/wiki/questions/Research: semantic code search tools.md +69 -0
  318. package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +73 -0
  319. package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +111 -0
  320. package/vault/wiki/questions/mvp-implementation-blueprint.md +552 -0
  321. package/vault/wiki/questions/research-agent-first-codebase-exploration.md +199 -0
  322. package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +142 -0
  323. package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +100 -0
  324. package/vault/wiki/questions/research-wozcode-token-reduction.md +67 -0
  325. package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +95 -0
  326. package/vault/wiki/questions/resolved-context-window-economics.md +167 -0
  327. package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +126 -0
  328. package/vault/wiki/questions/resolved-mcp-tool-preference.md +112 -0
  329. package/vault/wiki/questions/resolved-small-model-meta-agents.md +107 -0
  330. package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +95 -0
  331. package/vault/wiki/sources/Auggie Context MCP Server.md +63 -0
  332. package/vault/wiki/sources/Augment Code Codacy AI Giants.md +61 -0
  333. package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +49 -0
  334. package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +55 -0
  335. package/vault/wiki/sources/Augment Context Engine Official.md +71 -0
  336. package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +74 -0
  337. package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +58 -0
  338. package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +75 -0
  339. package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +85 -0
  340. package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +58 -0
  341. package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +100 -0
  342. package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +75 -0
  343. package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +107 -0
  344. package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +70 -0
  345. package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +88 -0
  346. package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +57 -0
  347. package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +53 -0
  348. package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +65 -0
  349. package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +83 -0
  350. package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +70 -0
  351. package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +58 -0
  352. package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +101 -0
  353. package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +100 -0
  354. package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +53 -0
  355. package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +70 -0
  356. package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +71 -0
  357. package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +89 -0
  358. package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +155 -0
  359. package/vault/wiki/sources/Source: browser-harness CDP Harness.md +126 -0
  360. package/vault/wiki/sources/agent-drift-academic-paper.md +79 -0
  361. package/vault/wiki/sources/aider-repomap-tree-sitter.md +42 -0
  362. package/vault/wiki/sources/anthropic-compaction-api.md +58 -0
  363. package/vault/wiki/sources/anthropic-effective-harnesses.md +42 -0
  364. package/vault/wiki/sources/anthropic-prompt-best-practices.md +100 -0
  365. package/vault/wiki/sources/anthropic2026-harness-design.md +63 -0
  366. package/vault/wiki/sources/barrel-files-tkdodo.md +38 -0
  367. package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +57 -0
  368. package/vault/wiki/sources/bockeler2026-harness-engineering.md +69 -0
  369. package/vault/wiki/sources/cast-code-chunking-paper.md +50 -0
  370. package/vault/wiki/sources/ck-semantic-search.md +78 -0
  371. package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +71 -0
  372. package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +50 -0
  373. package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +64 -0
  374. package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +70 -0
  375. package/vault/wiki/sources/claude-context-editing-docs.md +13 -0
  376. package/vault/wiki/sources/cloudflare-codemode.md +63 -0
  377. package/vault/wiki/sources/code-chunk-library-supermemory.md +63 -0
  378. package/vault/wiki/sources/codeact-apple-2024.md +62 -0
  379. package/vault/wiki/sources/codex-dsc-rfc-8573.md +41 -0
  380. package/vault/wiki/sources/codex-open-source-agent-2026.md +110 -0
  381. package/vault/wiki/sources/coir-code-retrieval-benchmark.md +51 -0
  382. package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +48 -0
  383. package/vault/wiki/sources/context-folding-paper.md +61 -0
  384. package/vault/wiki/sources/context-mode-website.md +63 -0
  385. package/vault/wiki/sources/cursor-agent-best-practices-2026.md +62 -0
  386. package/vault/wiki/sources/cursor-fork-29b-2025.md +50 -0
  387. package/vault/wiki/sources/cursor-harness-april-2026.md +76 -0
  388. package/vault/wiki/sources/cursor-instant-apply-2024.md +45 -0
  389. package/vault/wiki/sources/cursor-shadow-workspace-2024.md +52 -0
  390. package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +53 -0
  391. package/vault/wiki/sources/cursor-vs-antigravity-2026.md +51 -0
  392. package/vault/wiki/sources/disler-pi-vs-claude-code.md +69 -0
  393. package/vault/wiki/sources/distill-deterministic-context-compression.md +53 -0
  394. package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +48 -0
  395. package/vault/wiki/sources/executor-rhyssullivan.md +122 -0
  396. package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +125 -0
  397. package/vault/wiki/sources/fan2025-imad.md +60 -0
  398. package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +63 -0
  399. package/vault/wiki/sources/gemini-3-prompting-guide.md +78 -0
  400. package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +50 -0
  401. package/vault/wiki/sources/gh-sub-issue-extension.md +72 -0
  402. package/vault/wiki/sources/github-fork-issues-discussion.md +44 -0
  403. package/vault/wiki/sources/github-issue-dependencies-docs.md +49 -0
  404. package/vault/wiki/sources/github-sub-issues-docs.md +51 -0
  405. package/vault/wiki/sources/gitingest.md +91 -0
  406. package/vault/wiki/sources/gitreverse.md +63 -0
  407. package/vault/wiki/sources/google-antigravity-official-blog.md +47 -0
  408. package/vault/wiki/sources/google-antigravity-wikipedia.md +53 -0
  409. package/vault/wiki/sources/gsd-codecentric-deep-dive.md +57 -0
  410. package/vault/wiki/sources/gsd-github-repo.md +51 -0
  411. package/vault/wiki/sources/gsd-hn-discussion.md +59 -0
  412. package/vault/wiki/sources/guido-python-design-philosophy.md +56 -0
  413. package/vault/wiki/sources/hejlsberg-7-learnings.md +48 -0
  414. package/vault/wiki/sources/ironclaw-drift-monitor.md +80 -0
  415. package/vault/wiki/sources/langsight-loop-detection.md +80 -0
  416. package/vault/wiki/sources/leanctx-website.md +69 -0
  417. package/vault/wiki/sources/lee2026-meta-harness.md +59 -0
  418. package/vault/wiki/sources/linux-kernel-coding-workflow.md +50 -0
  419. package/vault/wiki/sources/lou2026-autoharness.md +53 -0
  420. package/vault/wiki/sources/martin-fowler-harness-engineering.md +73 -0
  421. package/vault/wiki/sources/mcp-architecture-docs.md +13 -0
  422. package/vault/wiki/sources/meng2026-agent-harness-survey.md +79 -0
  423. package/vault/wiki/sources/mindstudio-four-agent-types.md +68 -0
  424. package/vault/wiki/sources/ms-chat-history-management.md +13 -0
  425. package/vault/wiki/sources/openai-prompt-guidance.md +104 -0
  426. package/vault/wiki/sources/openclaw-session-pruning.md +13 -0
  427. package/vault/wiki/sources/opencode-dcp.md +13 -0
  428. package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +79 -0
  429. package/vault/wiki/sources/openhands-platform.md +39 -0
  430. package/vault/wiki/sources/oss-guide-codebase-exploration.md +53 -0
  431. package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +102 -0
  432. package/vault/wiki/sources/pi-context-prune-github-repo.md +38 -0
  433. package/vault/wiki/sources/pi-mono-compaction-docs.md +38 -0
  434. package/vault/wiki/sources/pi-omni-compact-github-repo.md +50 -0
  435. package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +45 -0
  436. package/vault/wiki/sources/pi-vcc-github-repo.md +69 -0
  437. package/vault/wiki/sources/pi-vscode-marketplace.md +41 -0
  438. package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +39 -0
  439. package/vault/wiki/sources/py-tree-sitter.md +13 -0
  440. package/vault/wiki/sources/sentrux-dev-landing.md +40 -0
  441. package/vault/wiki/sources/sentrux-docs-pro-architecture.md +75 -0
  442. package/vault/wiki/sources/sentrux-docs-quality-signal.md +46 -0
  443. package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +57 -0
  444. package/vault/wiki/sources/sentrux-docs-rules-engine.md +58 -0
  445. package/vault/wiki/sources/sentrux-github-repo.md +56 -0
  446. package/vault/wiki/sources/superpowers-github-repo.md +56 -0
  447. package/vault/wiki/sources/superpowers-release-blog.md +54 -0
  448. package/vault/wiki/sources/superpowers-termdock-analysis.md +45 -0
  449. package/vault/wiki/sources/swe-agent-aci.md +42 -0
  450. package/vault/wiki/sources/swe-bench.md +45 -0
  451. package/vault/wiki/sources/swe-pruner-context-pruning.md +13 -0
  452. package/vault/wiki/sources/think-in-code-blog.md +48 -0
  453. package/vault/wiki/sources/tree-sitter-docs.md +13 -0
  454. package/vault/wiki/sources/ts-best-practices-2025-devto.md +42 -0
  455. package/vault/wiki/sources/ts-folder-structure-mingyang.md +58 -0
  456. package/vault/wiki/sources/ts-monorepo-koerselman.md +44 -0
  457. package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +52 -0
  458. package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +42 -0
  459. package/vault/wiki/sources/ts-strict-mode-rishikc.md +43 -0
  460. package/vault/wiki/sources/unix-philosophy.md +48 -0
  461. package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +39 -0
  462. package/vault/wiki/sources/vectara-guardian-agents.md +79 -0
  463. package/vault/wiki/sources/vgrep-semantic-search.md +76 -0
  464. package/vault/wiki/sources/vitest-official.md +41 -0
  465. package/vault/wiki/sources/vscode-pi-community-extension.md +40 -0
  466. package/vault/wiki/sources/wozcode.md +79 -0
  467. package/.agents/skills/compress/SKILL.md +0 -111
  468. package/.agents/skills/compress/scripts/__init__.py +0 -9
  469. package/.agents/skills/compress/scripts/__main__.py +0 -3
  470. package/.agents/skills/compress/scripts/benchmark.py +0 -78
  471. package/.agents/skills/compress/scripts/cli.py +0 -73
  472. package/.agents/skills/compress/scripts/compress.py +0 -227
  473. package/.agents/skills/compress/scripts/detect.py +0 -121
  474. package/.agents/skills/compress/scripts/validate.py +0 -189
  475. package/.agents/skills/emil-design-eng/SKILL.md +0 -679
  476. package/.agents/skills/lean-ctx/SKILL.md +0 -149
  477. package/.agents/skills/lean-ctx/scripts/install.sh +0 -95
  478. package/.agents/skills/scrapling-official/LICENSE.txt +0 -28
  479. package/.agents/skills/scrapling-official/SKILL.md +0 -390
  480. package/.agents/skills/scrapling-official/examples/01_fetcher_session.py +0 -26
  481. package/.agents/skills/scrapling-official/examples/02_dynamic_session.py +0 -26
  482. package/.agents/skills/scrapling-official/examples/03_stealthy_session.py +0 -26
  483. package/.agents/skills/scrapling-official/examples/04_spider.py +0 -58
  484. package/.agents/skills/scrapling-official/examples/README.md +0 -45
  485. package/.agents/skills/scrapling-official/references/fetching/choosing.md +0 -78
  486. package/.agents/skills/scrapling-official/references/fetching/dynamic.md +0 -352
  487. package/.agents/skills/scrapling-official/references/fetching/static.md +0 -432
  488. package/.agents/skills/scrapling-official/references/fetching/stealthy.md +0 -255
  489. package/.agents/skills/scrapling-official/references/mcp-server.md +0 -214
  490. package/.agents/skills/scrapling-official/references/migrating_from_beautifulsoup.md +0 -86
  491. package/.agents/skills/scrapling-official/references/parsing/adaptive.md +0 -212
  492. package/.agents/skills/scrapling-official/references/parsing/main_classes.md +0 -586
  493. package/.agents/skills/scrapling-official/references/parsing/selection.md +0 -494
  494. package/.agents/skills/scrapling-official/references/spiders/advanced.md +0 -344
  495. package/.agents/skills/scrapling-official/references/spiders/architecture.md +0 -94
  496. package/.agents/skills/scrapling-official/references/spiders/getting-started.md +0 -164
  497. package/.agents/skills/scrapling-official/references/spiders/proxy-blocking.md +0 -235
  498. package/.agents/skills/scrapling-official/references/spiders/requests-responses.md +0 -196
  499. package/.agents/skills/scrapling-official/references/spiders/sessions.md +0 -205
  500. package/PLAN.md +0 -11
  501. package/extensions/lean-ctx-enforce.ts +0 -166
  502. package/skills-lock.json +0 -35
  503. package/wiki/README.md +0 -19
  504. package/wiki/decisions/0001-establish-project-wiki-and-decision-record-format.md +0 -25
  505. package/wiki/decisions/0002-add-project-banner-to-readme.md +0 -26
  506. package/wiki/decisions/0003-remove-redundant-readme-title-heading.md +0 -26
  507. package/wiki/decisions/0004-publish-package-to-npm-as-ultimate-pi.md +0 -26
  508. package/wiki/decisions/0005-automate-npm-publish-with-github-actions.md +0 -27
  509. package/wiki/decisions/0006-switch-to-npm-trusted-publishing.md +0 -26
  510. package/wiki/decisions/0007-use-absolute-banner-url-for-npm-readme-rendering.md +0 -26
  511. package/wiki/decisions/0008-rename-banner-asset-for-cache-busting.md +0 -26
  512. package/wiki/decisions/0009-force-oidc-path-by-clearing-node-auth-token-in-publish-step.md +0 -25
  513. package/wiki/decisions/0010-simplify-setup-node-for-npm-trusted-publishing.md +0 -26
  514. package/wiki/decisions/0011-add-noop-workflow-change-to-force-fresh-publish-run.md +0 -25
  515. package/wiki/decisions/0012-align-workflow-runtime-with-npm-trusted-publishing-requirements.md +0 -26
  516. package/wiki/decisions/0013-add-package-repository-url-for-provenance-validation.md +0 -25
@@ -0,0 +1,236 @@
1
+ ---
2
+ type: synthesis
3
+ title: "Research: Meta-Agent Context Drift Detection"
4
+ created: 2026-04-30
5
+ updated: 2026-04-30
6
+ tags:
7
+ - research
8
+ - meta-agent
9
+ - context-drift
10
+ - harness-design
11
+ - agent-reliability
12
+ status: developing
13
+ related:
14
+ - "[[context-drift-in-agents]]"
15
+ - "[[meta-agent-context-pruning]]"
16
+ - "[[agent-loop-detection-patterns]]"
17
+ - "[[guardian-agent-pattern]]"
18
+ - "[[ironclaw-drift-monitor]]"
19
+ - "[[langsight-loop-detection]]"
20
+ - "[[agent-drift-academic-paper]]"
21
+ - "[[vectara-guardian-agents]]"
22
+ - "[[model-adaptive-harness]]"
23
+ - "[[harness-configuration-layers]]"
24
+ - "[[agentic-harness-context-enforcement]]"
25
+ - "[[grounding-checkpoints]]"
26
+ sources:
27
+ - "[[ironclaw-drift-monitor]]"
28
+ - "[[langsight-loop-detection]]"
29
+ - "[[agent-drift-academic-paper]]"
30
+ - "[[vectara-guardian-agents]]"
31
+
32
+ ---
+ # Research: Meta-Agent Context Drift Detection
33
+
34
+ ## Overview
35
+
36
+ A meta-agent that monitors the primary coding agent for context drift — repeated incorrect tool calls, excessive ls/find commands, tool-call loops — and intervenes by pruning irrelevant history from context. This concept exists in fragmented form across industry practice (ironclaw DriftMonitor, LangSight loop detection, Claude Code compaction) and academic research (Agent Stability Index, SWE-Pruner, GUARDIAN), but **no single system combines detection + pruning + context replacement into one pipeline**. The exact composition the user described is a novel synthesis.
37
+
38
+ ## Key Findings
39
+
40
+ - **Closest existing match**: nearai/ironclaw #1634 "DriftMonitor" (March 2026) implements rule-based stuck-pattern detection with system-message injection — but does NOT prune context (Source: [[ironclaw-drift-monitor]])
41
+ - **Loop detection is production-ready**: LangSight detects tool-call repetition via argument hashing, catches 90%+ of real loops with zero false positives at threshold 3 (Source: [[langsight-loop-detection]])
42
+ - **Agent drift is academically quantified**: Agent Drift paper (arxiv 2601.04170) shows 42% task success reduction, 3.2x human intervention increase, and introduces ASI (Agent Stability Index) across 12 dimensions (Source: [[agent-drift-academic-paper]])
43
+ - **Guardian agents are an active industry pattern**: Vectara built a platform-agnostic benchmark (~900 scenarios) validating pre-execution safety layers that check tool selection, arguments, and sequencing before execution. Overall correct rate only 5-59% across platforms (Source: [[vectara-guardian-agents]])
44
+ - **Context pruning exists for code, not conversation**: SWE-Pruner (arxiv 2601.16746) achieves 23-54% token reduction by pruning code context, but operates on source files, not agent conversation history (Source: [[swe-pruner-context-pruning]])
45
+ - **The novel gap**: No existing system does the full loop: detect stuck → identify dead-end context entries → prune them → restart agent with clean context. Each piece exists independently. The composition is new.
46
+
47
+ ## First Principles Analysis
48
+
49
+ ### The Problem
50
+
51
+ Agent starts task → makes wrong tool call → gets error → tries variant → still wrong → tries ls/find/grep repeatedly → context fills with dead ends. Signal-to-noise collapses. Agent gets more lost, not less.
52
+
53
+ This is a **positive feedback loop of context pollution**. Each failed attempt adds noise that makes the next attempt MORE likely to fail. The agent doesn't just fail — it accelerates into failure.
54
+
55
+ ### The Meta-Agent Solution
56
+
57
+ A separate observer (meta-agent) that:
58
+
59
+ 1. **Detects stuck patterns** — rule-based signatures of non-progress: repeated identical tool calls, tool cycling (A-B-A-B), consecutive failures, excessive file searching
60
+ 2. **Identifies dead-end context entries** — which tool calls and responses constitute noise vs. signal
61
+ 3. **Prunes the context** — removes dead-end entries from the conversation history
62
+ 4. **Injects a correction** — "You were stuck on [pattern]. Here's what you know so far. Try a different approach."
63
+ 5. **Restarts the agent** — either by editing in-place (if API supports it) or terminating and resuming with pruned history
64
+
65
+ ### Detection Mechanism
66
+
67
+ **Rule-based (recommended)**: Zero LLM overhead. Pattern-match on tool call sequences:
68
+
69
+ ```
70
+ Pattern | Threshold | Detection
71
+ Repetition | 3+ identical | Hash tool+args, count in sliding window
72
+ Failure spiral | 4+ failures | Consecutive error count
73
+ Tool cycling | A-B-A-B-A-B | Sequence pattern in last 6 calls
74
+ Silence drift | 15+ iters | No text response counter
75
+ Rework churn | 3+ writes | Same file written repeatedly
76
+ Excessive searching | 5+ ls/find | Count search-type tool calls without code edits
77
+ ```
78
+
79
+ **LLM-based (higher cost, higher precision)**: Every N steps, a separate small-model call evaluates trajectory for meaningful progress. Can catch semantic drift that rule-based misses.
80
+
81
+ ### Pruning Heuristic
82
+
83
+ Distinguishing "failed but informative" from "failed and useless":
84
+
85
+ | Keep | Prune |
86
+ |------|-------|
87
+ | Error led to different approach on next attempt | Identical call returned same result |
88
+ | Output contained new information despite failure | Pure noise (navigation bars, boilerplate errors) |
89
+ | User explicitly asked for that action | Agent retried without user direction |
90
+ | Established a constraint used later | Agent forgot about the call entirely |
91
+
92
+ Conservative pruning: when uncertain, keep. The cost of pruning useful context is higher than keeping benign noise.
93
+
94
+ ### Feasibility
95
+
96
+ **High**. Detection is trivial (rule-based, O(1) per call). Pruning requires careful heuristics but the worst case (keep everything) is identical to current behavior. The intervention mechanism (system message injection) is already proven in ironclaw.
97
+
98
+ ### Overhead Analysis
99
+
100
+ | Component | Cost | Notes |
101
+ |-----------|------|-------|
102
+ | Rule-based detection | ~0 tokens | Hash comparison + counters per tool call |
103
+ | LLM-based detection | ~500 tokens per check | If checking every 10 steps, 5 checks in 50-step session = 2,500 tokens |
104
+ | Context pruning | ~0 tokens | Metadata operation, no LLM call |
105
+ | Correction injection | ~150 tokens | System message |
106
+ | Session restart | 1 API call + cache miss | One-time cost if restarting; zero if in-place editing |
107
+ | **Total overhead** | **~2,500-3,000 tokens** | vs. 20,000+ tokens wasted in bloated failed context |
108
+
109
+ **Net savings**: 5-10x token reduction for stuck sessions. The meta-agent pays for itself after 1-2 interventions.
110
+
111
+ ### Edge Cases
112
+
113
+ - **Polling agents**: Legitimate repeated calls to status endpoints. Whitelist polling tools or use time-based windows instead of count-based.
114
+ - **Retry-heavy workflows**: Some tools legitimately fail transiently. Increase threshold to 5-7 for these agents.
115
+ - **Exploratory searching**: Browsing many files is sometimes correct behavior. Distinguish by whether code edits follow the searches.
116
+ - **False positive prune**: Removing useful context is worse than failing to prune. Conservative defaults + escalation levels.
117
+
118
+ ### Escalation Model
119
+
120
+ 1. **Soft nudge** (first detection): System message — "You've called [tool] with same args 3 times. Summarize what you know and try a different approach."
121
+ 2. **Strong nudge** (second detection): System message + context summary — "You're stuck. Here's a clean summary of what you've accomplished. Start fresh from here."
122
+ 3. **Forced restart** (third detection): Terminate session, prune history, restart with clean context and correction message.
123
+
124
+ ## Integration with Existing Harness Pipeline
125
+
126
+ The meta-agent concept maps to the existing harness architecture:
127
+
128
+ ### New Layer: L2.5 — Runtime Drift Monitor
129
+
130
+ Sits between L2 (Structured Planning) and L3 (Grounding Checkpoints). While L3 already has "drift detection" for scope creep against the spec, it does NOT monitor tool-call quality or context pollution.
131
+
132
+ ```
133
+ L2 (Plan) → L2.5 (Drift Monitor) → L3 (Execute + Grounding)
134
+                   ↑                          ↓
134
+                   └── injects corrections ───┘
136
+ ```
137
+
138
+ **Why between L2 and L3**: The plan defines expected tool sequences. The drift monitor compares actual tool calls against the plan AND against stuck-pattern signatures. It catches both "off-plan" drift (scope creep) and "stuck-on-plan" drift (repetitive failures).
139
+
140
+ ### Integration Points
141
+
142
+ | Component | Harness File | Change |
143
+ |-----------|-------------|--------|
144
+ | DriftMonitor struct | `lib/harness-drift-monitor.ts` | **New** — pattern detection, correction injection |
145
+ | DriftMonitor config | `.pi/harness/drift-monitor.json` | **New** — thresholds, escalation levels, whitelists |
146
+ | Extension hook | `extensions/harness-drift-monitor.ts` | **New** — hooks into before_llm_call / after_tool_call |
147
+ | L3 grounding | `lib/harness-executor.ts` | Add drift_monitor field, call check() before each LLM call |
148
+ | Harness plan | `lib/harness-planner.ts` | Layer renumbering (L3→L4, L4→L5, etc. or insert as L2.5) |
149
+ | Implementation plan | [[harness-implementation-plan]] | Add Phase 17: Runtime Drift Monitor |
150
+
151
+ ### Configuration Schema
152
+
153
+ ```typescript
154
+ interface DriftMonitorConfig {
155
+ enabled: boolean; // default: true
156
+ detection: {
157
+ repetition_threshold: number; // default: 3
158
+ failure_spiral_threshold: number; // default: 4
159
+ cycle_window: number; // default: 6
160
+ silence_threshold: number; // default: 15 iterations
161
+ rework_threshold: number; // default: 3
162
+ excessive_search_threshold: number;// default: 5
163
+ };
164
+ intervention: {
165
+ prune_context: boolean; // default: true (prune dead-end entries)
166
+ inject_correction: boolean; // default: true (system message)
167
+ escalation: "soft" | "strong" | "forced_restart";
168
+ max_escalations: number; // default: 3
169
+ };
170
+ whitelist: {
171
+ polling_tools: string[]; // tools allowed repeated calls
172
+ retry_tools: string[]; // tools with legitimate retry patterns
173
+ };
174
+ model_profile: "auto" | "opus" | "gpt" | "gemini" | "strict";
175
+ }
176
+ ```
177
+
178
+ ### Model-Adaptive Behavior
179
+
180
+ Maps to L3 State Channel and L2 Gate Design from the [[harness-configuration-layers|four-layer harness]]:
181
+
182
+ | Model | Detection | Intervention |
183
+ |-------|-----------|-------------|
184
+ | Opus | LLM-based every 15 steps (trusts self-assessment) | Soft nudge → self-corrects reliably |
185
+ | GPT | Rule-based every step (needs frequent checks) | Hard escalation → auto-restart after 3 detections |
186
+ | Gemini | Rule-based every 10 steps (moderate frequency) | Soft nudge → escalate if unresponsive |
187
+ | Strict | Rule-based every step (maximum enforcement) | Hard escalation → auto-restart after 2 detections |
188
+
189
+ ### Token Budget
190
+
191
+ Estimated overhead for a 50-step agent session:
192
+
193
+ | Profile | Checks | Tokens per check | Total overhead |
194
+ |---------|--------|-----------------|----------------|
195
+ | Rule-based (GPT/strict) | 50 | ~0 | 0 |
196
+ | Rule-based (Gemini) | 5 | ~0 | 0 |
197
+ | LLM-based (Opus) | 3 | ~500 | 1,500 |
198
+
199
+ All profiles: correction messages ~150 tokens each, max 3 interventions = 450 tokens. Pruning: zero token cost (metadata operation).
200
+
201
+ ## Key Entities
202
+
203
+ - **nearai/ironclaw**: Open-source agent framework with proposed DriftMonitor (Source: [[ironclaw-drift-monitor]])
204
+ - **LangSight**: Production agent monitoring with loop detection, budget guardrails, circuit breakers (Source: [[langsight-loop-detection]])
205
+ - **Vectara**: Guardian Agents benchmark and pre-execution safety layer (Source: [[vectara-guardian-agents]])
206
+ - **Abhishek Rath**: Author of Agent Drift paper, introduced ASI (Agent Stability Index) (Source: [[agent-drift-academic-paper]])
207
+ - **Anthropic Applied AI team**: Published context engineering framework including compaction, note-taking, sub-agent architectures
208
+
209
+ ## Key Concepts
210
+
211
+ - [[context-drift-in-agents]]: Progressive degradation of agent behavior over extended interactions
212
+ - [[meta-agent-context-pruning]]: The proposed system — detect stuck, prune history, restart
213
+ - [[agent-loop-detection-patterns]]: Three production patterns (direct repetition, ping-pong, retry-without-progress)
214
+ - [[guardian-agent-pattern]]: Pre-execution safety layers that validate agent actions before they execute
215
+
216
+ ## Contradictions
217
+
218
+ - **ironclaw vs. Vectara on intervention timing**: Ironclaw DriftMonitor injects corrections AFTER tool calls (reactive). Vectara Guardian Agents validate BEFORE tool execution (proactive). (Source: [[ironclaw-drift-monitor]] vs [[vectara-guardian-agents]]). The meta-agent concept is reactive (post-hoc pruning), so it aligns with ironclaw's approach. Vectara's proactive approach could complement it as a first line of defense.
219
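+ - **LangSight vs. ironclaw on loop response**: LangSight says terminate on loop detection; ironclaw says inject a correction message (Source: [[langsight-loop-detection]] vs [[ironclaw-drift-monitor]]). Both are valid for different risk profiles. The proposed escalation model (soft → strong → forced) synthesizes both.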
220
+
221
+ ## Open Questions
222
+
223
+ - Can context pruning be done in-place (API-supported message editing) or must it always be a session restart? Most APIs (Anthropic, OpenAI) support message truncation but not selective deletion from middle of history.
224
+ - What is the "minimum viable context" that must survive pruning? The original task, key decisions made, constraints discovered, and the last successful state.
225
+ - Does pruning break the model's chain-of-thought? If the model was mid-reasoning when stuck, restarting with pruned history may lose coherence. Needs testing.
226
+ - How does this interact with prompt caching? Pruning may invalidate cached prefixes, increasing short-term cost.
227
+ - Can a small/cheap model (Haiku, Flash) serve as the meta-agent detector, keeping overhead near zero?
228
+
229
+ ## Sources
230
+
231
+ - [[ironclaw-drift-monitor]]: nearai/ironclaw #1634, March 2026 — Proposed DriftMonitor with 5 rule-based patterns
232
+ - [[langsight-loop-detection]]: LangSight Engineering, March 2026 — Production loop detection with argument hash comparison
233
+ - [[agent-drift-academic-paper]]: Abhishek Rath, January 2026 — Agent Stability Index (ASI) across 12 dimensions
234
+ - [[vectara-guardian-agents]]: Vectara, November 2025 — Platform-agnostic guardian agents benchmark (~900 scenarios)
235
+ - [[swe-pruner-context-pruning]]: Wang et al., January 2026 — Self-adaptive context pruning for coding agents (ACL 2026)
236
+ - [[anthropic-context-engineering]]: Anthropic Applied AI, September 2025 — Context engineering framework
@@ -0,0 +1,95 @@
1
+ ---
2
+ type: synthesis
3
+ title: "Research: Model-Adaptive Agent Harness Design"
4
+ created: 2026-04-30
5
+ updated: 2026-04-30
6
+ tags:
7
+ - research
8
+ - agents
9
+ - harness-design
10
+ - model-awareness
11
+ status: complete
12
+ related:
13
+ - "[[model-adaptive-harness]]"
14
+ - "[[harness-configuration-layers]]"
15
+ - "[[forgecode-gpt5-agent-improvements]]"
16
+ sources:
17
+ - "[[forgecode-gpt5-agent-improvements]]"
18
+
19
+ ---
+ # Research: Model-Adaptive Agent Harness Design
20
+
21
+ ## Overview
22
+
23
+ Forge Code's TermBench 2.0 results reveal that agent harness reliability is not a property of the model — it's a property of how well the harness compensates for each model's specific failure modes. GPT 5.4 and Opus 4.6 reached identical 81.8% scores only after model-specific adaptation. This research documents the design principles for making the harness pipeline model-aware.
24
+
25
+ ## Key Findings
26
+
27
+ - **The harness has four configurable layers** not previously recognized: Signal Design (L1), Gate Design (L2), State Channel (L3), Completion Model (L4). Each has dimensions that vary by model (Source: [[forgecode-gpt5-agent-improvements]])
28
+
29
+ - **GPT and Opus fail differently but reach the same capability ceiling** when the harness compensates. GPT needs flat structure, constraints-first ordering, enforced gates, in-band signals. Opus tolerates nesting, infers from metadata, self-corrects (Source: [[forgecode-gpt5-agent-improvements]])
30
+
31
+ - **Enforced verification is the single biggest improvement.** GPT stops after plausible-but-incomplete solutions. "Please verify" does nothing. A programmatic gate — checklist that must be passed before proceeding — catches gaps (Source: [[forgecode-gpt5-agent-improvements]])
32
+
33
+ - **Schema/instruction shape is a reliability variable, not cosmetic.** GPT anchors on what appears first. Moving constraints before descriptive content reduces malformed behavior. Flat structures (1 nesting level) reduce structural errors. Same semantics, different reliability (Source: [[forgecode-gpt5-agent-improvements]])
34
+
35
+ - **Truncation signaling must be in-band for GPT.** Metadata fields like `total_lines` are invisible to GPT's attention. Body-text warnings are necessary. Opus reads metadata fine (Source: [[forgecode-gpt5-agent-improvements]])
36
+
37
+ ## Key Entities
38
+
39
+ - [[forgecode-gpt5-agent-improvements|ForgeCode]]: Agent coding platform that reached #1 on TermBench 2.0. Published the model-adaptive harness findings
40
+ - Tushar Mathur: Author of the Forge Code blog post and lead on the harness adaptation work
41
+
42
+ ## Key Concepts
43
+
44
+ - [[model-adaptive-harness]]: Harness that varies behavior by model profile, not a one-size-fits-all instruction set
45
+ - [[harness-configuration-layers]]: Four-layer framework (L1 Signal, L2 Gate, L3 Channel, L4 Completion) with configurable dimensions per model
46
+
47
+ ## Design Principles for the Harness Pipeline
48
+
49
+ These findings will be applied to the harness pipeline as it is built out. The key principle: **write once for strict (GPT-safe defaults), relax for forgiving models**. Never write for forgiving and hope strict models cope.
50
+
51
+ ### Four-Layer Model (see [[harness-configuration-layers]] for full specification)
52
+
53
+ 1. **L1 Signal Design** — instruction density, ordering, emphasis, nesting depth, atomicity
54
+ 2. **L2 Gate Design** — enforcement model (hard vs soft), granularity, evidence standard, retry behavior
55
+ 3. **L3 State Channel** — how truncation, progress, and errors are communicated to the model
56
+ 4. **L4 Completion Model** — how "done" is determined and verified
57
+
58
+ ### Model-Specific Differences
59
+
60
+ | Behavior | Opus/Claude | GPT |
61
+ |---|---|---|
62
+ | Structure | Tolerates nesting, natural flow | Needs flat, constraints-first |
63
+ | Truncation | Infers from metadata | Needs body-text warning |
64
+ | Verification | Naturally double-checks | Must be ENFORCED (hard gate) |
65
+ | Completion | Self-aware of gaps | Stops after plausible-but-incomplete |
66
+ | Emphasis | Contextual cues work | Explicit markers (REQUIRED, MANDATORY) |
67
+
68
+ ### What Must Adapt per Model
69
+
70
+ Each pipeline phase that generates instructions for the agent should vary based on the driving model:
71
+ - Instruction formatting (density, ordering, emphasis)
72
+ - Gate enforcement (hard vs soft, checklist vs self-assessment)
73
+ - State signaling (in-band vs metadata, explicit vs implicit progress)
74
+ - Completion criteria (falsifiable checklist vs completion signal)
75
+
76
+ ### What Never Adapts
77
+
78
+ Core invariants across all model profiles:
79
+ - Pipeline steps and phase ordering
80
+ - Quality standards and source attribution requirements
81
+ - Confidence labeling
82
+ - Budget constraints (max rounds, max tokens, max pages)
83
+ - Verification gates (what must be checked, even if how varies by model)
84
+
85
+ ## Open Questions
86
+
87
+ - How to detect model at runtime? System prompt parsing? Tool-call format detection?
88
+ - Should per-step gates be added for GPT profile, or is per-round sufficient?
89
+ - How do these findings apply across all harness phases beyond research?
90
+ - Gemini profile needs validation against actual Gemini agent trajectories
91
+ - Should the harness maintain per-model reliability metrics to track which compensations work?
92
+
93
+ ## Sources
94
+
95
+ - [[forgecode-gpt5-agent-improvements]]: Tushar Mathur, 2026-03-16. Primary source for all four fixes and model behavioral differences
@@ -0,0 +1,165 @@
1
+ ---
2
+ type: synthesis
3
+ title: "Research: Model-Specific Prompting Guides"
4
+ created: 2026-05-01
5
+ updated: 2026-05-01
6
+ tags:
7
+ - research
8
+ - prompting
9
+ - model-specific
10
+ - harness-redesign
11
+ status: developing
12
+ related:
13
+ - "[[model-adaptive-harness]]"
14
+ - "[[harness-configuration-layers]]"
15
+ - "[[harness-implementation-plan]]"
16
+ - "[[forgecode-gpt5-agent-improvements]]"
17
+ sources:
18
+ - "[[openai-prompt-guidance]]"
19
+ - "[[anthropic-prompt-best-practices]]"
20
+ - "[[gemini-3-prompting-guide]]"
21
+
22
+ ---
+ # Research: Model-Specific Prompting Guides
23
+
24
+ ## Overview
25
+
26
+ Every major model provider now publishes official prompting guidance specific to their models. These guides describe HOW to prompt each model for best results — not just what the models fail at. The current harness design derives model profiles from Forge Code's empirical failure-mode observations. This research brings the OFFICIAL provider guidance as the primary source for harness adaptations.
27
+
28
+ ## Key Finding: The Harness Must Be Redesigned
29
+
30
+ The current harness writes "strict mode" (GPT-safe defaults) as canonical and relaxes for forgiving models. This is WRONG according to official guidance. Each provider specifies fundamentally DIFFERENT prompting conventions — not just different strictness levels of the same format.
31
+
32
+ ### What Providers Say vs What Harness Does
33
+
34
+ | Provider | Official Guidance | Current Harness Behavior |
35
+ |----------|------------------|------------------------|
36
+ | **OpenAI** | Outcome-first prompts, shorter, constraints-first ordering, preambles before tools, reasoning effort is primary knob | "Strict mode" — flat structure, constraints-first, enforced hard gates, in-band signals |
37
+ | **Anthropic** | XML tags for structure, long content at top + query at bottom, role setting critical, prefer general instructions over prescriptive steps, effort parameter controls thinking | "Relaxed mode" — hierarchical instructions, soft gates, metadata-based state channels |
38
+ | **Google** | Constraints at END (not beginning), split-step verification, temperature at 1.0, explicit grounding statements, persona definitions critical | No Gemini-specific profile; marked "TBD" |
39
+
40
+ ### Critical Contradictions
41
+
42
+ 1. **Constraint ordering**: OpenAI says constraints-FIRST. Google says constraints-LAST. The harness can't satisfy both with one canonical format.
43
+
44
+ 2. **Prompt density**: OpenAI (GPT-5.5+) says SHORTER prompts, outcome-first. The harness's "strict mode" generates verbose, constraint-heavy prompts — exactly what OpenAI now recommends against.
45
+
46
+ 3. **Structure format**: Anthropic recommends XML tags. OpenAI uses XML-like sections but also markdown. Google uses plain text sections. No single format works across all three.
47
+
48
+ 4. **Temperature**: Google mandates 1.0. OpenAI/Anthropic don't specify. The harness needs model-specific temperature config.
49
+
50
+ 5. **Verification strategy**: Google says split-step (verify first, then generate). Anthropic says self-check at end. OpenAI (GPT-5.4+) says verification loop before finalizing. Different workflows.
51
+
52
+ 6. **Grounding**: Google requires explicit "context is only source of truth" statements. OpenAI uses citation rules. Anthropic uses document quote extraction. Different grounding mechanisms.
53
+
54
+ ## Proposed Redesign: Provider-Native Prompt Generation
55
+
56
+ Instead of "write once, relax for forgiving models," the harness should generate **provider-native prompts** optimized for each model's official conventions.
57
+
58
+ ### Design Principle (NEW)
59
+
60
+ **Generate model-specific prompts from a provider-agnostic semantic specification. Never generate a single canonical prompt and relax it.**
61
+
62
+ The harness's internal representation should be a semantic spec (what must be communicated), not a prompt string. The prompt renderer generates the actual prompt text according to the target model's provider conventions.
63
+
64
+ ### Provider Profiles
65
+
66
+ #### OpenAI GPT-5.x Profile
67
+ ```
68
+ STRUCTURE: XML-like sections (<instruction_spec>)
69
+ ORDERING: Constraints-first, then context, then task
70
+ DENSITY: Concise, outcome-oriented. Describe destination, not journey.
71
+ EMPHASIS: Explicit markers: REQUIRED, MANDATORY for true invariants
72
+ VERIFICATION: Action safety blocks, pre-flight/post-flight
73
+ TOOLS: apply_patch native, shell_command tool, update_plan
74
+ REASONING: Use reasoning_effort parameter, not prompt-level "think step by step"
75
+ TEMPERATURE: Unspecified (default)
76
+ CONTRADICTIONS: Audit prompts for conflicting instructions — harmful to GPT-5+
77
+ ```
78
+
79
+ #### Anthropic Claude 4.x Profile
80
+ ```
81
+ STRUCTURE: XML tags (<instructions>, <context>, <examples>)
82
+ ORDERING: Long content at TOP, query at BOTTOM
83
+ DENSITY: Prefer general instructions over prescriptive steps
84
+ EMPHASIS: Role setting, explain "why" behind instructions
85
+ VERIFICATION: Self-check at end against test criteria
86
+ TOOLS: Explicit tool direction, default_to_action or do_not_act
87
+ THINKING: Adaptive thinking with effort parameter
88
+ TEMPERATURE: Unspecified (removed from API, use effort)
89
+ HALLUCINATION: investigate_before_answering block
90
+ PARALLEL: Maximize parallel tool calls
91
+ ```
92
+
93
+ #### Google Gemini 3 Profile
94
+ ```
95
+ STRUCTURE: Plain text sections
96
+ ORDERING: Context → Task → Constraints AT END
97
+ DENSITY: Concise by default, steer for verbosity explicitly
98
+ EMPHASIS: Persona definitions are binding
99
+ VERIFICATION: Split-step: verify capability → generate answer
100
+ TOOLS: System instructions for steering
101
+ THINKING: thinking level LOW/HIGH
102
+ TEMPERATURE: 1.0 (MANDATORY — never change)
103
+ GROUNDING: Explicit "context is absolute limit of truth" statement
104
+ SYNTHESIS: "Based on the entire document above..." anchor phrase
105
+ ```
106
+
107
+ ## What Changes in the Harness
108
+
109
+ ### L1: Spec Hardening
110
+ - **Before**: Generates spec hardening prompts in "strict mode" with flat structure
111
+ - **After**: Generates provider-native spec prompts using the appropriate format per model
112
+
113
+ ### L2: Structured Planning
114
+ - **Before**: Gate enforcement varies (hard for GPT, soft for Claude)
115
+ - **After**: Gate enforcement follows provider conventions PLUS empirical failure mode data
116
+
117
+ ### L2.5: Drift Monitor
118
+ - **Before**: Detection frequency varies by model
119
+ - **After**: Detection strategy varies by model (split-step for Gemini, self-check for Claude, verification loop for GPT)
120
+
121
+ ### L3: Grounding Checkpoints
122
+ - **Before**: Truncation signaling varies (in-band vs metadata)
123
+ - **After**: Grounding mechanism varies (explicit grounding statement for Gemini, citation rules for GPT, quote extraction for Claude)
124
+
125
+ ### L4: Adversarial Verification
126
+ - **Before**: Completion criteria vary (falsifiable checklist vs completion-signal)
127
+ - **After**: Verification workflow varies (split-step verify-then-generate for Gemini, pre-flight/post-flight for GPT, self-check for Claude)
128
+
129
+ ### New: Prompt Renderer Module
130
+ A new module that sits between the harness's semantic spec and the actual API call. It takes a provider-agnostic task specification and renders it into a provider-native prompt.
131
+
132
+ ```
133
+ Semantic Spec → Prompt Renderer → Provider-Native Prompt → API Call
134
+ ├── openai-renderer
135
+ ├── anthropic-renderer
136
+ └── google-renderer
137
+ ```
138
+
139
+ ## Entities
140
+ - [[OpenAI]]: Publisher of GPT model family and official prompt guidance
141
+ - [[Anthropic]]: Publisher of Claude model family and prompt engineering best practices
142
+ - [[Google Cloud]]: Publisher of Gemini model family and Gemini 3 prompting guide
143
+
144
+ ## Key Concepts
145
+ - [[provider-native-prompting]]: New concept — generate prompts optimized for each provider's conventions
146
+ - [[Prompt Renderer]]: New module — translates semantic specs to provider-native prompts
147
+ - [[model-adaptive-harness]]: Existing concept — needs significant redesign
148
+ - [[harness-configuration-layers]]: Existing concept — dimensions need provider-native mappings
149
+
150
+ ## Contradictions
151
+ - **Constraint ordering**: OpenAI says first, Google says last. Cannot resolve — must generate different prompts per provider.
152
+ - **Prompt density**: OpenAI (5.5+) says shorter, harness says verbose strict mode. OpenAI's own newer guidance contradicts the harness's approach.
153
+ - **Verification workflow**: Three different verification patterns (split-step, self-check, verification loop) — all from official sources, all valid.
154
+
155
+ ## Open Questions
156
+ - How to handle models that don't have official prompting guides (Mistral, DeepSeek, Llama)?
157
+ - Should the harness validate prompts against provider conventions before sending?
158
+ - How does prompt caching interact with provider-native prompt generation?
159
+ - Should the semantic spec be the same across all providers, or should it also vary?
160
+ - What happens when provider guidance changes? Should the harness pick up those changes automatically?
161
+
162
+ ## Sources
163
+ - [[openai-prompt-guidance]]: OpenAI, 2026 — Comprehensive multi-model guidance
164
+ - [[anthropic-prompt-best-practices]]: Anthropic, 2026 — Claude Opus 4.7 through Haiku 4.5
165
+ - [[gemini-3-prompting-guide]]: Google Cloud, 2026-04-29 — Gemini 3 specific