ultimate-pi 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (509) hide show
  1. package/.agents/skills/ck-search/SKILL.md +99 -0
  2. package/.agents/skills/defuddle/SKILL.md +90 -0
  3. package/.agents/skills/find-skills/SKILL.md +142 -0
  4. package/.agents/skills/firecrawl/SKILL.md +150 -0
  5. package/.agents/skills/firecrawl/rules/install.md +82 -0
  6. package/.agents/skills/firecrawl/rules/security.md +26 -0
  7. package/.agents/skills/firecrawl-agent/SKILL.md +57 -0
  8. package/.agents/skills/firecrawl-build-interact/SKILL.md +67 -0
  9. package/.agents/skills/firecrawl-build-onboarding/SKILL.md +102 -0
  10. package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +39 -0
  11. package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +20 -0
  12. package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +17 -0
  13. package/.agents/skills/firecrawl-build-scrape/SKILL.md +68 -0
  14. package/.agents/skills/firecrawl-build-search/SKILL.md +68 -0
  15. package/.agents/skills/firecrawl-crawl/SKILL.md +58 -0
  16. package/.agents/skills/firecrawl-download/SKILL.md +69 -0
  17. package/.agents/skills/firecrawl-interact/SKILL.md +83 -0
  18. package/.agents/skills/firecrawl-map/SKILL.md +50 -0
  19. package/.agents/skills/firecrawl-parse/SKILL.md +61 -0
  20. package/.agents/skills/firecrawl-scrape/SKILL.md +68 -0
  21. package/.agents/skills/firecrawl-search/SKILL.md +59 -0
  22. package/.agents/skills/obsidian-bases/SKILL.md +299 -0
  23. package/.agents/skills/obsidian-markdown/SKILL.md +237 -0
  24. package/.agents/skills/posthog-analyst/SKILL.md +306 -0
  25. package/.agents/skills/posthog-analyst/evals/evals.json +23 -0
  26. package/.agents/skills/wiki/SKILL.md +215 -0
  27. package/.agents/skills/wiki/references/css-snippets.md +122 -0
  28. package/.agents/skills/wiki/references/frontmatter.md +107 -0
  29. package/.agents/skills/wiki/references/git-setup.md +58 -0
  30. package/.agents/skills/wiki/references/mcp-setup.md +149 -0
  31. package/.agents/skills/wiki/references/modes.md +259 -0
  32. package/.agents/skills/wiki/references/plugins.md +96 -0
  33. package/.agents/skills/wiki/references/rest-api.md +124 -0
  34. package/.agents/skills/wiki-autoresearch/SKILL.md +211 -0
  35. package/.agents/skills/wiki-autoresearch/references/program.md +75 -0
  36. package/.agents/skills/wiki-fold/SKILL.md +204 -0
  37. package/.agents/skills/wiki-fold/references/fold-template.md +133 -0
  38. package/.agents/skills/wiki-ingest/SKILL.md +288 -0
  39. package/.agents/skills/wiki-lint/SKILL.md +183 -0
  40. package/.agents/skills/wiki-query/SKILL.md +176 -0
  41. package/.agents/skills/wiki-save/SKILL.md +128 -0
  42. package/.ckignore +41 -0
  43. package/.env.example +9 -0
  44. package/.github/banner-v2.png +0 -0
  45. package/.github/workflows/lint.yml +33 -0
  46. package/.github/workflows/publish-github-packages.yml +35 -0
  47. package/.github/workflows/publish-npm.yml +32 -0
  48. package/.pi/SYSTEM.md +107 -40
  49. package/.pi/agents/pi-pi/agent-expert.md +205 -0
  50. package/.pi/agents/pi-pi/cli-expert.md +47 -0
  51. package/.pi/agents/pi-pi/config-expert.md +67 -0
  52. package/.pi/agents/pi-pi/ext-expert.md +53 -0
  53. package/.pi/agents/pi-pi/keybinding-expert.md +123 -0
  54. package/.pi/agents/pi-pi/pi-orchestrator.md +103 -0
  55. package/.pi/agents/pi-pi/prompt-expert.md +83 -0
  56. package/.pi/agents/pi-pi/skill-expert.md +52 -0
  57. package/.pi/agents/pi-pi/theme-expert.md +46 -0
  58. package/.pi/agents/pi-pi/tui-expert.md +100 -0
  59. package/.pi/agents/rethink.md +140 -0
  60. package/.pi/agents/wiki-ingest.md +67 -0
  61. package/.pi/agents/wiki-lint.md +75 -0
  62. package/.pi/auto-commit.json +20 -0
  63. package/.pi/extensions/banner.png +0 -0
  64. package/.pi/extensions/ck-enforce.ts +216 -0
  65. package/.pi/extensions/custom-footer.ts +308 -0
  66. package/.pi/extensions/custom-header.ts +116 -0
  67. package/.pi/extensions/dotenv-loader.ts +170 -0
  68. package/.pi/internal/cursor-sdk-transcript-parser.ts +59 -0
  69. package/.pi/model-router.json +95 -0
  70. package/.pi/npm/.gitignore +2 -0
  71. package/.pi/prompts/git-sync.md +124 -0
  72. package/.pi/prompts/harness-setup.md +509 -0
  73. package/.pi/prompts/save.md +16 -0
  74. package/.pi/prompts/wiki-autoresearch.md +19 -0
  75. package/.pi/prompts/wiki.md +23 -0
  76. package/.pi/providers/cursor-sdk-provider.test.mjs +476 -0
  77. package/.pi/providers/cursor-sdk-provider.ts +1085 -0
  78. package/.pi/settings.json +14 -4
  79. package/.pi/skills/agent-router/SKILL.md +174 -0
  80. package/.pi/sounds/alert/1-kaching-track.mp3 +0 -0
  81. package/.pi/sounds/error/1-ksi-wth-track.mp3 +0 -0
  82. package/.pi/sounds/error/2-smash-track.mp3 +0 -0
  83. package/.pi/sounds/error/3-buzzer-track.mp3 +0 -0
  84. package/.pi/sounds/notification/1-soft-notification-track.mp3 +0 -0
  85. package/.pi/sounds/project-sounds.json +25 -0
  86. package/.pi/sounds/reminder/1-soft-notification-track.mp3 +0 -0
  87. package/.pi/sounds/success/1-tada-track.mp3 +0 -0
  88. package/.pi/sounds/success/2-jobs-done-track.mp3 +0 -0
  89. package/.pi/sounds/success/3-yay-track.mp3 +0 -0
  90. package/CONTRIBUTING.md +116 -0
  91. package/README.md +33 -40
  92. package/biome.json +34 -0
  93. package/firecrawl/.env.template +58 -0
  94. package/firecrawl/README.md +49 -0
  95. package/firecrawl/docker-compose.yaml +201 -0
  96. package/firecrawl/searxng/searxng.env +3 -0
  97. package/firecrawl/searxng/settings.yml +85 -0
  98. package/lefthook.yml +8 -0
  99. package/package.json +55 -16
  100. package/vault/AGENTS.md +37 -0
  101. package/vault/wiki/_templates/comparison.md +39 -0
  102. package/vault/wiki/_templates/concept.md +40 -0
  103. package/vault/wiki/_templates/decision.md +21 -0
  104. package/vault/wiki/_templates/entity.md +32 -0
  105. package/vault/wiki/_templates/flow.md +14 -0
  106. package/vault/wiki/_templates/module.md +18 -0
  107. package/vault/wiki/_templates/question.md +31 -0
  108. package/vault/wiki/_templates/source.md +39 -0
  109. package/vault/wiki/concepts/AST-Aware Code Chunking.md +44 -0
  110. package/vault/wiki/concepts/Build-Time Prompt Compilation.md +107 -0
  111. package/vault/wiki/concepts/Context Engine (AI Coding).md +47 -0
  112. package/vault/wiki/concepts/Context-Aware System Reminders.md +61 -0
  113. package/vault/wiki/concepts/Contextualized Text Embedding.md +42 -0
  114. package/vault/wiki/concepts/Contractor vs Employee AI Model.md +55 -0
  115. package/vault/wiki/concepts/Dual-Model Agent Architecture.md +65 -0
  116. package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +43 -0
  117. package/vault/wiki/concepts/Majority Vote Ensembling.md +68 -0
  118. package/vault/wiki/concepts/Meta-Harness.md +16 -0
  119. package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +75 -0
  120. package/vault/wiki/concepts/Prompt Enhancement.md +90 -0
  121. package/vault/wiki/concepts/Prompt Renderer.md +89 -0
  122. package/vault/wiki/concepts/Semantic Codebase Indexing.md +67 -0
  123. package/vault/wiki/concepts/additive-config-hierarchy.md +16 -0
  124. package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +71 -0
  125. package/vault/wiki/concepts/agent-browser-browser-automation.md +99 -0
  126. package/vault/wiki/concepts/agent-codebase-interface.md +43 -0
  127. package/vault/wiki/concepts/agent-harness-architecture.md +67 -0
  128. package/vault/wiki/concepts/agent-loop-detection-patterns.md +133 -0
  129. package/vault/wiki/concepts/agent-search-enforcement.md +126 -0
  130. package/vault/wiki/concepts/agent-skills-ecosystem.md +74 -0
  131. package/vault/wiki/concepts/agent-skills-pattern.md +68 -0
  132. package/vault/wiki/concepts/agentic-harness-context-enforcement.md +91 -0
  133. package/vault/wiki/concepts/agentic-harness.md +34 -0
  134. package/vault/wiki/concepts/agentic-orchestration-pipeline.md +56 -0
  135. package/vault/wiki/concepts/agentic-search-no-embeddings.md +18 -0
  136. package/vault/wiki/concepts/anthropic-context-engineering.md +13 -0
  137. package/vault/wiki/concepts/antigravity-agent-first-architecture.md +61 -0
  138. package/vault/wiki/concepts/ast-compression.md +19 -0
  139. package/vault/wiki/concepts/ast-truncation.md +66 -0
  140. package/vault/wiki/concepts/barrel-files.md +37 -0
  141. package/vault/wiki/concepts/browser-harness-agent.md +41 -0
  142. package/vault/wiki/concepts/browser-subagent-visual-verification.md +82 -0
  143. package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +192 -0
  144. package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +161 -0
  145. package/vault/wiki/concepts/codebase-to-context-ingestion.md +46 -0
  146. package/vault/wiki/concepts/codex-harness-innovations.md +147 -0
  147. package/vault/wiki/concepts/consensus-debate-flow.md +17 -0
  148. package/vault/wiki/concepts/consensus-debate.md +206 -0
  149. package/vault/wiki/concepts/content-addressed-spec-identity.md +166 -0
  150. package/vault/wiki/concepts/context-anxiety.md +57 -0
  151. package/vault/wiki/concepts/context-compression-techniques.md +19 -0
  152. package/vault/wiki/concepts/context-continuity.md +22 -0
  153. package/vault/wiki/concepts/context-drift-in-agents.md +106 -0
  154. package/vault/wiki/concepts/context-engineering.md +62 -0
  155. package/vault/wiki/concepts/context-folding.md +67 -0
  156. package/vault/wiki/concepts/context-mode.md +38 -0
  157. package/vault/wiki/concepts/cursor-harness-innovations.md +107 -0
  158. package/vault/wiki/concepts/deterministic-session-compaction.md +79 -0
  159. package/vault/wiki/concepts/drift-detection-unified.md +296 -0
  160. package/vault/wiki/concepts/execution-feedback-loop.md +46 -0
  161. package/vault/wiki/concepts/feedforward-feedback-harness.md +60 -0
  162. package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +40 -0
  163. package/vault/wiki/concepts/fork-safe-spec-storage.md +89 -0
  164. package/vault/wiki/concepts/fts5-sandbox.md +19 -0
  165. package/vault/wiki/concepts/fuzzy-edit-matching.md +71 -0
  166. package/vault/wiki/concepts/gemini-cli-architecture.md +104 -0
  167. package/vault/wiki/concepts/generator-evaluator-architecture.md +64 -0
  168. package/vault/wiki/concepts/guardian-agent-pattern.md +67 -0
  169. package/vault/wiki/concepts/harness-configuration-layers.md +89 -0
  170. package/vault/wiki/concepts/harness-control-frameworks.md +155 -0
  171. package/vault/wiki/concepts/harness-engineering-first-principles.md +90 -0
  172. package/vault/wiki/concepts/harness-h-formalism.md +53 -0
  173. package/vault/wiki/concepts/hybrid-code-search.md +61 -0
  174. package/vault/wiki/concepts/inline-post-edit-validation.md +112 -0
  175. package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +110 -0
  176. package/vault/wiki/concepts/lifecycle-hooks.md +94 -0
  177. package/vault/wiki/concepts/mcp-tool-routing.md +102 -0
  178. package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +47 -0
  179. package/vault/wiki/concepts/meta-agent-context-pruning.md +151 -0
  180. package/vault/wiki/concepts/model-adaptive-harness.md +122 -0
  181. package/vault/wiki/concepts/model-routing-agents.md +101 -0
  182. package/vault/wiki/concepts/monorepo-architecture.md +45 -0
  183. package/vault/wiki/concepts/multi-agent-specialization.md +61 -0
  184. package/vault/wiki/concepts/permission-subsystem.md +16 -0
  185. package/vault/wiki/concepts/pi-messenger-analysis.md +243 -0
  186. package/vault/wiki/concepts/pi-vscode-extension-landscape.md +37 -0
  187. package/vault/wiki/concepts/policy-engine-pattern.md +78 -0
  188. package/vault/wiki/concepts/progressive-disclosure-agents.md +53 -0
  189. package/vault/wiki/concepts/progressive-skill-disclosure.md +17 -0
  190. package/vault/wiki/concepts/provider-native-prompting.md +203 -0
  191. package/vault/wiki/concepts/quality-signal-sentrux.md +37 -0
  192. package/vault/wiki/concepts/repo-map-ranking.md +42 -0
  193. package/vault/wiki/concepts/result-monad-error-handling.md +47 -0
  194. package/vault/wiki/concepts/safety-defense-in-depth.md +83 -0
  195. package/vault/wiki/concepts/sandbox-os-enforcement.md +18 -0
  196. package/vault/wiki/concepts/selective-debate-routing.md +70 -0
  197. package/vault/wiki/concepts/self-evolving-harness.md +60 -0
  198. package/vault/wiki/concepts/sentrux-mcp-integration.md +36 -0
  199. package/vault/wiki/concepts/sentrux-rules-engine.md +49 -0
  200. package/vault/wiki/concepts/shell-pattern-compression.md +24 -0
  201. package/vault/wiki/concepts/skill-first-architecture.md +166 -0
  202. package/vault/wiki/concepts/structured-compaction.md +78 -0
  203. package/vault/wiki/concepts/subagent-orchestration.md +17 -0
  204. package/vault/wiki/concepts/subagent-worktree-isolation.md +68 -0
  205. package/vault/wiki/concepts/superpowers-methodology.md +78 -0
  206. package/vault/wiki/concepts/think-in-code.md +73 -0
  207. package/vault/wiki/concepts/ts-execution-layer.md +100 -0
  208. package/vault/wiki/concepts/typescript-strict-mode.md +37 -0
  209. package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +51 -0
  210. package/vault/wiki/concepts/verification-drift-detection.md +19 -0
  211. package/vault/wiki/consensus/consensus-records.md +58 -0
  212. package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +122 -0
  213. package/vault/wiki/decisions/adr-008.md +40 -0
  214. package/vault/wiki/decisions/adr-009.md +46 -0
  215. package/vault/wiki/decisions/adr-010.md +55 -0
  216. package/vault/wiki/decisions/adr-011.md +165 -0
  217. package/vault/wiki/decisions/adr-012.md +102 -0
  218. package/vault/wiki/decisions/adr-013.md +59 -0
  219. package/vault/wiki/decisions/adr-014.md +73 -0
  220. package/vault/wiki/decisions/adr-015.md +81 -0
  221. package/vault/wiki/decisions/adr-016.md +91 -0
  222. package/vault/wiki/decisions/adr-017.md +79 -0
  223. package/vault/wiki/decisions/adr-018.md +100 -0
  224. package/vault/wiki/decisions/adr-019.md +75 -0
  225. package/vault/wiki/decisions/adr-020.md +106 -0
  226. package/vault/wiki/decisions/adr-021.md +86 -0
  227. package/vault/wiki/decisions/adr-022.md +113 -0
  228. package/vault/wiki/decisions/adr-023.md +113 -0
  229. package/vault/wiki/decisions/adr-024.md +73 -0
  230. package/vault/wiki/decisions/adr-025.md +130 -0
  231. package/vault/wiki/decisions/adr-026.md +56 -0
  232. package/vault/wiki/decisions/colocate-wiki.md +34 -0
  233. package/vault/wiki/entities/Anders Hejlsberg.md +29 -0
  234. package/vault/wiki/entities/Anthropic.md +17 -0
  235. package/vault/wiki/entities/Augment Code.md +49 -0
  236. package/vault/wiki/entities/Bjarne Stroustrup.md +26 -0
  237. package/vault/wiki/entities/Bolt.new (StackBlitz).md +39 -0
  238. package/vault/wiki/entities/Boris Cherny.md +11 -0
  239. package/vault/wiki/entities/Claude Code.md +19 -0
  240. package/vault/wiki/entities/Dennis Ritchie.md +26 -0
  241. package/vault/wiki/entities/Emergent Labs.md +32 -0
  242. package/vault/wiki/entities/Google Cloud.md +16 -0
  243. package/vault/wiki/entities/Guido van Rossum.md +28 -0
  244. package/vault/wiki/entities/Ken Thompson.md +28 -0
  245. package/vault/wiki/entities/Lee et al.md +16 -0
  246. package/vault/wiki/entities/Linus Torvalds.md +28 -0
  247. package/vault/wiki/entities/Lovable (company).md +40 -0
  248. package/vault/wiki/entities/Martin Fowler.md +16 -0
  249. package/vault/wiki/entities/Meng et al.md +16 -0
  250. package/vault/wiki/entities/OpenAI.md +16 -0
  251. package/vault/wiki/entities/Rocket.new.md +38 -0
  252. package/vault/wiki/entities/VILA-Lab.md +15 -0
  253. package/vault/wiki/entities/autodev-codebase.md +18 -0
  254. package/vault/wiki/entities/ck-tool.md +59 -0
  255. package/vault/wiki/entities/codesearch.md +18 -0
  256. package/vault/wiki/entities/disler-indydevdan.md +33 -0
  257. package/vault/wiki/entities/gsd-get-shit-done.md +56 -0
  258. package/vault/wiki/entities/javascript-runtimes.md +48 -0
  259. package/vault/wiki/entities/jesse-vincent.md +38 -0
  260. package/vault/wiki/entities/lean-ctx.md +32 -0
  261. package/vault/wiki/entities/opendev.md +41 -0
  262. package/vault/wiki/entities/ops-codegraph-tool.md +18 -0
  263. package/vault/wiki/entities/pi-coding-agent.md +53 -0
  264. package/vault/wiki/entities/sentrux.md +54 -0
  265. package/vault/wiki/entities/vgrep-tool.md +57 -0
  266. package/vault/wiki/entities/vitest.md +41 -0
  267. package/vault/wiki/flows/harness-wiki-pipeline.md +204 -0
  268. package/vault/wiki/hot.md +932 -0
  269. package/vault/wiki/index.md +437 -0
  270. package/vault/wiki/log.md +418 -0
  271. package/vault/wiki/meta/dashboard.md +30 -0
  272. package/vault/wiki/meta/lint-report-2026-04-30.md +86 -0
  273. package/vault/wiki/meta/lint-report-2026-05-02.md +251 -0
  274. package/vault/wiki/meta/overview.canvas +43 -0
  275. package/vault/wiki/modules/adversarial-verification.md +57 -0
  276. package/vault/wiki/modules/automated-observability.md +54 -0
  277. package/vault/wiki/modules/bench.md +20 -0
  278. package/vault/wiki/modules/extensions.md +23 -0
  279. package/vault/wiki/modules/grounding-checkpoints.md +62 -0
  280. package/vault/wiki/modules/harness-implementation-plan.md +345 -0
  281. package/vault/wiki/modules/harness-wiki-skill-mapping.md +135 -0
  282. package/vault/wiki/modules/harness.md +86 -0
  283. package/vault/wiki/modules/persistent-memory.md +85 -0
  284. package/vault/wiki/modules/schema-orchestration.md +68 -0
  285. package/vault/wiki/modules/skills.md +27 -0
  286. package/vault/wiki/modules/spec-hardening.md +58 -0
  287. package/vault/wiki/modules/structured-planning.md +53 -0
  288. package/vault/wiki/modules/think-in-code-enforcement.md +153 -0
  289. package/vault/wiki/modules/wiki-query-interface.md +64 -0
  290. package/vault/wiki/overview.md +51 -0
  291. package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +87 -0
  292. package/vault/wiki/questions/Research-sentrux-dev.md +123 -0
  293. package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +164 -0
  294. package/vault/wiki/questions/Research: Augment Code Context Engine.md +244 -0
  295. package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +112 -0
  296. package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +209 -0
  297. package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +99 -0
  298. package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +107 -0
  299. package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +72 -0
  300. package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +166 -0
  301. package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +188 -0
  302. package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +120 -0
  303. package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +236 -0
  304. package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +95 -0
  305. package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +165 -0
  306. package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +216 -0
  307. package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +91 -0
  308. package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +88 -0
  309. package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +81 -0
  310. package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +71 -0
  311. package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +80 -0
  312. package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +72 -0
  313. package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +92 -0
  314. package/vault/wiki/questions/Research: executor.sh Harness Integration.md +170 -0
  315. package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +97 -0
  316. package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +80 -0
  317. package/vault/wiki/questions/Research: pi-vcc.md +113 -0
  318. package/vault/wiki/questions/Research: semantic code search tools.md +69 -0
  319. package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +73 -0
  320. package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +111 -0
  321. package/vault/wiki/questions/mvp-implementation-blueprint.md +552 -0
  322. package/vault/wiki/questions/research-agent-first-codebase-exploration.md +199 -0
  323. package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +142 -0
  324. package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +100 -0
  325. package/vault/wiki/questions/research-wozcode-token-reduction.md +67 -0
  326. package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +95 -0
  327. package/vault/wiki/questions/resolved-context-window-economics.md +167 -0
  328. package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +126 -0
  329. package/vault/wiki/questions/resolved-mcp-tool-preference.md +112 -0
  330. package/vault/wiki/questions/resolved-small-model-meta-agents.md +107 -0
  331. package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +95 -0
  332. package/vault/wiki/sources/Auggie Context MCP Server.md +63 -0
  333. package/vault/wiki/sources/Augment Code Codacy AI Giants.md +61 -0
  334. package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +49 -0
  335. package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +55 -0
  336. package/vault/wiki/sources/Augment Context Engine Official.md +71 -0
  337. package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +74 -0
  338. package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +58 -0
  339. package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +75 -0
  340. package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +85 -0
  341. package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +58 -0
  342. package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +100 -0
  343. package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +75 -0
  344. package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +107 -0
  345. package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +70 -0
  346. package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +88 -0
  347. package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +57 -0
  348. package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +53 -0
  349. package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +65 -0
  350. package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +83 -0
  351. package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +70 -0
  352. package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +58 -0
  353. package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +101 -0
  354. package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +100 -0
  355. package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +53 -0
  356. package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +70 -0
  357. package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +71 -0
  358. package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +89 -0
  359. package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +155 -0
  360. package/vault/wiki/sources/Source: browser-harness CDP Harness.md +126 -0
  361. package/vault/wiki/sources/agent-drift-academic-paper.md +79 -0
  362. package/vault/wiki/sources/aider-repomap-tree-sitter.md +42 -0
  363. package/vault/wiki/sources/anthropic-compaction-api.md +58 -0
  364. package/vault/wiki/sources/anthropic-effective-harnesses.md +42 -0
  365. package/vault/wiki/sources/anthropic-prompt-best-practices.md +100 -0
  366. package/vault/wiki/sources/anthropic2026-harness-design.md +63 -0
  367. package/vault/wiki/sources/barrel-files-tkdodo.md +38 -0
  368. package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +57 -0
  369. package/vault/wiki/sources/bockeler2026-harness-engineering.md +69 -0
  370. package/vault/wiki/sources/cast-code-chunking-paper.md +50 -0
  371. package/vault/wiki/sources/ck-semantic-search.md +78 -0
  372. package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +71 -0
  373. package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +50 -0
  374. package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +64 -0
  375. package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +70 -0
  376. package/vault/wiki/sources/claude-context-editing-docs.md +13 -0
  377. package/vault/wiki/sources/cloudflare-codemode.md +63 -0
  378. package/vault/wiki/sources/code-chunk-library-supermemory.md +63 -0
  379. package/vault/wiki/sources/codeact-apple-2024.md +62 -0
  380. package/vault/wiki/sources/codex-dsc-rfc-8573.md +41 -0
  381. package/vault/wiki/sources/codex-open-source-agent-2026.md +110 -0
  382. package/vault/wiki/sources/coir-code-retrieval-benchmark.md +51 -0
  383. package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +48 -0
  384. package/vault/wiki/sources/context-folding-paper.md +61 -0
  385. package/vault/wiki/sources/context-mode-website.md +63 -0
  386. package/vault/wiki/sources/cursor-agent-best-practices-2026.md +62 -0
  387. package/vault/wiki/sources/cursor-fork-29b-2025.md +50 -0
  388. package/vault/wiki/sources/cursor-harness-april-2026.md +76 -0
  389. package/vault/wiki/sources/cursor-instant-apply-2024.md +45 -0
  390. package/vault/wiki/sources/cursor-shadow-workspace-2024.md +52 -0
  391. package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +53 -0
  392. package/vault/wiki/sources/cursor-vs-antigravity-2026.md +51 -0
  393. package/vault/wiki/sources/disler-pi-vs-claude-code.md +69 -0
  394. package/vault/wiki/sources/distill-deterministic-context-compression.md +53 -0
  395. package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +48 -0
  396. package/vault/wiki/sources/executor-rhyssullivan.md +122 -0
  397. package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +125 -0
  398. package/vault/wiki/sources/fan2025-imad.md +60 -0
  399. package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +63 -0
  400. package/vault/wiki/sources/gemini-3-prompting-guide.md +78 -0
  401. package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +50 -0
  402. package/vault/wiki/sources/gh-sub-issue-extension.md +72 -0
  403. package/vault/wiki/sources/github-fork-issues-discussion.md +44 -0
  404. package/vault/wiki/sources/github-issue-dependencies-docs.md +49 -0
  405. package/vault/wiki/sources/github-sub-issues-docs.md +51 -0
  406. package/vault/wiki/sources/gitingest.md +91 -0
  407. package/vault/wiki/sources/gitreverse.md +63 -0
  408. package/vault/wiki/sources/google-antigravity-official-blog.md +47 -0
  409. package/vault/wiki/sources/google-antigravity-wikipedia.md +53 -0
  410. package/vault/wiki/sources/gsd-codecentric-deep-dive.md +57 -0
  411. package/vault/wiki/sources/gsd-github-repo.md +51 -0
  412. package/vault/wiki/sources/gsd-hn-discussion.md +59 -0
  413. package/vault/wiki/sources/guido-python-design-philosophy.md +56 -0
  414. package/vault/wiki/sources/hejlsberg-7-learnings.md +48 -0
  415. package/vault/wiki/sources/ironclaw-drift-monitor.md +80 -0
  416. package/vault/wiki/sources/langsight-loop-detection.md +80 -0
  417. package/vault/wiki/sources/leanctx-website.md +69 -0
  418. package/vault/wiki/sources/lee2026-meta-harness.md +59 -0
  419. package/vault/wiki/sources/linux-kernel-coding-workflow.md +50 -0
  420. package/vault/wiki/sources/lou2026-autoharness.md +53 -0
  421. package/vault/wiki/sources/martin-fowler-harness-engineering.md +73 -0
  422. package/vault/wiki/sources/mcp-architecture-docs.md +13 -0
  423. package/vault/wiki/sources/meng2026-agent-harness-survey.md +79 -0
  424. package/vault/wiki/sources/mindstudio-four-agent-types.md +68 -0
  425. package/vault/wiki/sources/ms-chat-history-management.md +13 -0
  426. package/vault/wiki/sources/openai-prompt-guidance.md +104 -0
  427. package/vault/wiki/sources/openclaw-session-pruning.md +13 -0
  428. package/vault/wiki/sources/opencode-dcp.md +13 -0
  429. package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +79 -0
  430. package/vault/wiki/sources/openhands-platform.md +39 -0
  431. package/vault/wiki/sources/oss-guide-codebase-exploration.md +53 -0
  432. package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +102 -0
  433. package/vault/wiki/sources/pi-context-prune-github-repo.md +38 -0
  434. package/vault/wiki/sources/pi-mono-compaction-docs.md +38 -0
  435. package/vault/wiki/sources/pi-omni-compact-github-repo.md +50 -0
  436. package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +45 -0
  437. package/vault/wiki/sources/pi-vcc-github-repo.md +69 -0
  438. package/vault/wiki/sources/pi-vscode-marketplace.md +41 -0
  439. package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +39 -0
  440. package/vault/wiki/sources/py-tree-sitter.md +13 -0
  441. package/vault/wiki/sources/sentrux-dev-landing.md +40 -0
  442. package/vault/wiki/sources/sentrux-docs-pro-architecture.md +75 -0
  443. package/vault/wiki/sources/sentrux-docs-quality-signal.md +46 -0
  444. package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +57 -0
  445. package/vault/wiki/sources/sentrux-docs-rules-engine.md +58 -0
  446. package/vault/wiki/sources/sentrux-github-repo.md +56 -0
  447. package/vault/wiki/sources/superpowers-github-repo.md +56 -0
  448. package/vault/wiki/sources/superpowers-release-blog.md +54 -0
  449. package/vault/wiki/sources/superpowers-termdock-analysis.md +45 -0
  450. package/vault/wiki/sources/swe-agent-aci.md +42 -0
  451. package/vault/wiki/sources/swe-bench.md +45 -0
  452. package/vault/wiki/sources/swe-pruner-context-pruning.md +13 -0
  453. package/vault/wiki/sources/think-in-code-blog.md +48 -0
  454. package/vault/wiki/sources/tree-sitter-docs.md +13 -0
  455. package/vault/wiki/sources/ts-best-practices-2025-devto.md +42 -0
  456. package/vault/wiki/sources/ts-folder-structure-mingyang.md +58 -0
  457. package/vault/wiki/sources/ts-monorepo-koerselman.md +44 -0
  458. package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +52 -0
  459. package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +42 -0
  460. package/vault/wiki/sources/ts-strict-mode-rishikc.md +43 -0
  461. package/vault/wiki/sources/unix-philosophy.md +48 -0
  462. package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +39 -0
  463. package/vault/wiki/sources/vectara-guardian-agents.md +79 -0
  464. package/vault/wiki/sources/vgrep-semantic-search.md +76 -0
  465. package/vault/wiki/sources/vitest-official.md +41 -0
  466. package/vault/wiki/sources/vscode-pi-community-extension.md +40 -0
  467. package/vault/wiki/sources/wozcode.md +79 -0
  468. package/.agents/skills/compress/SKILL.md +0 -111
  469. package/.agents/skills/compress/scripts/__init__.py +0 -9
  470. package/.agents/skills/compress/scripts/__main__.py +0 -3
  471. package/.agents/skills/compress/scripts/benchmark.py +0 -78
  472. package/.agents/skills/compress/scripts/cli.py +0 -73
  473. package/.agents/skills/compress/scripts/compress.py +0 -227
  474. package/.agents/skills/compress/scripts/detect.py +0 -121
  475. package/.agents/skills/compress/scripts/validate.py +0 -189
  476. package/.agents/skills/emil-design-eng/SKILL.md +0 -679
  477. package/.agents/skills/lean-ctx/SKILL.md +0 -149
  478. package/.agents/skills/lean-ctx/scripts/install.sh +0 -95
  479. package/.agents/skills/scrapling-official/LICENSE.txt +0 -28
  480. package/.agents/skills/scrapling-official/SKILL.md +0 -390
  481. package/.agents/skills/scrapling-official/examples/01_fetcher_session.py +0 -26
  482. package/.agents/skills/scrapling-official/examples/02_dynamic_session.py +0 -26
  483. package/.agents/skills/scrapling-official/examples/03_stealthy_session.py +0 -26
  484. package/.agents/skills/scrapling-official/examples/04_spider.py +0 -58
  485. package/.agents/skills/scrapling-official/examples/README.md +0 -45
  486. package/.agents/skills/scrapling-official/references/fetching/choosing.md +0 -78
  487. package/.agents/skills/scrapling-official/references/fetching/dynamic.md +0 -352
  488. package/.agents/skills/scrapling-official/references/fetching/static.md +0 -432
  489. package/.agents/skills/scrapling-official/references/fetching/stealthy.md +0 -255
  490. package/.agents/skills/scrapling-official/references/mcp-server.md +0 -214
  491. package/.agents/skills/scrapling-official/references/migrating_from_beautifulsoup.md +0 -86
  492. package/.agents/skills/scrapling-official/references/parsing/adaptive.md +0 -212
  493. package/.agents/skills/scrapling-official/references/parsing/main_classes.md +0 -586
  494. package/.agents/skills/scrapling-official/references/parsing/selection.md +0 -494
  495. package/.agents/skills/scrapling-official/references/spiders/advanced.md +0 -344
  496. package/.agents/skills/scrapling-official/references/spiders/architecture.md +0 -94
  497. package/.agents/skills/scrapling-official/references/spiders/getting-started.md +0 -164
  498. package/.agents/skills/scrapling-official/references/spiders/proxy-blocking.md +0 -235
  499. package/.agents/skills/scrapling-official/references/spiders/requests-responses.md +0 -196
  500. package/.agents/skills/scrapling-official/references/spiders/sessions.md +0 -205
  501. package/.github/banner.png +0 -0
  502. package/PLAN.md +0 -11
  503. package/extensions/lean-ctx-enforce.ts +0 -166
  504. package/skills-lock.json +0 -35
  505. package/wiki/README.md +0 -10
  506. package/wiki/decisions/0001-establish-project-wiki-and-decision-record-format.md +0 -25
  507. package/wiki/decisions/0002-add-project-banner-to-readme.md +0 -26
  508. package/wiki/decisions/0003-remove-redundant-readme-title-heading.md +0 -26
  509. package/wiki/decisions/0004-publish-package-to-npm-as-ultimate-pi.md +0 -26
@@ -0,0 +1,62 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: engineering-blog
5
+ title: "Best Practices for Coding with Agents"
6
+ author: "Lee Robinson (Cursor/Anysphere)"
7
+ date_published: 2026-01-09
8
+ url: "https://cursor.com/blog/agent-best-practices"
9
+ confidence: high
10
+ tags: [cursor, agent-best-practices, plan-mode, hooks, skills, context-management]
11
+ key_claims:
12
+ - "Agent harness = Instructions + Tools + Model, tuned per model family"
13
+ - "Plan Mode: research codebase → clarify → plan → approve → build"
14
+ - "Context management: let agent find context dynamically; don't pre-load everything"
15
+ - "Rules (.cursor/rules/): static always-on context. Skills (SKILL.md): dynamic on-demand capabilities"
16
+ - "Long-running agent hooks: stop hooks that re-invoke agent until goal achieved"
17
+ - "Git worktree isolation for parallel agents"
18
+ - "Multi-model parallel execution with judging"
19
+ - "Context anxiety: models start refusing work as context fills up"
20
+ created: 2026-05-02
21
+ updated: 2026-05-02
22
+ ---
23
+ # Best Practices for Coding with Agents
24
+
25
+ Cursor's official guide (Lee Robinson, Jan 2026) covering agent harness design, Plan Mode, context management strategies, Rules/Skills system, long-running agent hooks, parallel agents via git worktrees, and workflow patterns.
26
+
27
+ ## Harness Components
28
+
29
+ 1. **Instructions**: System prompt + rules guiding agent behavior
30
+ 2. **Tools**: File editing, codebase search, terminal execution
31
+ 3. **Model**: The agent model for the task
32
+
33
+ Cursor tunes instructions and tools specifically for every frontier model based on internal evals and external benchmarks.
34
+
35
+ ## Plan Mode
36
+
37
+ `Shift+Tab` toggles Plan Mode. Agent:
38
+ 1. Researches codebase for relevant files
39
+ 2. Asks clarifying questions
40
+ 3. Creates detailed implementation plan with file paths
41
+ 4. Waits for approval before building
42
+
43
+ Plans open as editable Markdown. Save to `.cursor/plans/` for documentation + future agent context.
44
+
45
+ ## Context Management
46
+
47
+ - Let agent find context via grep + semantic search — don't pre-tag every file
48
+ - Start new conversation per task; continue for iterations on same feature
49
+ - `@Past Chats` to reference previous work selectively
50
+ - Long conversations cause context noise → agent loses focus
51
+
52
+ ## Long-Running Agent Hooks
53
+
54
+ Stop hooks in `.cursor/hooks.json` that re-invoke agent via `followup_message` until a DONE condition is met (scratchpad check). Max iteration guard. Pattern: run tests, fix until pass.
55
+
56
+ ## Parallel Agents
57
+
58
+ Git worktrees provide isolated workspaces per agent. Multiple models can run the same prompt simultaneously; Cursor judges which solution is best. Use the Apply action to merge results back.
59
+
60
+ ## Relevance to Harness
61
+
62
+ Directly validates our: L2 Structured Planning (Plan Mode), SKILL.md system (Skills), context drift concerns (context anxiety), and model-adaptive harness design. Long-running agent hooks are an elegant alternative to our drift monitor's stop-only approach — we need both.
@@ -0,0 +1,50 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: analysis
5
+ title: "Cursor: How Forking VS Code Built a $29B Company"
6
+ author: "MMNTM Research"
7
+ date_published: 2025-12-15
8
+ url: "https://www.mmntm.net/articles/cursor-deep-dive"
9
+ confidence: medium
10
+ tags: [cursor, vs-code-fork, vertical-agents, architecture, business]
11
+ key_claims:
12
+ - "Forking VS Code = root access to developer workflow. Plugins cannot replicate this"
13
+ - "Extension API constraints: limited UI, process isolation, context blindness"
14
+ - "Shadow Workspace, native diffs, terminal interception, cursor teleportation all require fork"
15
+ - "Model agnosticism as competitive moat vs Copilot's OpenAI lock-in"
16
+ - "Vertical agent thesis: interface and intelligence cannot be decoupled"
17
+ - "The fork tax: constant upstream VS Code merges required"
18
+ created: 2026-05-02
19
+ updated: 2026-05-02
20
+ ---
21
+ # Cursor: How Forking VS Code Built a $29B Company
22
+
23
+ MMNTM Research analysis (Dec 2025) of Cursor's architectural strategy and business model.
24
+
25
+ ## The Extension Trap
26
+
27
+ VS Code Extension API constraints:
28
+ - Limited UI control (no inline diff rendering)
29
+ - Process isolation (Extension Host separate from Renderer/Main)
30
+ - Context blindness (can't cheaply access full editor state)
31
+
32
+ Copilot operates within these constraints. Cursor bypasses them by forking VS Code entirely.
33
+
34
+ ## The Fork = Root Access
35
+
36
+ Forking under MIT license gave Anysphere access to C++ and TypeScript internals. Enables:
37
+ - Shadow Workspace (hidden parallel editor instances)
38
+ - Native diff rendering (inline color-coded overlays)
39
+ - Terminal interception (read output, inject commands)
40
+ - Tab teleportation (predict and animate cursor position)
41
+
42
+ Tax: monthly VS Code upstream merges. Dedicated team for "keeping the lights on."
43
+
44
+ ## Vertical Agent Thesis
45
+
46
+ "The interface and the intelligence cannot be decoupled." Winners aren't building best models — they're building best environments for models. Harvey (legal), Abridge (clinical), Cursor (coding). Pattern repeats across domains.
47
+
48
+ ## Relevance to Harness
49
+
50
+ Meta-lesson: architectural control matters more than model access. Our .pi/ harness architecture is our "fork" — we intercept tool calls, enforce pipeline stages, and control the agent's environment. The question is whether we have enough control points to match what Cursor achieves with editor-level access. We do: tool interception hooks give us equivalent leverage in a CLI/agent context.
@@ -0,0 +1,76 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: engineering-blog
5
+ title: "Continually Improving Our Agent Harness"
6
+ author: "Stefan Heule & Jediah Katz (Cursor/Anysphere)"
7
+ date_published: 2026-04-30
8
+ url: "https://cursor.com/blog/continually-improving-agent-harness"
9
+ confidence: high
10
+ tags: [cursor, agent-harness, model-adaptive, context-window, error-classification, keep-rate]
11
+ key_claims:
12
+ - "Moved from static guardrails + pre-loaded context to dynamic context discovery"
13
+ - "Keep Rate metric: fraction of agent code still in codebase after time intervals"
14
+ - "LLM-as-judge for user satisfaction from response semantics"
15
+ - "Per-tool per-model error baselines with anomaly detection alerts"
16
+ - "Weekly automated Cloud Agent for bug triage from log analysis"
17
+ - "Model-specific tool provisioning: patch format for OpenAI, string replace for Anthropic"
18
+ - "Mid-chat model switching with conversation summarization"
19
+ - "Context anxiety: one model started refusing work as context window filled"
20
+ - "Subagent pattern: fresh context window per specialized task"
21
+ - "Future: multi-agent orchestration where system dispatches to specialized subagents"
22
+ created: 2026-05-02
23
+ updated: 2026-05-02
24
+ ---
25
+ # Continually Improving Our Agent Harness
26
+
27
+ Cursor's April 30, 2026 engineering blog detailing their harness evolution philosophy, measurement systems, error classification, and model-adaptive customization. Most directly relevant source for our harness plan.
28
+
29
+ ## Dynamic Context Evolution
30
+
31
+ Early Cursor (2024): static context pre-loaded (folder layout, semantic snippets, compressed files) + guardrails (lint surfacing, read rewriting, tool call limits).
32
+
33
+ Current Cursor (2026): guardrails removed as models improved. Dynamic context fetched by agent on demand. More ways for agent to pull context and interact with the world.
34
+
35
+ ## Measurement: Keep Rate + LLM-as-Judge
36
+
37
+ **Keep Rate**: For agent-proposed code changes, track what fraction remains in codebase after fixed time intervals (1hr, 1day, 1week). High keep rate = agent did good work.
38
+
39
+ **LLM-as-Judge**: Language model reads user's responses to agent output to determine satisfaction semantically. Moving to next feature = good. Pasting stack trace = bad.
40
+
41
+ A/B testing harness variants on real usage. One experiment: more expensive model for context summarization made negligible difference.
42
+
43
+ ## Error Classification System
44
+
45
+ Every tool call error classified:
46
+
47
+ | Error Type | Meaning |
48
+ |---|---|
49
+ | `InvalidArguments` | Model mistake in tool call |
50
+ | `UnexpectedEnvironment` | Contradictions in context window |
51
+ | `ProviderError` | Vendor outages |
52
+ | `UserAborted` | User cancelled |
53
+ | `Timeout` | Tool call timed out |
54
+ | Unknown | Always a bug |
55
+
56
+ Alerts fire on unknown error threshold. Anomaly detection for expected errors vs per-tool per-model baseline.
57
+
58
+ Weekly Cloud Agent Automation: searches logs, surfaces new/spiked issues, creates/updates tickets with investigation.
59
+
60
+ ## Model-Adaptive Customization
61
+
62
+ - OpenAI models: patch-based edit format
63
+ - Anthropic models: string replacement format
64
+ - Custom prompting per provider AND per model version
65
+ - Mid-chat model switching: auto-switch harness, summarize conversation, warn about tool set differences
66
+ - Subagent pattern: fresh context window per specialized task (planning, editing, debugging)
67
+
68
+ ## Context Anxiety
69
+
70
+ One model developed "context anxiety": as context window filled, it started refusing work, hedging that tasks seemed too big. Mitigated through prompt adjustments. Independent validation of our P27 Context Anxiety Guard concept.
71
+
72
+ ## Relevance to Harness
73
+
74
+ **Directly validates**: model-adaptive harness design, provider-native prompting, context anxiety guard (P27), L5 observability need, drift monitor need.
75
+
76
+ **New gaps identified**: Keep Rate metric missing from L5, per-tool per-model error classification missing, subagent specialization beyond cost routing missing, autonomous harness self-repair (Cloud Agent for harness bugs) missing.
@@ -0,0 +1,45 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: engineering-blog
5
+ title: "Editing Files at 1000 Tokens per Second"
6
+ author: "Aman Sanger (Cursor/Anysphere)"
7
+ date_published: 2024-05-14
8
+ url: "https://cursor.com/blog/instant-apply"
9
+ confidence: high
10
+ tags: [cursor, speculative-edits, fast-apply, diff-models, code-editing, latency]
11
+ key_claims:
12
+ - "Fast Apply: custom model trained for full-file rewrites, not diff generation"
13
+ - "Speculative edits: deterministic speculation using existing code as draft tokens. 9-13x speedup"
14
+ - "Diffs fail because: fewer thinking tokens, out-of-distribution, line number hallucination"
15
+ - "Search/replace diff format (Aider-inspired) eliminates line numbers but most models still fail"
16
+ - "Fine-tuned Llama-3-70b + speculative edits outperforms GPT-4o on accuracy and speed"
17
+ - "~1000 tokens/sec on 70B model, deployed with Fireworks AI inference engine"
18
+ created: 2026-05-02
19
+ updated: 2026-05-02
20
+ ---
21
+ # Editing Files at 1000 Tokens per Second
22
+
23
+ Cursor's May 2024 technical post on their Fast Apply model and speculative edits algorithm.
24
+
25
+ ## Why Full-File Rewrites Beat Diffs
26
+
27
+ 1. **Fewer thinking tokens**: Diffs constrain output tokens, giving model fewer forward passes
28
+ 2. **Out of distribution**: Models see more full files than diffs in training
29
+ 3. **Line number hallucination**: Tokenizers treat multi-digit numbers as single tokens; model must commit on first token
30
+
31
+ Cursor tested Aider-inspired search/replace diff format (no line numbers, redundant +/- markers). Only Claude Opus could output accurate diffs. Most models fail badly.
32
+
33
+ ## Speculative Edits Algorithm
34
+
35
+ Unlike standard speculative decoding (draft model proposes, target verifies), Cursor's **speculative edits** uses the *existing code as draft tokens*. Since code edits reuse 80-90% of existing lines, the current file contents serve as high-quality draft predictions. The target model verifies which spans to keep vs replace.
36
+
37
+ This is deterministic speculation — no draft model needed. Deployed with Fireworks AI custom inference engine.
38
+
39
+ ## Training Pipeline
40
+
41
+ Synthetic data from cmd-k prompts → GPT-4 produces chat response → LM "applies" change → mix with real apply data (80/20). Downsampled: small files, repeated filenames, no-op edits. Best model: Llama-3-70b fine-tuned.
42
+
43
+ ## Relevance to Harness
44
+
45
+ Our P10 fuzzy edit matching addresses the same "diff problem" from the tool side. Cursor solves it from the model side (train model to output full rewritten files, not diffs). We should consider: for our edit tool, could we accept full-file rewrites and diff them server-side? This would be more model-friendly.
@@ -0,0 +1,52 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: engineering-blog
5
+ title: "Iterating with Shadow Workspaces"
6
+ author: "Arvid Lunnemark (Cursor/Anysphere)"
7
+ date_published: 2024-09-01
8
+ url: "https://cursor.com/blog/shadow-workspace"
9
+ confidence: high
10
+ tags: [cursor, shadow-workspace, lsp, pre-verification, agent-harness]
11
+ key_claims:
12
+ - "Shadow workspace = hidden Electron window for AI code iteration with full LSP access"
13
+ - "AI iterates invisibly until lints pass; user only sees valid code"
14
+ - "Implemented as hidden window with gRPC IPC, auto-killed after 15min idle"
15
+ - "Concurrency via interleaving: AIs paused/resumed like CPU processes"
16
+ - "Future: kernel-level folder proxy (FUSE) for runnability + disk isolation"
17
+ - "Rust-analyzer broken because it needs on-disk files; macOS FUSE blocked by Apple walled garden"
18
+ created: 2026-05-02
19
+ updated: 2026-05-02
20
+ ---
21
+ # Iterating with Shadow Workspaces
22
+
23
+ Cursor's engineering blog post describing the **shadow workspace** — a hidden Electron window that lets AI agents iterate on code with full Language Server Protocol (LSP) access, independently of the user's coding experience.
24
+
25
+ ## Design Criteria
26
+
27
+ 1. **LSP-usability**: AIs see lints, go-to-definitions, full LSP interaction
28
+ 2. **Runnability**: AIs run code and see output (future goal)
29
+ 3. **Independence**: User's coding experience unaffected
30
+ 4. **Privacy**: Code stays local
31
+ 5. **Concurrency**: Multiple AIs work concurrently
32
+ 6. **Universality**: Works for all languages and workspace setups
33
+ 7. **Maintainability**: Minimal isolatable code
34
+ 8. **Speed**: No minute-long delays, throughput for hundreds of AI branches
35
+
36
+ ## Current Implementation
37
+
38
+ Hidden Electron window spawned with `show: false`. Edits sent via gRPC IPC between extension hosts. Shadow window runs full VS Code environment with LSP plugins. AI iterates on lints invisibly, then valid code presented to user.
39
+
40
+ Concurrency: interleaves AI edits like CPU processes — AI A runs and pauses, AI B runs, then A resumes. The AIs don't notice the passage of time.
41
+
42
+ ## Open Questions
43
+
44
+ 1. Kernel-level folder proxy without kernel extension?
45
+ 2. Windows equivalent of FUSE?
46
+ 3. DriverKit for fake USB proxy folder?
47
+ 4. Network-level isolation for microservice testing?
48
+ 5. Cloud-based remote workspace with auto-inferred Docker?
49
+
50
+ ## Relevance to Harness
51
+
52
+ The shadow workspace is the **pre-verification isolation** pattern. It proves that validating code before the user sees it is the single biggest UX differentiator in agentic coding. Our harness should implement an analogous "pre-commit validation sandbox" between L3 and L4.
@@ -0,0 +1,53 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: engineering-blog
5
+ title: "How Cursor Shipped its Coding Agent to Production"
6
+ author: "Lee Robinson (Cursor) + ByteByteGo"
7
+ date_published: 2026-01-26
8
+ url: "https://blog.bytebytego.com/p/how-cursor-shipped-its-coding-agent"
9
+ confidence: high
10
+ tags: [cursor, composer, coding-agent, latency, sandboxing, speculative-decoding, context-compaction]
11
+ key_claims:
12
+ - "Coding agent ≠ agentic model. Model is brain, agent is body with tools + loop + context retrieval"
13
+ - "Composer: MoE architecture, 4x faster than similarly intelligent models"
14
+ - "Three latency strategies: MoE (per-call cost), speculative decoding (generation time), context compaction (prompt processing)"
15
+ - "Diff Problem: models struggle with edit tasks. Solved via training on (original, edit_cmd, final) triples"
16
+ - "Search and replace tools are hardest to teach; training data has high volume of these trajectories"
17
+ - "Sandboxing: custom VM scheduler for bursty demand. Sandboxes are core serving infrastructure"
18
+ - "Three production lessons: tool use baked into model, adoption is ultimate metric, speed is product"
19
+ created: 2026-05-02
20
+ updated: 2026-05-02
21
+ ---
22
+ # How Cursor Shipped its Coding Agent to Production
23
+
24
+ ByteByteGo deep dive (Jan 2026) written with Lee Robinson at Cursor. Covers the full architecture of Cursor's coding agent system, Composer model training, and three production challenges.
25
+
26
+ ## System Architecture
27
+
28
+ | Component | Purpose |
29
+ |---|---|
30
+ | **Router** | Auto mode: analyzes request complexity, picks best model |
31
+ | **LLM (agentic model)** | Trained on trajectories (action sequences), not just text |
32
+ | **Tools** | 10+ tools: search, read, write, apply edits, terminal |
33
+ | **Context Retrieval** | Pulls relevant snippets/docs/definitions for current step |
34
+ | **Orchestrator** | ReAct loop: model decides → tool executes → result collected → rebuild context → repeat |
35
+ | **Sandbox** | Isolated execution for builds/tests/linters with strict guardrails |
36
+
37
+ ## Three Production Challenges
38
+
39
+ ### 1. The Diff Problem
40
+ Models trained on text generation struggle with code editing. Solution: train on (original_code, edit_command, final_code) triples. Search+replace tools hardest to teach — require high volume of tool-specific trajectories. Composer trained on tens of thousands of GPUs.
41
+
42
+ ### 2. Latency Compounds
43
+ Three techniques:
44
+ - **MoE Architecture**: Conditional expert routing, fewer active params per token, better quality at similar latency
45
+ - **Speculative Decoding**: Small draft model proposes tokens, large model verifies in parallel. Code structure is predictable (imports, brackets, syntax) → high acceptance rate
46
+ - **Context Compaction**: Summarize working state. Keep failing test names, error types, key stack frames. Drop stale context, deduplicate repeats.
47
+
48
+ ### 3. Sandboxing at Scale
49
+ Custom VM scheduler for bursty demand. Fast provisioning + aggressive recycling. Sandboxes treated as core serving infrastructure, not just containers. During training: hundreds of thousands of concurrent sandboxed environments.
50
+
51
+ ## Relevance to Harness
52
+
53
+ Validates our: inline syntax validation (P11-P12), edit tool fuzziness (P10), Haiku router (P25), sandbox execution. New gaps: context compaction strategy more sophisticated than our drift pruning, speculative editing is a model-level optimization we can't replicate but can learn from conceptually.
@@ -0,0 +1,51 @@
1
+ ---
2
+ type: source
3
+ source_type: secondary
4
+ title: "Cursor vs Antigravity 2026: Which AI Agent Actually Wins?"
5
+ author: "Vishnu (MeshWorld)"
6
+ date_published: 2026-03-18
7
+ url: "https://meshworld.in/blog/ai/comparisons/cursor-vs-antigravity/"
8
+ confidence: medium
9
+ status: ingested
10
+ created: 2026-05-01
11
+ updated: 2026-05-01
12
+ tags:
13
+ - antigravity
14
+ - cursor
15
+ - comparison
16
+ - harness-design
17
+ key_claims:
18
+ - "Antigravity has 1M token context window vs Cursor's RAG-based indexing"
19
+ - "Browser subagent with visual verification is Antigravity's killer feature"
20
+ - "Cursor = Centaur model (you-first). Antigravity = Manager model (agent-first)"
21
+ - "Antigravity Ultra at $249.99/mo criticized for high agentic loop costs"
22
+ - "Cursor v2.6 adds JetBrains support; Antigravity is VS Code only"
23
+ - "Antigravity v1.20.5 powered by Gemini 3.1 Pro"
24
+ ---
25
+ # Cursor vs Antigravity 2026
26
+
27
+ Technical comparison published March 18, 2026 by MeshWorld.
28
+
29
+ ## Core Distinction
30
+
31
+ - **Cursor: "Centaur" model** — AI amplifies human typing. You stay in flow.
32
+ - **Antigravity: "Manager" model** — AI does the work. You review artifacts and steer.
33
+
34
+ ## Key Antigravity Features
35
+
36
+ 1. **1M Token Context Window**: Ingests entire repos into active memory. Understands cross-file dependencies natively. No RAG needed.
37
+ 2. **Browser Subagent**: Drives headless Chrome. Takes screenshots, analyzes pixels, verifies UI changes.
38
+ 3. **Nano Banana**: Built-in image generator for UI assets.
39
+
40
+ ## Benchmark Notes
41
+
42
+ - Cursor (Claude Opus 4.6) better at pure logic and bug fixing
43
+ - Antigravity (Gemini 3.1 Pro) undefeated for UI, vision, and multi-step reasoning
44
+
45
+ ## Pricing Gap
46
+
47
+ Antigravity Ultra at $249.99/mo. Token-heavy agentic loops burn through quotas fast. Pro users report multi-day lockouts after intensive sessions.
48
+
49
+ ## Relevance to Harness
50
+
51
+ Validates that different agent architectures excel at different task types. The 1M context window vs RAG debate is central to our context strategy. The browser subagent reveals a gap in our tool registry.
@@ -0,0 +1,69 @@
1
+ ---
2
+ type: source
3
+ source_type: github-repo
4
+ author: disler (IndyDevDan)
5
+ date_published: 2026-02-23
6
+ url: https://github.com/disler/pi-vs-claude-code
7
+ confidence: high
8
+ tags:
9
+ - pi-agent
10
+ - claude-code
11
+ - agentic-coding
12
+ - multi-agent
13
+ - extensions
14
+ key_claims:
15
+ - "Pi Coding Agent is the only real open-source competitor to Claude Code"
16
+ - "Pi's extension system enables UI customization, agent orchestration, safety auditing, and cross-agent integrations"
17
+ - "Extensions compose via multiple -e flags: subagent-widget, agent-team, agent-chain, damage-control, pi-pi"
18
+ - "Pi supports every major AI model provider (OpenAI, Anthropic, Google, OpenRouter)"
19
+ - "Agent teams dispatch work to specialists via teams.yaml; agent chains pipeline steps sequentially via agent-chain.yaml"
20
+ ---
21
+
22
+ # disler/pi-vs-claude-code
23
+
24
+ GitHub repository by IndyDevDan (disler) — 928 stars, 244 forks. A collection of customized Pi Coding Agent instances demonstrating how to hedge against Claude Code in the agentic coding market.
25
+
26
+ ## What It Provides
27
+
28
+ **15+ production extensions** covering the full agent lifecycle:
29
+
30
+ ### Multi-Agent Orchestration (3 extensions)
31
+ - **subagent-widget**: `/sub <task>` spawns background Pi subagents with live-progress widgets
32
+ - **agent-team**: Dispatcher-only orchestrator — primary agent delegates to named specialists via `dispatch_agent` tool, shows grid dashboard
33
+ - **agent-chain**: Sequential pipeline orchestrator — chains agents where output feeds into next step (`$INPUT`, `$ORIGINAL` variables). Example: `plan-build-review` pipeline
34
+
35
+ ### Safety & Control (2 extensions)
36
+ - **damage-control**: Real-time safety auditing — intercepts dangerous bash patterns via regex, enforces path-based access controls from `.pi/damage-control-rules.yaml`. Block levels: Zero Access, Read-Only, No-Delete, Dangerous Commands (some with `ask: true` confirmation)
37
+ - **purpose-gate**: Session intent declaration on startup, blocks prompts until answered
38
+
39
+ ### UI & DX (7 extensions)
40
+ - **pure-focus**: Distraction-free mode (no footer/status)
41
+ - **minimal**: Compact footer with model name + 10-block context meter
42
+ - **tool-counter**: Rich two-line footer (model, context, tokens, cost + cwd/branch, per-tool tally)
43
+ - **tool-counter-widget**: Live-updating above-editor per-tool call counts
44
+ - **session-replay**: Scrollable timeline overlay of session history
45
+ - **theme-cycler**: Keyboard shortcuts to cycle custom themes
46
+ - **system-select**: `/system` command to switch between agent personas from `.pi/agents/`
47
+
48
+ ### Meta & Cross-Agent (2 extensions)
49
+ - **cross-agent**: Scans `.claude/`, `.gemini/`, `.codex/` dirs for commands/skills/agents and registers them in Pi
50
+ - **pi-pi**: Meta-agent that builds Pi agents using parallel research experts (ext-expert, theme-expert, tui-expert)
51
+
52
+ ## Key Architecture Insights
53
+
54
+ **Agent Teams** configured in `.pi/agents/teams.yaml`:
55
+ ```yaml
56
+ frontend: [planner, builder, bowser]
57
+ backend: [architect, implementer, tester]
58
+ ```
59
+ Individual agent personas live as `.md` files in `.pi/agents/`.
60
+
61
+ **Agent Chains** defined in `.pi/agents/agent-chain.yaml` as sequential steps with `$INPUT` injection.
62
+
63
+ **Damage Control Rules** in `.pi/damage-control-rules.yaml` with four rule categories (Zero Access, Read-Only, No-Delete, Dangerous Commands).
64
+
65
+ **Stacking**: Extensions compose — `pi -e extensions/minimal.ts -e extensions/cross-agent.ts`.
66
+
67
+ ## Relevance to Our Harness
68
+
69
+ The repo demonstrates that Pi's extension system can implement the full set of orchestration patterns (subagent delegation, team dispatch, sequential chaining) entirely in user-space TypeScript, without modifying the core agent. This means our harness can adopt these patterns as `.pi/skills/` extensions rather than core code changes.
@@ -0,0 +1,53 @@
1
+ ---
2
+ type: source
3
+ source_type: github-repo
4
+ title: "Siddhant-K-code/distill"
5
+ author: "Siddhant Khare"
6
+ date_published: 2026-02-24
7
+ date_accessed: 2026-05-05
8
+ url: "https://github.com/Siddhant-K-code/distill"
9
+ confidence: medium
10
+ tags:
11
+ - compaction
12
+ - context-engineering
13
+ - deterministic
14
+ - deduplication
15
+ key_claims:
16
+ - "4-layer deterministic context compression: Cluster, Select, Rerank, Compress"
17
+ - "~12ms overhead vs ~500ms for LLM compression"
18
+ - "~$0.0001/call vs $0.01+ for LLM compression"
19
+ - "Semantic deduplication removes 30-40% redundant context from multiple sources"
20
+ - "Session-based context window management with token budgets (v0.4.0)"
21
+ - "Persistent context memory with write-time deduplication and hierarchical decay"
22
+ - "143 GitHub stars, v0.4.0 (Feb 2026)"
23
+ ---
24
+
25
+ # Distill — Deterministic Context Compression for LLM Agents
26
+
27
+ ## Summary
28
+
29
+ Distill is a general-purpose context optimization tool that preprocesses context from multiple sources (RAG, tools, memory, docs) before sending to LLMs. It operates as a reliability layer, not a session compactor — its scope is broader but shallower than pi-vcc.
30
+
31
+ ## Key Details
32
+
33
+ - **Repo**: Siddhant-K-code/distill (143 stars, MIT)
34
+ - **Version**: v0.4.0 (Feb 2026)
35
+ - **Algorithm**: Agglomerative clustering + Maximal Marginal Relevance (MMR) re-ranking
36
+ - **Pipeline**: Over-fetch → Cluster → Select → MMR re-rank → Compress
37
+ - **Scope**: Context preprocessing layer (any LLM workflow), not session-specific compaction
38
+ - **Observability**: Prometheus metrics + OpenTelemetry tracing
39
+ - **Config**: `distill.yaml` file
40
+
41
+ ## How It Differs from pi-vcc
42
+
43
+ | Dimension | Distill | pi-vcc |
44
+ |-----------|---------|--------|
45
+ | Scope | Multi-source context preprocessing | Session conversation compaction |
46
+ | Input | RAG chunks, tool outputs, docs, memory | Pi session transcript |
47
+ | Output | Deduplicated, ranked context | Brief transcript + 5 semantic sections |
48
+ | Recall | No lineage recall | Full JSONL lineage recall |
49
+ | Integration | General LLM middleware | Pi `session_before_compact` hook |
50
+
51
+ ## Why This Matters
52
+
53
+ Distill validates the deterministic-over-LLM pattern but operates at a different layer than pi-vcc. Distill preprocesses what goes INTO the context window. pi-vcc compresses what has ALREADY accumulated in the session. Both are complementary, not competing.
@@ -0,0 +1,48 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: benchmark-report
5
+ author: Naman Bansal / Supermemory AI
6
+ date_published: 2025-06-27
7
+ url: https://supermemory.ai/blog/best-open-source-embedding-models-benchmarked-and-ranked/
8
+ confidence: high
9
+ key_claims:
10
+ - "MiniLM-L6-v2: 78.1% top-5 retrieval, 14.7ms/1K tokens, 68ms latency, 1.2GB GPU"
11
+ - "E5-Base-v2: 83.5% top-5 retrieval, 20.2ms/1K tokens, 79ms latency, 2.0GB GPU"
12
+ - "BGE-Base-v1.5: 84.7% top-5 retrieval, 22.5ms/1K tokens, 82ms latency, 2.1GB GPU"
13
+ - "Nomic Embed v1: 86.2% top-5 retrieval, 41.9ms/1K tokens, 110ms latency, 4.8GB GPU"
14
+ - "MiniLM-L6-v2 is 5-8% lower accuracy than larger models but 3x faster"
15
+ tags:
16
+ - embedding-models
17
+ - benchmark
18
+ - minilm
19
+ - bge
20
+ - e5
21
+ - nomic
22
+ created: 2026-05-02
23
+ updated: 2026-05-02
24
+ ---
25
+ # Best Open-Source Embedding Models Benchmarked and Ranked (2025)
26
+
27
+ ## Summary
28
+
29
+ Comprehensive benchmark of four leading open-source embedding models on BEIR TREC-COVID dataset using FAISS flat L2 index. Provides accuracy, latency, and compute cost trade-offs.
30
+
31
+ ## Benchmark Results
32
+
33
+ | Model | Embed Time (ms/1K tok) | Latency (ms) | Top-5 Accuracy | GPU Memory |
34
+ |-------|----------------------|------------|----------------|------------|
35
+ | MiniLM-L6-v2 | 14.7 | 68 | 78.1% | ~1.2 GB |
36
+ | E5-Base-v2 | 20.2 | 79 | 83.5% | ~2.0 GB |
37
+ | BGE-Base-v1.5 | 22.5 | 82 | 84.7% | ~2.1 GB |
38
+ | Nomic Embed v1 | 41.9 | 110 | 86.2% | ~4.8 GB |
39
+
40
+ ## Trade-off Analysis
41
+
42
+ - **Speed-first**: MiniLM-L6-v2 — best for high-volume, low-latency, edge deployments
43
+ - **Balanced**: E5-Base-v2 or BGE-Base-v1.5 — strong accuracy at reasonable latency
44
+ - **Accuracy-first**: Nomic Embed v1 — best precision but 2x slower, GPU-dependent
45
+
46
+ ## Relevance to Our Implementation
47
+
48
+ MiniLM-L6-v2's 78.1% vs Nomic's 86.2% is an 8.1 percentage point gap on general text retrieval. For code retrieval, the gap is likely wider since MiniLM was trained on general text, not code. However, with AST-aware chunking + contextualized text, the effective gap narrows significantly because the chunking quality improvement (per Vectara NAACL 2025) can outweigh the embedding model choice.