ultimate-pi 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (516) hide show
  1. package/.agents/skills/ck-search/SKILL.md +99 -0
  2. package/.agents/skills/defuddle/SKILL.md +90 -0
  3. package/.agents/skills/find-skills/SKILL.md +142 -0
  4. package/.agents/skills/firecrawl/SKILL.md +150 -0
  5. package/.agents/skills/firecrawl/rules/install.md +82 -0
  6. package/.agents/skills/firecrawl/rules/security.md +26 -0
  7. package/.agents/skills/firecrawl-agent/SKILL.md +57 -0
  8. package/.agents/skills/firecrawl-build-interact/SKILL.md +67 -0
  9. package/.agents/skills/firecrawl-build-onboarding/SKILL.md +102 -0
  10. package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +39 -0
  11. package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +20 -0
  12. package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +17 -0
  13. package/.agents/skills/firecrawl-build-scrape/SKILL.md +68 -0
  14. package/.agents/skills/firecrawl-build-search/SKILL.md +68 -0
  15. package/.agents/skills/firecrawl-crawl/SKILL.md +58 -0
  16. package/.agents/skills/firecrawl-download/SKILL.md +69 -0
  17. package/.agents/skills/firecrawl-interact/SKILL.md +83 -0
  18. package/.agents/skills/firecrawl-map/SKILL.md +50 -0
  19. package/.agents/skills/firecrawl-parse/SKILL.md +61 -0
  20. package/.agents/skills/firecrawl-scrape/SKILL.md +68 -0
  21. package/.agents/skills/firecrawl-search/SKILL.md +59 -0
  22. package/.agents/skills/obsidian-bases/SKILL.md +299 -0
  23. package/.agents/skills/obsidian-markdown/SKILL.md +237 -0
  24. package/.agents/skills/posthog-analyst/SKILL.md +306 -0
  25. package/.agents/skills/posthog-analyst/evals/evals.json +23 -0
  26. package/.agents/skills/wiki/SKILL.md +215 -0
  27. package/.agents/skills/wiki/references/css-snippets.md +122 -0
  28. package/.agents/skills/wiki/references/frontmatter.md +107 -0
  29. package/.agents/skills/wiki/references/git-setup.md +58 -0
  30. package/.agents/skills/wiki/references/mcp-setup.md +149 -0
  31. package/.agents/skills/wiki/references/modes.md +259 -0
  32. package/.agents/skills/wiki/references/plugins.md +96 -0
  33. package/.agents/skills/wiki/references/rest-api.md +124 -0
  34. package/.agents/skills/wiki-autoresearch/SKILL.md +211 -0
  35. package/.agents/skills/wiki-autoresearch/references/program.md +75 -0
  36. package/.agents/skills/wiki-fold/SKILL.md +204 -0
  37. package/.agents/skills/wiki-fold/references/fold-template.md +133 -0
  38. package/.agents/skills/wiki-ingest/SKILL.md +288 -0
  39. package/.agents/skills/wiki-lint/SKILL.md +183 -0
  40. package/.agents/skills/wiki-query/SKILL.md +176 -0
  41. package/.agents/skills/wiki-save/SKILL.md +128 -0
  42. package/.ckignore +41 -0
  43. package/.env.example +9 -0
  44. package/.github/workflows/lint.yml +33 -0
  45. package/.github/workflows/publish-github-packages.yml +35 -0
  46. package/.github/workflows/publish-npm.yml +1 -1
  47. package/.pi/SYSTEM.md +107 -40
  48. package/.pi/agents/pi-pi/agent-expert.md +205 -0
  49. package/.pi/agents/pi-pi/cli-expert.md +47 -0
  50. package/.pi/agents/pi-pi/config-expert.md +67 -0
  51. package/.pi/agents/pi-pi/ext-expert.md +53 -0
  52. package/.pi/agents/pi-pi/keybinding-expert.md +123 -0
  53. package/.pi/agents/pi-pi/pi-orchestrator.md +103 -0
  54. package/.pi/agents/pi-pi/prompt-expert.md +83 -0
  55. package/.pi/agents/pi-pi/skill-expert.md +52 -0
  56. package/.pi/agents/pi-pi/theme-expert.md +46 -0
  57. package/.pi/agents/pi-pi/tui-expert.md +100 -0
  58. package/.pi/agents/rethink.md +140 -0
  59. package/.pi/agents/wiki-ingest.md +67 -0
  60. package/.pi/agents/wiki-lint.md +75 -0
  61. package/.pi/auto-commit.json +20 -0
  62. package/.pi/extensions/banner.png +0 -0
  63. package/.pi/extensions/ck-enforce.ts +216 -0
  64. package/.pi/extensions/custom-footer.ts +308 -0
  65. package/.pi/extensions/custom-header.ts +116 -0
  66. package/.pi/extensions/dotenv-loader.ts +170 -0
  67. package/.pi/internal/cursor-sdk-transcript-parser.ts +59 -0
  68. package/.pi/model-router.json +95 -0
  69. package/.pi/npm/.gitignore +2 -0
  70. package/.pi/prompts/git-sync.md +124 -0
  71. package/.pi/prompts/harness-setup.md +509 -0
  72. package/.pi/prompts/save.md +16 -0
  73. package/.pi/prompts/wiki-autoresearch.md +19 -0
  74. package/.pi/prompts/wiki.md +23 -0
  75. package/.pi/providers/cursor-sdk-provider.test.mjs +476 -0
  76. package/.pi/providers/cursor-sdk-provider.ts +1085 -0
  77. package/.pi/settings.json +14 -4
  78. package/.pi/skills/agent-router/SKILL.md +174 -0
  79. package/.pi/sounds/alert/1-kaching-track.mp3 +0 -0
  80. package/.pi/sounds/error/1-ksi-wth-track.mp3 +0 -0
  81. package/.pi/sounds/error/2-smash-track.mp3 +0 -0
  82. package/.pi/sounds/error/3-buzzer-track.mp3 +0 -0
  83. package/.pi/sounds/notification/1-soft-notification-track.mp3 +0 -0
  84. package/.pi/sounds/project-sounds.json +25 -0
  85. package/.pi/sounds/reminder/1-soft-notification-track.mp3 +0 -0
  86. package/.pi/sounds/success/1-tada-track.mp3 +0 -0
  87. package/.pi/sounds/success/2-jobs-done-track.mp3 +0 -0
  88. package/.pi/sounds/success/3-yay-track.mp3 +0 -0
  89. package/CONTRIBUTING.md +116 -0
  90. package/README.md +32 -39
  91. package/biome.json +34 -0
  92. package/firecrawl/.env.template +58 -0
  93. package/firecrawl/README.md +49 -0
  94. package/firecrawl/docker-compose.yaml +201 -0
  95. package/firecrawl/searxng/searxng.env +3 -0
  96. package/firecrawl/searxng/settings.yml +85 -0
  97. package/lefthook.yml +8 -0
  98. package/package.json +55 -24
  99. package/vault/AGENTS.md +37 -0
  100. package/vault/wiki/_templates/comparison.md +39 -0
  101. package/vault/wiki/_templates/concept.md +40 -0
  102. package/vault/wiki/_templates/decision.md +21 -0
  103. package/vault/wiki/_templates/entity.md +32 -0
  104. package/vault/wiki/_templates/flow.md +14 -0
  105. package/vault/wiki/_templates/module.md +18 -0
  106. package/vault/wiki/_templates/question.md +31 -0
  107. package/vault/wiki/_templates/source.md +39 -0
  108. package/vault/wiki/concepts/AST-Aware Code Chunking.md +44 -0
  109. package/vault/wiki/concepts/Build-Time Prompt Compilation.md +107 -0
  110. package/vault/wiki/concepts/Context Engine (AI Coding).md +47 -0
  111. package/vault/wiki/concepts/Context-Aware System Reminders.md +61 -0
  112. package/vault/wiki/concepts/Contextualized Text Embedding.md +42 -0
  113. package/vault/wiki/concepts/Contractor vs Employee AI Model.md +55 -0
  114. package/vault/wiki/concepts/Dual-Model Agent Architecture.md +65 -0
  115. package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +43 -0
  116. package/vault/wiki/concepts/Majority Vote Ensembling.md +68 -0
  117. package/vault/wiki/concepts/Meta-Harness.md +16 -0
  118. package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +75 -0
  119. package/vault/wiki/concepts/Prompt Enhancement.md +90 -0
  120. package/vault/wiki/concepts/Prompt Renderer.md +89 -0
  121. package/vault/wiki/concepts/Semantic Codebase Indexing.md +67 -0
  122. package/vault/wiki/concepts/additive-config-hierarchy.md +16 -0
  123. package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +71 -0
  124. package/vault/wiki/concepts/agent-browser-browser-automation.md +99 -0
  125. package/vault/wiki/concepts/agent-codebase-interface.md +43 -0
  126. package/vault/wiki/concepts/agent-harness-architecture.md +67 -0
  127. package/vault/wiki/concepts/agent-loop-detection-patterns.md +133 -0
  128. package/vault/wiki/concepts/agent-search-enforcement.md +126 -0
  129. package/vault/wiki/concepts/agent-skills-ecosystem.md +74 -0
  130. package/vault/wiki/concepts/agent-skills-pattern.md +68 -0
  131. package/vault/wiki/concepts/agentic-harness-context-enforcement.md +91 -0
  132. package/vault/wiki/concepts/agentic-harness.md +34 -0
  133. package/vault/wiki/concepts/agentic-orchestration-pipeline.md +56 -0
  134. package/vault/wiki/concepts/agentic-search-no-embeddings.md +18 -0
  135. package/vault/wiki/concepts/anthropic-context-engineering.md +13 -0
  136. package/vault/wiki/concepts/antigravity-agent-first-architecture.md +61 -0
  137. package/vault/wiki/concepts/ast-compression.md +19 -0
  138. package/vault/wiki/concepts/ast-truncation.md +66 -0
  139. package/vault/wiki/concepts/barrel-files.md +37 -0
  140. package/vault/wiki/concepts/browser-harness-agent.md +41 -0
  141. package/vault/wiki/concepts/browser-subagent-visual-verification.md +82 -0
  142. package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +192 -0
  143. package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +161 -0
  144. package/vault/wiki/concepts/codebase-to-context-ingestion.md +46 -0
  145. package/vault/wiki/concepts/codex-harness-innovations.md +147 -0
  146. package/vault/wiki/concepts/consensus-debate-flow.md +17 -0
  147. package/vault/wiki/concepts/consensus-debate.md +206 -0
  148. package/vault/wiki/concepts/content-addressed-spec-identity.md +166 -0
  149. package/vault/wiki/concepts/context-anxiety.md +57 -0
  150. package/vault/wiki/concepts/context-compression-techniques.md +19 -0
  151. package/vault/wiki/concepts/context-continuity.md +22 -0
  152. package/vault/wiki/concepts/context-drift-in-agents.md +106 -0
  153. package/vault/wiki/concepts/context-engineering.md +62 -0
  154. package/vault/wiki/concepts/context-folding.md +67 -0
  155. package/vault/wiki/concepts/context-mode.md +38 -0
  156. package/vault/wiki/concepts/cursor-harness-innovations.md +107 -0
  157. package/vault/wiki/concepts/deterministic-session-compaction.md +79 -0
  158. package/vault/wiki/concepts/drift-detection-unified.md +296 -0
  159. package/vault/wiki/concepts/execution-feedback-loop.md +46 -0
  160. package/vault/wiki/concepts/feedforward-feedback-harness.md +60 -0
  161. package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +40 -0
  162. package/vault/wiki/concepts/fork-safe-spec-storage.md +89 -0
  163. package/vault/wiki/concepts/fts5-sandbox.md +19 -0
  164. package/vault/wiki/concepts/fuzzy-edit-matching.md +71 -0
  165. package/vault/wiki/concepts/gemini-cli-architecture.md +104 -0
  166. package/vault/wiki/concepts/generator-evaluator-architecture.md +64 -0
  167. package/vault/wiki/concepts/guardian-agent-pattern.md +67 -0
  168. package/vault/wiki/concepts/harness-configuration-layers.md +89 -0
  169. package/vault/wiki/concepts/harness-control-frameworks.md +155 -0
  170. package/vault/wiki/concepts/harness-engineering-first-principles.md +90 -0
  171. package/vault/wiki/concepts/harness-h-formalism.md +53 -0
  172. package/vault/wiki/concepts/hybrid-code-search.md +61 -0
  173. package/vault/wiki/concepts/inline-post-edit-validation.md +112 -0
  174. package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +110 -0
  175. package/vault/wiki/concepts/lifecycle-hooks.md +94 -0
  176. package/vault/wiki/concepts/mcp-tool-routing.md +102 -0
  177. package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +47 -0
  178. package/vault/wiki/concepts/meta-agent-context-pruning.md +151 -0
  179. package/vault/wiki/concepts/model-adaptive-harness.md +122 -0
  180. package/vault/wiki/concepts/model-routing-agents.md +101 -0
  181. package/vault/wiki/concepts/monorepo-architecture.md +45 -0
  182. package/vault/wiki/concepts/multi-agent-specialization.md +61 -0
  183. package/vault/wiki/concepts/permission-subsystem.md +16 -0
  184. package/vault/wiki/concepts/pi-messenger-analysis.md +243 -0
  185. package/vault/wiki/concepts/pi-vscode-extension-landscape.md +37 -0
  186. package/vault/wiki/concepts/policy-engine-pattern.md +78 -0
  187. package/vault/wiki/concepts/progressive-disclosure-agents.md +53 -0
  188. package/vault/wiki/concepts/progressive-skill-disclosure.md +17 -0
  189. package/vault/wiki/concepts/provider-native-prompting.md +203 -0
  190. package/vault/wiki/concepts/quality-signal-sentrux.md +37 -0
  191. package/vault/wiki/concepts/repo-map-ranking.md +42 -0
  192. package/vault/wiki/concepts/result-monad-error-handling.md +47 -0
  193. package/vault/wiki/concepts/safety-defense-in-depth.md +83 -0
  194. package/vault/wiki/concepts/sandbox-os-enforcement.md +18 -0
  195. package/vault/wiki/concepts/selective-debate-routing.md +70 -0
  196. package/vault/wiki/concepts/self-evolving-harness.md +60 -0
  197. package/vault/wiki/concepts/sentrux-mcp-integration.md +36 -0
  198. package/vault/wiki/concepts/sentrux-rules-engine.md +49 -0
  199. package/vault/wiki/concepts/shell-pattern-compression.md +24 -0
  200. package/vault/wiki/concepts/skill-first-architecture.md +166 -0
  201. package/vault/wiki/concepts/structured-compaction.md +78 -0
  202. package/vault/wiki/concepts/subagent-orchestration.md +17 -0
  203. package/vault/wiki/concepts/subagent-worktree-isolation.md +68 -0
  204. package/vault/wiki/concepts/superpowers-methodology.md +78 -0
  205. package/vault/wiki/concepts/think-in-code.md +73 -0
  206. package/vault/wiki/concepts/ts-execution-layer.md +100 -0
  207. package/vault/wiki/concepts/typescript-strict-mode.md +37 -0
  208. package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +51 -0
  209. package/vault/wiki/concepts/verification-drift-detection.md +19 -0
  210. package/vault/wiki/consensus/consensus-records.md +58 -0
  211. package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +122 -0
  212. package/vault/wiki/decisions/adr-008.md +40 -0
  213. package/vault/wiki/decisions/adr-009.md +46 -0
  214. package/vault/wiki/decisions/adr-010.md +55 -0
  215. package/vault/wiki/decisions/adr-011.md +165 -0
  216. package/vault/wiki/decisions/adr-012.md +102 -0
  217. package/vault/wiki/decisions/adr-013.md +59 -0
  218. package/vault/wiki/decisions/adr-014.md +73 -0
  219. package/vault/wiki/decisions/adr-015.md +81 -0
  220. package/vault/wiki/decisions/adr-016.md +91 -0
  221. package/vault/wiki/decisions/adr-017.md +79 -0
  222. package/vault/wiki/decisions/adr-018.md +100 -0
  223. package/vault/wiki/decisions/adr-019.md +75 -0
  224. package/vault/wiki/decisions/adr-020.md +106 -0
  225. package/vault/wiki/decisions/adr-021.md +86 -0
  226. package/vault/wiki/decisions/adr-022.md +113 -0
  227. package/vault/wiki/decisions/adr-023.md +113 -0
  228. package/vault/wiki/decisions/adr-024.md +73 -0
  229. package/vault/wiki/decisions/adr-025.md +130 -0
  230. package/vault/wiki/decisions/adr-026.md +56 -0
  231. package/vault/wiki/decisions/colocate-wiki.md +34 -0
  232. package/vault/wiki/entities/Anders Hejlsberg.md +29 -0
  233. package/vault/wiki/entities/Anthropic.md +17 -0
  234. package/vault/wiki/entities/Augment Code.md +49 -0
  235. package/vault/wiki/entities/Bjarne Stroustrup.md +26 -0
  236. package/vault/wiki/entities/Bolt.new (StackBlitz).md +39 -0
  237. package/vault/wiki/entities/Boris Cherny.md +11 -0
  238. package/vault/wiki/entities/Claude Code.md +19 -0
  239. package/vault/wiki/entities/Dennis Ritchie.md +26 -0
  240. package/vault/wiki/entities/Emergent Labs.md +32 -0
  241. package/vault/wiki/entities/Google Cloud.md +16 -0
  242. package/vault/wiki/entities/Guido van Rossum.md +28 -0
  243. package/vault/wiki/entities/Ken Thompson.md +28 -0
  244. package/vault/wiki/entities/Lee et al.md +16 -0
  245. package/vault/wiki/entities/Linus Torvalds.md +28 -0
  246. package/vault/wiki/entities/Lovable (company).md +40 -0
  247. package/vault/wiki/entities/Martin Fowler.md +16 -0
  248. package/vault/wiki/entities/Meng et al.md +16 -0
  249. package/vault/wiki/entities/OpenAI.md +16 -0
  250. package/vault/wiki/entities/Rocket.new.md +38 -0
  251. package/vault/wiki/entities/VILA-Lab.md +15 -0
  252. package/vault/wiki/entities/autodev-codebase.md +18 -0
  253. package/vault/wiki/entities/ck-tool.md +59 -0
  254. package/vault/wiki/entities/codesearch.md +18 -0
  255. package/vault/wiki/entities/disler-indydevdan.md +33 -0
  256. package/vault/wiki/entities/gsd-get-shit-done.md +56 -0
  257. package/vault/wiki/entities/javascript-runtimes.md +48 -0
  258. package/vault/wiki/entities/jesse-vincent.md +38 -0
  259. package/vault/wiki/entities/lean-ctx.md +32 -0
  260. package/vault/wiki/entities/opendev.md +41 -0
  261. package/vault/wiki/entities/ops-codegraph-tool.md +18 -0
  262. package/vault/wiki/entities/pi-coding-agent.md +53 -0
  263. package/vault/wiki/entities/sentrux.md +54 -0
  264. package/vault/wiki/entities/vgrep-tool.md +57 -0
  265. package/vault/wiki/entities/vitest.md +41 -0
  266. package/vault/wiki/flows/harness-wiki-pipeline.md +204 -0
  267. package/vault/wiki/hot.md +932 -0
  268. package/vault/wiki/index.md +437 -0
  269. package/vault/wiki/log.md +418 -0
  270. package/vault/wiki/meta/dashboard.md +30 -0
  271. package/vault/wiki/meta/lint-report-2026-04-30.md +86 -0
  272. package/vault/wiki/meta/lint-report-2026-05-02.md +251 -0
  273. package/vault/wiki/meta/overview.canvas +43 -0
  274. package/vault/wiki/modules/adversarial-verification.md +57 -0
  275. package/vault/wiki/modules/automated-observability.md +54 -0
  276. package/vault/wiki/modules/bench.md +20 -0
  277. package/vault/wiki/modules/extensions.md +23 -0
  278. package/vault/wiki/modules/grounding-checkpoints.md +62 -0
  279. package/vault/wiki/modules/harness-implementation-plan.md +345 -0
  280. package/vault/wiki/modules/harness-wiki-skill-mapping.md +135 -0
  281. package/vault/wiki/modules/harness.md +86 -0
  282. package/vault/wiki/modules/persistent-memory.md +85 -0
  283. package/vault/wiki/modules/schema-orchestration.md +68 -0
  284. package/vault/wiki/modules/skills.md +27 -0
  285. package/vault/wiki/modules/spec-hardening.md +58 -0
  286. package/vault/wiki/modules/structured-planning.md +53 -0
  287. package/vault/wiki/modules/think-in-code-enforcement.md +153 -0
  288. package/vault/wiki/modules/wiki-query-interface.md +64 -0
  289. package/vault/wiki/overview.md +51 -0
  290. package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +87 -0
  291. package/vault/wiki/questions/Research-sentrux-dev.md +123 -0
  292. package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +164 -0
  293. package/vault/wiki/questions/Research: Augment Code Context Engine.md +244 -0
  294. package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +112 -0
  295. package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +209 -0
  296. package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +99 -0
  297. package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +107 -0
  298. package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +72 -0
  299. package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +166 -0
  300. package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +188 -0
  301. package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +120 -0
  302. package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +236 -0
  303. package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +95 -0
  304. package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +165 -0
  305. package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +216 -0
  306. package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +91 -0
  307. package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +88 -0
  308. package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +81 -0
  309. package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +71 -0
  310. package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +80 -0
  311. package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +72 -0
  312. package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +92 -0
  313. package/vault/wiki/questions/Research: executor.sh Harness Integration.md +170 -0
  314. package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +97 -0
  315. package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +80 -0
  316. package/vault/wiki/questions/Research: pi-vcc.md +113 -0
  317. package/vault/wiki/questions/Research: semantic code search tools.md +69 -0
  318. package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +73 -0
  319. package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +111 -0
  320. package/vault/wiki/questions/mvp-implementation-blueprint.md +552 -0
  321. package/vault/wiki/questions/research-agent-first-codebase-exploration.md +199 -0
  322. package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +142 -0
  323. package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +100 -0
  324. package/vault/wiki/questions/research-wozcode-token-reduction.md +67 -0
  325. package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +95 -0
  326. package/vault/wiki/questions/resolved-context-window-economics.md +167 -0
  327. package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +126 -0
  328. package/vault/wiki/questions/resolved-mcp-tool-preference.md +112 -0
  329. package/vault/wiki/questions/resolved-small-model-meta-agents.md +107 -0
  330. package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +95 -0
  331. package/vault/wiki/sources/Auggie Context MCP Server.md +63 -0
  332. package/vault/wiki/sources/Augment Code Codacy AI Giants.md +61 -0
  333. package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +49 -0
  334. package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +55 -0
  335. package/vault/wiki/sources/Augment Context Engine Official.md +71 -0
  336. package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +74 -0
  337. package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +58 -0
  338. package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +75 -0
  339. package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +85 -0
  340. package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +58 -0
  341. package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +100 -0
  342. package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +75 -0
  343. package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +107 -0
  344. package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +70 -0
  345. package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +88 -0
  346. package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +57 -0
  347. package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +53 -0
  348. package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +65 -0
  349. package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +83 -0
  350. package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +70 -0
  351. package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +58 -0
  352. package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +101 -0
  353. package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +100 -0
  354. package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +53 -0
  355. package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +70 -0
  356. package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +71 -0
  357. package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +89 -0
  358. package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +155 -0
  359. package/vault/wiki/sources/Source: browser-harness CDP Harness.md +126 -0
  360. package/vault/wiki/sources/agent-drift-academic-paper.md +79 -0
  361. package/vault/wiki/sources/aider-repomap-tree-sitter.md +42 -0
  362. package/vault/wiki/sources/anthropic-compaction-api.md +58 -0
  363. package/vault/wiki/sources/anthropic-effective-harnesses.md +42 -0
  364. package/vault/wiki/sources/anthropic-prompt-best-practices.md +100 -0
  365. package/vault/wiki/sources/anthropic2026-harness-design.md +63 -0
  366. package/vault/wiki/sources/barrel-files-tkdodo.md +38 -0
  367. package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +57 -0
  368. package/vault/wiki/sources/bockeler2026-harness-engineering.md +69 -0
  369. package/vault/wiki/sources/cast-code-chunking-paper.md +50 -0
  370. package/vault/wiki/sources/ck-semantic-search.md +78 -0
  371. package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +71 -0
  372. package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +50 -0
  373. package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +64 -0
  374. package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +70 -0
  375. package/vault/wiki/sources/claude-context-editing-docs.md +13 -0
  376. package/vault/wiki/sources/cloudflare-codemode.md +63 -0
  377. package/vault/wiki/sources/code-chunk-library-supermemory.md +63 -0
  378. package/vault/wiki/sources/codeact-apple-2024.md +62 -0
  379. package/vault/wiki/sources/codex-dsc-rfc-8573.md +41 -0
  380. package/vault/wiki/sources/codex-open-source-agent-2026.md +110 -0
  381. package/vault/wiki/sources/coir-code-retrieval-benchmark.md +51 -0
  382. package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +48 -0
  383. package/vault/wiki/sources/context-folding-paper.md +61 -0
  384. package/vault/wiki/sources/context-mode-website.md +63 -0
  385. package/vault/wiki/sources/cursor-agent-best-practices-2026.md +62 -0
  386. package/vault/wiki/sources/cursor-fork-29b-2025.md +50 -0
  387. package/vault/wiki/sources/cursor-harness-april-2026.md +76 -0
  388. package/vault/wiki/sources/cursor-instant-apply-2024.md +45 -0
  389. package/vault/wiki/sources/cursor-shadow-workspace-2024.md +52 -0
  390. package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +53 -0
  391. package/vault/wiki/sources/cursor-vs-antigravity-2026.md +51 -0
  392. package/vault/wiki/sources/disler-pi-vs-claude-code.md +69 -0
  393. package/vault/wiki/sources/distill-deterministic-context-compression.md +53 -0
  394. package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +48 -0
  395. package/vault/wiki/sources/executor-rhyssullivan.md +122 -0
  396. package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +125 -0
  397. package/vault/wiki/sources/fan2025-imad.md +60 -0
  398. package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +63 -0
  399. package/vault/wiki/sources/gemini-3-prompting-guide.md +78 -0
  400. package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +50 -0
  401. package/vault/wiki/sources/gh-sub-issue-extension.md +72 -0
  402. package/vault/wiki/sources/github-fork-issues-discussion.md +44 -0
  403. package/vault/wiki/sources/github-issue-dependencies-docs.md +49 -0
  404. package/vault/wiki/sources/github-sub-issues-docs.md +51 -0
  405. package/vault/wiki/sources/gitingest.md +91 -0
  406. package/vault/wiki/sources/gitreverse.md +63 -0
  407. package/vault/wiki/sources/google-antigravity-official-blog.md +47 -0
  408. package/vault/wiki/sources/google-antigravity-wikipedia.md +53 -0
  409. package/vault/wiki/sources/gsd-codecentric-deep-dive.md +57 -0
  410. package/vault/wiki/sources/gsd-github-repo.md +51 -0
  411. package/vault/wiki/sources/gsd-hn-discussion.md +59 -0
  412. package/vault/wiki/sources/guido-python-design-philosophy.md +56 -0
  413. package/vault/wiki/sources/hejlsberg-7-learnings.md +48 -0
  414. package/vault/wiki/sources/ironclaw-drift-monitor.md +80 -0
  415. package/vault/wiki/sources/langsight-loop-detection.md +80 -0
  416. package/vault/wiki/sources/leanctx-website.md +69 -0
  417. package/vault/wiki/sources/lee2026-meta-harness.md +59 -0
  418. package/vault/wiki/sources/linux-kernel-coding-workflow.md +50 -0
  419. package/vault/wiki/sources/lou2026-autoharness.md +53 -0
  420. package/vault/wiki/sources/martin-fowler-harness-engineering.md +73 -0
  421. package/vault/wiki/sources/mcp-architecture-docs.md +13 -0
  422. package/vault/wiki/sources/meng2026-agent-harness-survey.md +79 -0
  423. package/vault/wiki/sources/mindstudio-four-agent-types.md +68 -0
  424. package/vault/wiki/sources/ms-chat-history-management.md +13 -0
  425. package/vault/wiki/sources/openai-prompt-guidance.md +104 -0
  426. package/vault/wiki/sources/openclaw-session-pruning.md +13 -0
  427. package/vault/wiki/sources/opencode-dcp.md +13 -0
  428. package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +79 -0
  429. package/vault/wiki/sources/openhands-platform.md +39 -0
  430. package/vault/wiki/sources/oss-guide-codebase-exploration.md +53 -0
  431. package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +102 -0
  432. package/vault/wiki/sources/pi-context-prune-github-repo.md +38 -0
  433. package/vault/wiki/sources/pi-mono-compaction-docs.md +38 -0
  434. package/vault/wiki/sources/pi-omni-compact-github-repo.md +50 -0
  435. package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +45 -0
  436. package/vault/wiki/sources/pi-vcc-github-repo.md +69 -0
  437. package/vault/wiki/sources/pi-vscode-marketplace.md +41 -0
  438. package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +39 -0
  439. package/vault/wiki/sources/py-tree-sitter.md +13 -0
  440. package/vault/wiki/sources/sentrux-dev-landing.md +40 -0
  441. package/vault/wiki/sources/sentrux-docs-pro-architecture.md +75 -0
  442. package/vault/wiki/sources/sentrux-docs-quality-signal.md +46 -0
  443. package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +57 -0
  444. package/vault/wiki/sources/sentrux-docs-rules-engine.md +58 -0
  445. package/vault/wiki/sources/sentrux-github-repo.md +56 -0
  446. package/vault/wiki/sources/superpowers-github-repo.md +56 -0
  447. package/vault/wiki/sources/superpowers-release-blog.md +54 -0
  448. package/vault/wiki/sources/superpowers-termdock-analysis.md +45 -0
  449. package/vault/wiki/sources/swe-agent-aci.md +42 -0
  450. package/vault/wiki/sources/swe-bench.md +45 -0
  451. package/vault/wiki/sources/swe-pruner-context-pruning.md +13 -0
  452. package/vault/wiki/sources/think-in-code-blog.md +48 -0
  453. package/vault/wiki/sources/tree-sitter-docs.md +13 -0
  454. package/vault/wiki/sources/ts-best-practices-2025-devto.md +42 -0
  455. package/vault/wiki/sources/ts-folder-structure-mingyang.md +58 -0
  456. package/vault/wiki/sources/ts-monorepo-koerselman.md +44 -0
  457. package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +52 -0
  458. package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +42 -0
  459. package/vault/wiki/sources/ts-strict-mode-rishikc.md +43 -0
  460. package/vault/wiki/sources/unix-philosophy.md +48 -0
  461. package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +39 -0
  462. package/vault/wiki/sources/vectara-guardian-agents.md +79 -0
  463. package/vault/wiki/sources/vgrep-semantic-search.md +76 -0
  464. package/vault/wiki/sources/vitest-official.md +41 -0
  465. package/vault/wiki/sources/vscode-pi-community-extension.md +40 -0
  466. package/vault/wiki/sources/wozcode.md +79 -0
  467. package/.agents/skills/compress/SKILL.md +0 -111
  468. package/.agents/skills/compress/scripts/__init__.py +0 -9
  469. package/.agents/skills/compress/scripts/__main__.py +0 -3
  470. package/.agents/skills/compress/scripts/benchmark.py +0 -78
  471. package/.agents/skills/compress/scripts/cli.py +0 -73
  472. package/.agents/skills/compress/scripts/compress.py +0 -227
  473. package/.agents/skills/compress/scripts/detect.py +0 -121
  474. package/.agents/skills/compress/scripts/validate.py +0 -189
  475. package/.agents/skills/emil-design-eng/SKILL.md +0 -679
  476. package/.agents/skills/lean-ctx/SKILL.md +0 -149
  477. package/.agents/skills/lean-ctx/scripts/install.sh +0 -95
  478. package/.agents/skills/scrapling-official/LICENSE.txt +0 -28
  479. package/.agents/skills/scrapling-official/SKILL.md +0 -390
  480. package/.agents/skills/scrapling-official/examples/01_fetcher_session.py +0 -26
  481. package/.agents/skills/scrapling-official/examples/02_dynamic_session.py +0 -26
  482. package/.agents/skills/scrapling-official/examples/03_stealthy_session.py +0 -26
  483. package/.agents/skills/scrapling-official/examples/04_spider.py +0 -58
  484. package/.agents/skills/scrapling-official/examples/README.md +0 -45
  485. package/.agents/skills/scrapling-official/references/fetching/choosing.md +0 -78
  486. package/.agents/skills/scrapling-official/references/fetching/dynamic.md +0 -352
  487. package/.agents/skills/scrapling-official/references/fetching/static.md +0 -432
  488. package/.agents/skills/scrapling-official/references/fetching/stealthy.md +0 -255
  489. package/.agents/skills/scrapling-official/references/mcp-server.md +0 -214
  490. package/.agents/skills/scrapling-official/references/migrating_from_beautifulsoup.md +0 -86
  491. package/.agents/skills/scrapling-official/references/parsing/adaptive.md +0 -212
  492. package/.agents/skills/scrapling-official/references/parsing/main_classes.md +0 -586
  493. package/.agents/skills/scrapling-official/references/parsing/selection.md +0 -494
  494. package/.agents/skills/scrapling-official/references/spiders/advanced.md +0 -344
  495. package/.agents/skills/scrapling-official/references/spiders/architecture.md +0 -94
  496. package/.agents/skills/scrapling-official/references/spiders/getting-started.md +0 -164
  497. package/.agents/skills/scrapling-official/references/spiders/proxy-blocking.md +0 -235
  498. package/.agents/skills/scrapling-official/references/spiders/requests-responses.md +0 -196
  499. package/.agents/skills/scrapling-official/references/spiders/sessions.md +0 -205
  500. package/PLAN.md +0 -11
  501. package/extensions/lean-ctx-enforce.ts +0 -166
  502. package/skills-lock.json +0 -35
  503. package/wiki/README.md +0 -19
  504. package/wiki/decisions/0001-establish-project-wiki-and-decision-record-format.md +0 -25
  505. package/wiki/decisions/0002-add-project-banner-to-readme.md +0 -26
  506. package/wiki/decisions/0003-remove-redundant-readme-title-heading.md +0 -26
  507. package/wiki/decisions/0004-publish-package-to-npm-as-ultimate-pi.md +0 -26
  508. package/wiki/decisions/0005-automate-npm-publish-with-github-actions.md +0 -27
  509. package/wiki/decisions/0006-switch-to-npm-trusted-publishing.md +0 -26
  510. package/wiki/decisions/0007-use-absolute-banner-url-for-npm-readme-rendering.md +0 -26
  511. package/wiki/decisions/0008-rename-banner-asset-for-cache-busting.md +0 -26
  512. package/wiki/decisions/0009-force-oidc-path-by-clearing-node-auth-token-in-publish-step.md +0 -25
  513. package/wiki/decisions/0010-simplify-setup-node-for-npm-trusted-publishing.md +0 -26
  514. package/wiki/decisions/0011-add-noop-workflow-change-to-force-fresh-publish-run.md +0 -25
  515. package/wiki/decisions/0012-align-workflow-runtime-with-npm-trusted-publishing-requirements.md +0 -26
  516. package/wiki/decisions/0013-add-package-repository-url-for-provenance-validation.md +0 -25
@@ -0,0 +1,70 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: blog
5
+ title: "Inside Claude Code: The Architecture Behind Tools, Memory, Hooks, and MCP"
6
+ author: "Penligent"
7
+ date_published: 2026-04-02
8
+ url: "https://www.penligent.ai/hackinglabs/inside-claude-code-the-architecture-behind-tools-memory-hooks-and-mcp/"
9
+ confidence: medium
10
+ tags: [claude-code, architecture, security, hooks, permissions, sandboxing, MCP, CVE]
11
+ key_claims:
12
+ - "Claude Code is a governed execution environment with a model in the middle"
13
+ - "The control plane around the model often matters more than the model itself"
14
+ - "Permissions and sandboxing are complementary: permissions control what can be attempted, sandboxing provides OS-level enforcement"
15
+ - "All child processes inherit sandbox boundaries"
16
+ - "Auto mode drops broad allow rules to prevent unsafe policy combinations"
17
+ - "Managed settings: allowManagedHooksOnly, allowManagedMcpServersOnly, allowManagedPermissionRulesOnly"
18
+ - "CVE-2025-68143/144/145: MCP Git server vulnerabilities show thin tool wrappers inherit unsafe command surfaces"
19
+ - "Agent risk is compositional: repo, tool result, config file, MCP server — all can become control inputs"
20
+ created: 2026-05-02
21
+ updated: 2026-05-02
22
+ ---
23
+ # Claude Code Security Architecture (Penligent, 2026)
24
+
25
+ ## Source Summary
26
+
27
+ Security-focused technical analysis by Penligent (AI security platform). Covers the full Claude Code architecture through a security lens. Published April 2, 2026 following Anthropic's source map leak incident. Notable for concrete CVE case studies and the explicit security model analysis.
28
+
29
+ ## Five Operational Layers
30
+
31
+ | Layer | What it controls | Why it matters |
32
+ |---|---|---|
33
+ | Agent loop | Task decomposition, next-step selection | Shifts from text generation to action |
34
+ | Context and memory | What Claude knows now and across sessions | Most "drift" problems are context problems |
35
+ | Execution surface | How work touches code and systems | Determines blast radius and reproducibility |
36
+ | Governance and safety | What Claude is allowed to attempt | The real control plane for production |
37
+ | Extensibility | How new capabilities arrive | Flexibility and supply-chain risk both increase |
38
+
39
+ ## Permission Modes
40
+
41
+ | Mode | Autonomy | Best fit |
42
+ |---|---|---|
43
+ | `default` | Read only | Sensitive work, first use |
44
+ | `acceptEdits` | Read + edit files | Iterating while gating commands |
45
+ | `plan` | Read + plan only | Research before modification |
46
+ | `auto` | ML classifier reviews | Long-running with governance |
47
+ | `bypassPermissions` | All actions, no checks | Isolated containers only |
48
+ | `dontAsk` | Pre-approved tools only | Locked-down environments |
49
+
50
+ ## Sandboxing
51
+
52
+ macOS: Seatbelt. Linux/WSL2: bubblewrap. WSL1 unsupported. Child processes inherit boundaries. `autoAllowBashIfSandboxed` + filesystem/network restrictions. Escape hatch: `dangerouslyDisableSandbox` — still goes through permission flow if enabled.
53
+
54
+ ## Security CVE Case Studies
55
+
56
+ - **CVE-2025-68143**: `mcp-server-git` `git_init` tool accepted arbitrary paths. Fix: remove tool entirely.
57
+ - **CVE-2025-68144**: `git_diff`/`git_checkout` passed user-controlled args directly to Git CLI. Flag-like values interpreted as options. Fix: reject `-` prefix, verify valid Git refs.
58
+ - **CVE-2025-68145**: `--repository` restriction not validated in subsequent tool calls. Fix: path validation with symlink resolution.
59
+ - **CVE-2024-32002**: Malicious Git repo with submodules could write hooks via recursive clone on case-insensitive filesystems. Lesson: repos are execution inputs, not passive context.
60
+ - **CVE-2026-25153**: Backstage TechDocs `mkdocs.yml` hooks execution. Fix: allowlist supported keys, strip hooks.
61
+
62
+ ## Key Quotes
63
+
64
+ > "Claude Code is a governed execution environment with a model in the middle. This is not just 'Claude plus bash.'"
65
+
66
+ > "The dangerous thing about agent systems is often not only the code they generate, but the quiet changes to the agent's own configuration surface."
67
+
68
+ > "Permissions can still be tricked by bad policy or human error. OS-level boundaries change what the subprocess can actually touch."
69
+
70
+ > "Even if prompt injection manipulates Claude's behavior, the sandbox can still prevent critical file modification, unauthorized network egress, and access outside defined boundaries."
@@ -0,0 +1,13 @@
1
+ ---
2
+ type: source
3
+ status: stub
4
+ created: 2026-05-02
5
+ updated: 2026-05-02
6
+ tags: [source, external-doc]
7
+ ---
8
+
9
+ # Claude Context Editing Docs
10
+
11
+ Anthropic documentation on context editing APIs for Claude. Describes how to modify conversation context in-place (vs session restart).
12
+
13
+ Referenced in: [[resolved-context-pruning-inplace-vs-restart]]
@@ -0,0 +1,63 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: official_documentation
5
+ title: "Cloudflare Code Mode"
6
+ author: "Cloudflare (Kenton Varda, Sunil Pai)"
7
+ date_published: 2025-09-29
8
+ url: "https://developers.cloudflare.com/agents/api-reference/codemode/"
9
+ confidence: high
10
+ tags:
11
+ - agent-tools
12
+ - typescript-execution-layer
13
+ - sandbox
14
+ - mcp
15
+ key_claims:
16
+ - "LLMs are better at writing code to call APIs than at calling them directly through tool functions"
17
+ - "Code Mode converts MCP tools into typed TypeScript APIs, gives LLM a single 'write code' tool, and executes generated code in isolated Worker sandbox"
18
+ - "Inspired by Apple's CodeAct research"
19
+ - "DynamicWorkerExecutor spins up isolated Worker per execution via WorkerLoader"
20
+ - "Network isolation enforced at Workers runtime level (globalOutbound: null)"
21
+ - "Tool calls dispatched via Workers RPC, not network requests"
22
+ - "3-4x context reduction vs traditional tool calling"
23
+ created: 2026-05-02
24
+ updated: 2026-05-02
25
+ ---
26
+ # Cloudflare Code Mode
27
+
28
+ Cloudflare's `@cloudflare/codemode` package (beta) implements the **TypeScript execution layer** pattern for AI agents. Instead of exposing dozens of MCP tools as separate function calls in the LLM context, it converts all tools into a typed TypeScript API, gives the LLM a single "write code" tool, and executes the generated JavaScript in a secure, isolated Worker sandbox.
29
+
30
+ ## Architecture
31
+
32
+ ```
33
+ Host Worker       ←→  Dynamic Worker (isolated sandbox)
34
+   ToolDispatcher        LLM-generated code runs here
35
+   holds tool fns        codemode.myTool() → dispatcher.call()
36
+                         fetch() blocked by default
37
+ ```
38
+
39
+ 1. `createCodeTool` generates TypeScript type definitions from tools
40
+ 2. LLM writes an async arrow function calling `codemode.toolName(args)`
41
+ 3. Code is normalized via AST parsing (acorn)
42
+ 4. `DynamicWorkerExecutor` spins up isolated Worker via `WorkerLoader`
43
+ 5. Inside sandbox, `Proxy` intercepts `codemode.*` calls → RPC to host
44
+ 6. Console output captured and returned in result
45
+
46
+ ## Key Design Decisions
47
+
48
+ - **TypeScript types as guardrails**: Generated type defs guide LLM to correct implementations
49
+ - **Deterministic execution**: Once code is generated, execution is fully deterministic
50
+ - **Executor interface is minimal** (`execute(code, fns) → ExecuteResult`): pluggable sandbox backends
51
+ - **MCP server wrappers**: `codeMcpServer` and `openApiMcpServer` for wrapping existing servers
52
+ - **Tool name sanitization**: hyphens/dots → underscores for valid JS identifiers
53
+
54
+ ## Limitations
55
+
56
+ - Requires Cloudflare Workers for DynamicWorkerExecutor (custom Executor can use any sandbox)
57
+ - JavaScript execution only
58
+ - Tool approval (`needsApproval`) not yet supported
59
+ - Experimental — may have breaking changes
60
+
61
+ ## Relevance to ultimate-pi
62
+
63
+ Validates the TypeScript execution layer pattern at production scale (Cloudflare Agents SDK). The minimal Executor interface means we can implement our own sandbox backend (Node.js VM, Deno, or bubblewrap) without depending on Cloudflare infrastructure. The 3-4x context reduction directly supports our token budget goals.
@@ -0,0 +1,63 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: open-source-tool
5
+ author: Shoubhit Dash / Supermemory AI
6
+ date_published: 2025-12-27
7
+ url: https://www.nexxel.dev/blog/code-chunk
8
+ confidence: high
9
+ key_claims:
10
+ - "AST-based code chunking library that implements the cAST paper algorithm in production"
11
+ - "70.1% Recall@5 vs 49.0% (chonkie-code) vs 42.4% (fixed-size baseline)"
12
+ - "Adds contextualized text: file path, scope chain, entity signatures, imports used"
13
+ - "Supports TypeScript, JavaScript, Python, Rust, Go, Java via tree-sitter"
14
+ - "Open source (MIT), npm install code-chunk"
15
+ tags:
16
+ - code-chunking
17
+ - AST
18
+ - tree-sitter
19
+ - embeddings
20
+ - open-source
21
+ created: 2026-05-02
22
+ updated: 2026-05-02
23
+ ---
24
+ # code-chunk: AST-Aware Code Chunking Library
25
+
26
+ ## Summary
27
+
28
+ Production-grade open-source library implementing the cAST paper algorithm. Built by Supermemory AI. Uses tree-sitter for parsing, extracts semantic entities with metadata, builds scope trees, and generates contextualized text for embedding.
29
+
30
+ ## Key Features Beyond cAST Paper
31
+
32
+ 1. **Rich context extraction**: Full entity metadata, scope trees, contextualized text formatting
33
+ 2. **Overlap support**: Chunks can include last N lines from previous chunk
34
+ 3. **Streaming**: Process large files without loading everything into memory
35
+ 4. **Batch processing**: Chunk entire codebases with controlled concurrency
36
+ 5. **WASM support**: Works in Cloudflare Workers and edge runtimes
37
+
38
+ ## Contextualized Text Format
39
+
40
+ ```
41
+ # src/services/user.ts
42
+ # Scope: UserService > getUser
43
+ # Defines: async getUser(id: string): Promise<User>
44
+ # Uses: Database
45
+ # After: constructor
46
+
47
+ async getUser(id: string): Promise<User> { ... }
48
+ ```
49
+
50
+ This prepended header enriches the raw code with semantic context that embedding models (trained largely on natural language) can leverage.
51
+
52
+ ## Benchmark Results (SWE-bench Lite Eval)
53
+
54
+ | Metric | Without Search | With Semantic Search |
55
+ |--------|---------------|---------------------|
56
+ | Duration | 2.0m | 1.2m |
57
+ | Tokens | 4.3k | 2.4k |
58
+ | Cost | $0.25 | $0.20 |
59
+ | Tool Calls | 19 | 12 |
60
+
61
+ ## Relevance to Our Implementation
62
+
63
+ We should adopt the same approach: tree-sitter AST parsing (already via lean-ctx) → extract entities → scope tree → greedy window assignment → contextualized text prepending → embed with contextualized text.
@@ -0,0 +1,62 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: academic_paper
5
+ title: "CodeAct: Executable Code Actions Elicit Better LLM Agents"
6
+ author: "Xingyao Wang, Yangyi Chen, Lifan Yuan, Yizhe Zhang, Yunzhu Li, Hao Peng, Heng Ji (Apple / UIUC)"
7
+ date_published: 2024-07
8
+ url: "https://arxiv.org/abs/2402.01030"
9
+ confidence: high
10
+ conference: "ICML 2024"
11
+ tags:
12
+ - agent-tools
13
+ - code-generation
14
+ - tool-calling
15
+ - academic
16
+ key_claims:
17
+ - "Replacing JSON/text tool-calling with executable Python code improves LLM agent success rate by ~20 percentage points on multi-tool tasks"
18
+ - "CodeAct agents require ~30% fewer interaction turns than JSON-based agents"
19
+ - "Python interpreter provides automatic, zero-cost error signals — wrong calculations raise exceptions immediately"
20
+ - "Open-source models benefit more: CodeActAgent (Mistral 7B) at 12.2% vs Lemur-70B at 3.7% on multi-tool benchmark"
21
+ - "CodeActInstruct dataset: 7,139 multi-turn code-based trajectories across 4 domains"
22
+ created: 2026-05-02
23
+ updated: 2026-05-02
24
+ ---
25
+ # CodeAct (Apple, ICML 2024)
26
+
27
+ Foundation research paper that established the **code-as-unified-action-space** paradigm. Proposes replacing the JSON and text action formats common in tool-calling agents with executable Python code.
28
+
29
+ ## Core Insight
30
+
31
+ LLMs have seen millions of lines of real-world code during pretraining but only contrived tool-calling examples. Code is a better lingua franca for agent actions because it already encodes control flow, data dependencies, and multi-step composition.
32
+
33
+ ## Key Results
34
+
35
+ | Metric | JSON Actions | CodeAct | Improvement |
36
+ |--------|-------------|---------|-------------|
37
+ | Multi-tool success (GPT-4) | 53.7% | 74.4% | +20.7 pp |
38
+ | Interaction turns | baseline | -30% | fewer round-trips |
39
+ | Open-source (best) | 3.7% (Lemur-70B) | 12.2% (Mistral 7B) | +8.5 pp |
40
+
41
+ ## Mechanism
42
+
43
+ - Unified action space: all agent actions expressed as Python code
44
+ - Python interpreter catches errors automatically — no separate critique step
45
+ - Dynamic revision: agent can emit new actions or revise prior ones based on observations
46
+ - CodeActInstruct: fine-tuning dataset covering information retrieval, package calls, external memory, robot planning
47
+
48
+ ## Limitations
49
+
50
+ - M3ToolEval benchmark has only 82 tasks (small sample, no confidence intervals)
51
+ - Sandbox security is acknowledged but not deeply addressed (one paragraph)
52
+ - 60+ point capability gap between GPT-4 and CodeActAgent remains
53
+
54
+ ## Adoption
55
+
56
+ - Directly inspired Cloudflare Code Mode (TypeScript variant for Workers)
57
+ - Implemented in OpenHands/OpenDevin, LangGraph CodeAct, Manus
58
+ - Foundation for the entire "code execution layer" agent paradigm
59
+
60
+ ## Relevance to ultimate-pi
61
+
62
+ The academic foundation for our P14 (Think-in-Code Enforcement) and the new TypeScript execution layer phase. The ~20-percentage-point improvement on multi-tool tasks (53.7% → 74.4% with GPT-4) validates that code-based tool orchestration is not just a context optimization but a capability improvement. The interpreter-as-error-signal mechanism complements our L4 adversarial verification.
@@ -0,0 +1,41 @@
1
+ ---
2
+ type: source
3
+ source_type: github-issue
4
+ title: "RFC: Deterministic Session Checkpoint v1 (DSC) — Codex"
5
+ author: "Community contributor"
6
+ date_published: 2026-03-20
7
+ date_accessed: 2026-05-05
8
+ url: "https://github.com/openai/codex/issues/8573"
9
+ confidence: medium
10
+ tags:
11
+ - compaction
12
+ - deterministic
13
+ - codex
14
+ - checkpoint
15
+ key_claims:
16
+ - "Proposes replacing Codex's lossy LLM summarization with deterministic host-generated checkpoints"
17
+ - "Checkpoint is derived from session event logs (rollout-*.jsonl) — zero LLM calls"
18
+ - "Data model: Artifact (file URI + hash), FactRecord (VALID/SUSPECT), DecisionRecord"
19
+ - "Stale derived facts auto-marked as SUSPECT after compaction"
20
+ - "RFC closed as not_planned by OpenAI (2026-03-20)"
21
+ ---
22
+
23
+ # Codex DSC RFC 8573 — Deterministic Session Checkpoint
24
+
25
+ ## Summary
26
+
27
+ Community-authored RFC proposing that Codex replace its lossy LLM-based compaction with a deterministic checkpoint derived from session event logs. Closed by OpenAI as "not_planned" but the approach independently validates the pi-vcc pattern.
28
+
29
+ ## Key Details
30
+
31
+ - **Problem**: Codex's auto-compaction causes agents to re-read files, re-derive known facts, and lose task awareness
32
+ - **Proposed solution**: `checkpoint_v1.json` — structured projection of session event log
33
+ - **Data model**: Artifact (file URI + content hash), FactRecord (status: VALID | SUSPECT, evidence refs), DecisionRecord (rationale + dependencies)
34
+ - **Key innovation**: SUSPECT marking — when a file changes after a fact was derived from it, the fact is automatically marked stale
35
+ - **Outcome**: Closed by OpenAI as `not_planned` (no public rationale given)
36
+
37
+ ## Why This Matters
38
+
39
+ The DSC RFC validates pi-vcc's core thesis: deterministic compaction preserves more useful state than LLM summarization. Codex choosing NOT to implement it does not invalidate the pattern — it may reflect prioritization, not disagreement. The SUSPECT marking concept is novel and absent from pi-vcc.
40
+
41
+ > [!gap] OpenAI's rationale for closing the RFC is not documented publicly. May reflect architectural constraints in Codex's Rust core rather than disagreement with deterministic compaction.
@@ -0,0 +1,110 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: open-source-repository
5
+ title: "Codex CLI — OpenAI's Open-Source Coding Agent"
6
+ author: "OpenAI"
7
+ date_published: 2026
8
+ url: "https://github.com/openai/codex"
9
+ tags: [codex, openai, agent, harness, rust, open-source]
10
+ confidence: high
11
+ key_claims:
12
+ - "Codex is a lightweight coding agent that runs locally (CLI, IDE, Desktop App, Web)"
13
+ - "Written in Rust (96.3%) — compiled binary, zero-dependency install"
14
+ - "79.2K+ GitHub stars, 11.4K forks, 756 releases, open-source Apache 2.0"
15
+ - "Platform-native sandboxing: Seatbelt (macOS), bubblewrap (Linux), Windows Sandbox"
16
+ - "MCP client AND server — Codex can be a tool for other agents"
17
+ - "Subagent workflows with per-agent model selection (gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark)"
18
+ - "Memories system with Chronicle screen-context capture for cross-thread recall"
19
+ - "Lifecycle hooks at 6 events (SessionStart, PreToolUse, PermissionRequest, PostToolUse, UserPromptSubmit, Stop)"
20
+ - "Skills system following open agentskills.io standard with progressive disclosure"
21
+ - "Git worktrees for safe parallel branch work"
22
+ - "Automations for scheduled recurring agent tasks"
23
+ created: 2026-05-02
24
+ updated: 2026-05-02
25
+ ---
26
+ # Codex CLI — OpenAI's Open-Source Coding Agent
27
+
28
+ **Source type**: Open-source repository + official documentation. Primary documentation at `developers.openai.com/codex`.
29
+
30
+ ## Repository Facts
31
+
32
+ - **Repo**: `github.com/openai/codex`
33
+ - **Stars**: 79.2K+
34
+ - **Forks**: 11.4K
35
+ - **Language**: Rust (96.3%), Python (2.7%), TypeScript (0.3%)
36
+ - **License**: Apache 2.0
37
+ - **Latest release**: v0.128.0 (Apr 30, 2026)
38
+ - **Total releases**: 756
39
+ - **Build system**: Bazel (monorepo)
40
+ - **Key crates**: `codex-core`, `codex-tui`, `codex-cli`, `codex-exec`, `codex-mcp`, `codex-app-server-protocol`
41
+
42
+ ## Architecture
43
+
44
+ Codex is structured as a Cargo workspace with these key components:
45
+
46
+ - **`codex-core/`**: Business logic library. Largest crate (actively being refactored to reduce bloat).
47
+ - **`codex-tui/`**: Terminal UI built with Ratatui (Rust TUI library).
48
+ - **`codex-cli/`**: CLI multitool exposing subcommands.
49
+ - **`codex-exec/`**: Headless CLI for non-interactive automation (`codex exec`).
50
+ - **`codex-mcp/`**: MCP client and server implementation.
51
+ - **`app-server/`**: Local HTTP/WebSocket server for IDE extension communication.
52
+ - **SDK**: TypeScript/Node.js SDK for programmatic use.
53
+
54
+ ## Multi-Surface Architecture
55
+
56
+ Single agent logic runs across four surfaces:
57
+ 1. **CLI** — Zero-dependency compiled binary
58
+ 2. **IDE Extension** — VS Code, Cursor, Windsurf integration
59
+ 3. **Desktop App** — Full GUI (`codex app`)
60
+ 4. **Web (Cloud)** — `chatgpt.com/codex` for cloud-based agent runs
61
+
62
+ The App Server (local HTTP/WebSocket) is the bridge between agent core and IDE extensions. App-server protocol v2 defines typed RPC methods with camelCase wire format, TypeScript type generation from Rust structs, and explicit cursor pagination.
63
+
64
+ ## Key Innovations
65
+
66
+ ### 1. Rust-Native Implementation
67
+ Zero-dependency compiled binary. No Node.js required (unlike Claude Code). Platform-optimized sandbox integration via native OS APIs.
68
+
69
+ ### 2. Platform-Native Sandboxing
70
+ Three-tier sandbox: `read-only` (inspect only), `workspace-write` (edit within workspace), `danger-full-access` (no restrictions). Uses OS-level enforcement: Seatbelt on macOS, bubblewrap on Linux, Windows Sandbox on Windows. Approval policies: `untrusted`, `on-request`, `never`. Writable roots for multi-directory work. Permission profiles with per-domain and per-unix-socket rules.
71
+
72
+ ### 3. Bidirectional MCP
73
+ Codex functions as MCP client (connects to external MCP servers) AND MCP server (`codex mcp-server` — other agents can use Codex as a tool). This is architecturally unique among production agents.
74
+
75
+ ### 4. Memories + Chronicle
76
+ Opt-in persistent cross-thread memory. Stores under `~/.codex/memories/`. Chronicle captures screen context to bootstrap memories. Background generation (idle-thread-based, rate-limit-aware). Secret redaction. Per-thread controls (`/memories`). Configurable extract/consolidation models.
77
+
78
+ ### 5. Hooks Framework
79
+ JSON-configurable lifecycle hooks at 6 events. Exit-code semantics: 0=continue, 2=block. JSON stdin/stdout contracts. Multiple matching hooks run concurrently. Regex matchers filter by tool name. Managed hooks via `requirements.toml` for enterprise enforcement.
80
+
81
+ ### 6. Subagent Workflows
82
+ Parallel agent dispatch with per-agent model selection. Addresses "context pollution" and "context rot". Subagents return summaries to main thread. Explicit triggering only (no auto-spawn). Model selection: `gpt-5.5` for demanding agents, `gpt-5.4-mini` for fast scans, `gpt-5.3-codex-spark` for near-instant text-only.
83
+
84
+ ### 7. Git Worktrees
85
+ Isolated git worktrees for safe parallel branch work. Enables multiple agents editing different branches without conflicts.
86
+
87
+ ### 8. Skills System
88
+ Follows `agentskills.io` open standard. Progressive disclosure: name + description loaded first, full SKILL.md only when skill is activated. 2% context budget cap. Built-in `$skill-creator` and `$skill-installer`. Scopes: REPO, USER, ADMIN, SYSTEM. Plugins for distribution.
89
+
90
+ ### 9. Automations
91
+ Scheduled recurring agent tasks — CI-like but agent-driven. No equivalent in Claude Code or Cursor.
92
+
93
+ ### 10. Enterprise Governance
94
+ Managed config via `requirements.toml`. Admin hooks. Organization-level policy enforcement. Full enterprise story from day one.
95
+
96
+ ## What This Means for Our Harness
97
+
98
+ Codex independently validates 7 of our planned features and reveals 5 new gaps. See [[codex-harness-innovations]] for the detailed mapping and [[Research: Codex State-of-the-Art Harness Improvements]] for the synthesis.
99
+
100
+ ## Development Conventions (from AGENTS.md)
101
+
102
+ - Crate names prefixed with `codex-`. Collapse if statements, inline format! args, use method references over closures.
103
+ - Avoid bool/Option parameters in public APIs. Prefer enums, named methods, newtypes.
104
+ - Argument comment lint: `/*param_name*/` before opaque literals.
105
+ - Exhaustive match statements. Doc comments on new traits.
106
+ - Prefer RPITIT native async with explicit Send bounds.
107
+ - Object-level deep equality in tests.
108
+ - Modules under 500 LoC. Extract from large modules aggressively.
109
+ - Snapshot tests (insta) for TUI. `pretty_assertions::assert_eq` for tests.
110
+ - Bazel lockfile updates on dependency changes.
@@ -0,0 +1,51 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: research-paper
5
+ author: Xiangyang Li, Kuicai Dong, Yi Quan Lee, Wei Xia, Yichun Yin, Hao Zhang, Yong Liu, Yasheng Wang, Ruiming Tang
6
+ date_published: 2024-07-03
7
+ url: https://arxiv.org/abs/2407.02883
8
+ confidence: high
9
+ key_claims:
10
+ - "CoIR is the leading benchmark for code information retrieval, accepted at ACL 2025 Main"
11
+ - "10 curated code datasets, 8 retrieval tasks across 7 domains, 2M+ documents"
12
+ - "Trusted by Voyage, Jina, BGE, Salesforce, OpenAI, Google, Qwen, NV-Embed"
13
+ - "Integrated into MTEB leaderboard for cross-benchmark evaluation"
14
+ - "Pip-installable Python framework (coir-eval)"
15
+ tags:
16
+ - benchmark
17
+ - code-retrieval
18
+ - embeddings
19
+ - coir
20
+ - mteb
21
+ created: 2026-05-02
22
+ updated: 2026-05-02
23
+ ---
24
+ # CoIR: A Comprehensive Benchmark for Code Information Retrieval Models
25
+
26
+ ## Summary
27
+
28
+ ACL 2025 Main paper introducing CoIR, the standard benchmark for evaluating code embedding/retrieval models. 10 curated datasets, 8 retrieval tasks, 7 domains, 2M+ documents. Integrated into the MTEB leaderboard.
29
+
30
+ ## Top Models on CoIR Leaderboard
31
+
32
+ The CoIR leaderboard is adopted by major embedding providers:
33
+ - **Voyage AI**: voyage-code-3 (top-ranked)
34
+ - **Salesforce**: SFR-Embedding-Code-2B_R
35
+ - **BAAI**: bge-code-v1
36
+ - **Jina AI**: jina-embeddings-v4
37
+ - **Qwen**: Qwen3-Embedding
38
+ - **OpenAI**: text-embedding-3 series
39
+ - **Google**: Gemini embedding models
40
+ - **NVIDIA**: NV-Embed
41
+
42
+ ## Framework
43
+
44
+ - Install: `pip install coir-eval`
45
+ - Compatible with MTEB/BEIR data schema
46
+ - Supports custom models and API-based models
47
+ - 10 datasets: codetrans-dl, stackoverflow-qa, apps, codefeedback-mt, codefeedback-st, codetrans-contest, synthetic-text2sql, cosqa, codesearchnet, codesearchnet-ccr
48
+
49
+ ## Relevance to Our Implementation
50
+
51
+ CoIR is the benchmark we should use to evaluate our embedding pipeline. To validate whether all-MiniLM-L6-v2 combined with good chunking comes close to the quality of larger models, we run the CoIR eval and compare our scores against the leaderboard.
@@ -0,0 +1,48 @@
1
+ ---
2
+ type: source
3
+ status: ingested
4
+ source_type: blog
5
+ title: "Context Optimization in AI Agents: From Sub-Agents to TypeScript Interfaces"
6
+ author: "Colin McNamara"
7
+ date_published: 2025-09-29
8
+ url: "https://colinmcnamara.com/blog/context-optimization-mcp-code-mode"
9
+ confidence: medium
10
+ tags:
11
+ - agent-tools
12
+ - context-optimization
13
+ - typescript-execution-layer
14
+ - mcp
15
+ key_claims:
16
+ - "Traditional tool calling uses ~10,500+ tokens per interaction; Code Mode uses ~3,100 tokens — 3-4x reduction"
17
+ - "Sub-agent pattern is 'non-deterministic to non-deterministic' — LLMs coordinating LLMs"
18
+ - "Code Mode is 'non-deterministic to deterministic' — LLM generates code, runtime executes predictably"
19
+ - "TypeScript has advantages for LLM-generated code: rich training data, type safety as guardrails, deterministic execution"
20
+ - "LLMs have seen millions of lines of TypeScript in training but far fewer synthetic tool-call examples"
21
+ created: 2026-05-02
22
+ updated: 2026-05-02
23
+ ---
24
+ # Context Optimization in AI Agents: From Sub-Agents to TypeScript Interfaces
25
+
26
+ Colin McNamara's analysis of Cloudflare's Code Mode, comparing context consumption across three agent tool-calling patterns: traditional tool calling, sub-agent architecture, and the TypeScript execution layer.
27
+
28
+ ## Context Efficiency Comparison
29
+
30
+ | Pattern | Context per Interaction | Mechanism |
31
+ |---------|------------------------|-----------|
32
+ | Traditional tool calling | ~10,500+ tokens | System prompt + tool defs + history + call/response pairs |
33
+ | Sub-agent pattern | ~1,000-1,300 per agent | Supervisor with minimal context, task agents with focused tools |
34
+ | Code Mode (TS execution) | ~3,100 tokens | System + type defs + generated code + results only |
35
+
36
+ ## Key Insight: Deterministic Bridge
37
+
38
+ Traditional sub-agent patterns are "non-deterministic to non-deterministic" — LLMs coordinating LLMs. Code Mode creates a "non-deterministic to deterministic" bridge — an LLM generates code that executes predictably. This has advantages for debugging, reliability, performance, and security.
39
+
40
+ ## TypeScript Advantages
41
+
42
+ - **Rich training data**: Vast quantities of high-quality TypeScript in open-source
43
+ - **Type safety as guardrails**: Type system constrains LLM toward correct implementations
44
+ - **Deterministic execution**: Once code is generated, execution is fully predictable
45
+
46
+ ## Relevance to ultimate-pi
47
+
48
+ The context efficiency comparison directly supports our token budget goals. The "deterministic bridge" concept aligns with our L4 adversarial verification — generated code that's been verified once is reliable, unlike agent intuition which must be re-verified each turn. The sub-agent pattern limitations validate our move toward a TypeScript execution layer (P43).
@@ -0,0 +1,61 @@
1
+ ---
2
+ type: source
3
+ source_type: paper
4
+ title: "Scaling Long-Horizon LLM Agent via Context-Folding"
5
+ author: "Sun et al. (ByteDance Seed, CMU, Stanford)"
6
+ date_published: 2025-10-15
7
+ date_accessed: 2026-05-05
8
+ url: "https://arxiv.org/abs/2510.11967"
9
+ confidence: high
10
+ tags:
11
+ - context-folding
12
+ - compaction
13
+ - reinforcement-learning
14
+ - agent-architecture
15
+ - academic-paper
16
+ key_claims:
17
+ - "200-step agents in 10x less context (32K tokens vs 327K baseline)"
18
+ - "62.0% on BrowseComp-Plus, 58.0% on SWE-Bench Verified with 32K budget"
19
+ - "FoldGRPO: RL framework with token-level process rewards for learned folding"
20
+ - "Branch/return sub-trajectories replace settled segments with summaries"
21
+ - "Outperforms summarization-based context management"
22
+ - "Tool-calling accuracy collapses ~40% past 80K effective-context tokens"
23
+ - "Now available as a first-class API primitive in Anthropic's context-management beta"
24
+ ---
25
+
26
+ # Context Folding
27
+
28
+ ## Summary
29
+
30
+ Context Folding (arXiv 2510.11967) is a structured compaction technique from ByteDance Seed, CMU, and Stanford that enables 200+ step agents to maintain only ~32K active tokens — 10x less than naive approaches. Published October 2025.
31
+
32
+ ## Core Mechanism
33
+
34
+ Agents create temporary sub-trajectories for subtasks via a "branch" action. Upon completion, intermediate steps are summarized and "folded" away via a "return" action, leaving only the compressed artifact in active context.
35
+
36
+ **Key distinction**: Folding compresses WITHIN a single run. Memory persists ACROSS runs. Different problems, different solutions.
37
+
38
+ ## FoldGRPO
39
+
40
+ End-to-end reinforcement learning framework that makes folding behavior learnable. Uses token-level process rewards to encourage effective task decomposition and context management. Agents learn WHEN and HOW to branch and fold.
41
+
42
+ ## Results
43
+
44
+ | Benchmark | Folding (32K) | Baseline (327K) |
45
+ |-----------|---------------|-----------------|
46
+ | BrowseComp-Plus | 62.0% | < 62.0% |
47
+ | SWE-Bench Verified | 58.0% | comparable |
48
+
49
+ Significantly outperforms summarization-based context management.
50
+
51
+ ## Critical Finding
52
+
53
+ Past ~80K effective-context tokens, agent tool-calling accuracy collapses by approximately 40%. This is a hard cliff, not a gradual decline. Context windows beyond 80K are misleading for agentic workloads.
54
+
55
+ ## Relevance to pi-vcc
56
+
57
+ Context folding is a fundamentally different approach from pi-vcc:
58
+ - **Folding**: Learned, within-run, branch/return structure, RL-trained
59
+ - **pi-vcc**: Deterministic, at compaction boundaries, extraction-based, no ML
60
+
61
+ They could theoretically combine: pi-vcc for deterministic boundary compaction + context folding for within-run trajectory management.
@@ -0,0 +1,63 @@
1
+ ---
2
+ type: source
3
+ source_type: website
4
+ title: context-mode.com
5
+ author: B. Mert Köseoğlu
6
+ date_published: 2026
7
+ url: https://context-mode.com
8
+ confidence: medium
9
+ key_claims:
10
+ - "Saves 98% of AI coding agent's context window"
11
+ - "66,000+ developers across 14 platforms"
12
+ - "99.5% reduction on Playwright output (56.2KB → 299B)"
13
+ - "30× fewer tokens across full sessions"
14
+ - "Used at Microsoft, Google, Meta, ByteDance, Red Hat, GitHub"
15
+ - "HN #1 with 570+ points"
16
+ created: 2026-04-30
17
+ updated: 2026-04-30
18
+ status: ingested
19
+ tags: ["#source/website"]
20
+ ---
21
+
22
+ # context-mode.com
23
+
24
+ Landing page for the context-mode MCP plugin. Source for architecture claims, feature descriptions, and benchmark numbers.
25
+
26
+ ## Architecture
27
+
28
+ - **PreToolUse hook**: Routes tool calls. Blocks curl/wget, redirects large output to sandbox.
29
+ - **PostToolUse hook**: Captures events to SessionDB (file ops, git, errors, decisions).
30
+ - **SessionStart**: Restores state from previous session.
31
+ - **PreCompact**: Builds snapshot before context wipe.
32
+ - **UserPromptSubmit**: Captures intent, tracks decisions.
33
+
34
+ ## Think in Code Paradigm
35
+
36
+ Introduced in v1.0.64. Mandatory across all 14 platforms. Rule: when you need to analyze/count/filter/process data, write code that does it. Don't read raw data into context. Uses `ctx_execute()` MCP tool that runs JavaScript in a sandbox (Node.js built-ins only, no npm deps).
37
+
38
+ ## Compression Results (claimed)
39
+
40
+ - Playwright: 56.2 KB → 299 B (99.5%)
41
+ - GitHub Issues: 58.9 KB → 1.1 KB (98%)
42
+ - Access Logs: 45.1 KB → 155 B (99.7%)
43
+ - Full Session: 315 KB → 5.4 KB (98%)
44
+
45
+ ## Platforms
46
+
47
+ Claude Code, Cursor, Codex CLI, VS Code Copilot, JetBrains Copilot, Gemini CLI, Qwen Code, Kiro, OpenCode, KiloCode, Zed, OpenClaw, Pi, Antigravity
48
+
49
+ ## License
50
+
51
+ Elastic License 2.0 (ELv2) — source-available, not OSI-approved open source.
52
+
53
+ ## GitHub
54
+
55
+ - Stars: 11,245 (as of 2026-04-30)
56
+ - Forks: 769
57
+ - Language: TypeScript
58
+ - Created: 2026-02-23
59
+
60
+ ## npm
61
+
62
+ - Package: context-mode
63
+ - Downloads last month: 48,161