@caupulican/pi-adaptative 0.80.97 → 0.80.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -0
- package/dist/core/agent-session.d.ts +46 -5
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +385 -17
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/autonomy/envelope-enforcement.d.ts +17 -0
- package/dist/core/autonomy/envelope-enforcement.d.ts.map +1 -0
- package/dist/core/autonomy/envelope-enforcement.js +80 -0
- package/dist/core/autonomy/envelope-enforcement.js.map +1 -0
- package/dist/core/autonomy/foreground-envelope.d.ts +22 -0
- package/dist/core/autonomy/foreground-envelope.d.ts.map +1 -0
- package/dist/core/autonomy/foreground-envelope.js +65 -0
- package/dist/core/autonomy/foreground-envelope.js.map +1 -0
- package/dist/core/autonomy/status.d.ts +11 -0
- package/dist/core/autonomy/status.d.ts.map +1 -1
- package/dist/core/autonomy/status.js.map +1 -1
- package/dist/core/context/brain-curator.d.ts +7 -0
- package/dist/core/context/brain-curator.d.ts.map +1 -1
- package/dist/core/context/brain-curator.js +6 -0
- package/dist/core/context/brain-curator.js.map +1 -1
- package/dist/core/context/context-composition.d.ts.map +1 -1
- package/dist/core/context/context-composition.js +1 -1
- package/dist/core/context/context-composition.js.map +1 -1
- package/dist/core/delegation/session-worker-result.d.ts +8 -2
- package/dist/core/delegation/session-worker-result.d.ts.map +1 -1
- package/dist/core/delegation/session-worker-result.js +18 -1
- package/dist/core/delegation/session-worker-result.js.map +1 -1
- package/dist/core/delegation/worker-actions.d.ts +50 -0
- package/dist/core/delegation/worker-actions.d.ts.map +1 -0
- package/dist/core/delegation/worker-actions.js +70 -0
- package/dist/core/delegation/worker-actions.js.map +1 -0
- package/dist/core/delegation/worker-runner.d.ts +9 -0
- package/dist/core/delegation/worker-runner.d.ts.map +1 -1
- package/dist/core/delegation/worker-runner.js +38 -4
- package/dist/core/delegation/worker-runner.js.map +1 -1
- package/dist/core/learning/observation-store.d.ts +20 -0
- package/dist/core/learning/observation-store.d.ts.map +1 -0
- package/dist/core/learning/observation-store.js +101 -0
- package/dist/core/learning/observation-store.js.map +1 -0
- package/dist/core/model-capability.d.ts +19 -0
- package/dist/core/model-capability.d.ts.map +1 -1
- package/dist/core/model-capability.js +19 -0
- package/dist/core/model-capability.js.map +1 -1
- package/dist/core/model-router/executor-route.d.ts +8 -0
- package/dist/core/model-router/executor-route.d.ts.map +1 -0
- package/dist/core/model-router/executor-route.js +33 -0
- package/dist/core/model-router/executor-route.js.map +1 -0
- package/dist/core/model-router/tool-escalation.d.ts +2 -0
- package/dist/core/model-router/tool-escalation.d.ts.map +1 -1
- package/dist/core/model-router/tool-escalation.js +6 -0
- package/dist/core/model-router/tool-escalation.js.map +1 -1
- package/dist/core/research/research-runner.d.ts +8 -1
- package/dist/core/research/research-runner.d.ts.map +1 -1
- package/dist/core/research/research-runner.js +13 -1
- package/dist/core/research/research-runner.js.map +1 -1
- package/dist/core/research/workspace-collector.d.ts +25 -0
- package/dist/core/research/workspace-collector.d.ts.map +1 -0
- package/dist/core/research/workspace-collector.js +286 -0
- package/dist/core/research/workspace-collector.js.map +1 -0
- package/dist/core/settings-manager.d.ts +5 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +8 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/modes/interactive/components/fitness-role-selector.d.ts +1 -1
- package/dist/modes/interactive/components/fitness-role-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/fitness-role-selector.js +5 -0
- package/dist/modes/interactive/components/fitness-role-selector.js.map +1 -1
- package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/settings-selector.js +20 -0
- package/dist/modes/interactive/components/settings-selector.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +9 -0
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
- package/examples/extensions/custom-provider-anthropic/package.json +1 -1
- package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
- package/examples/extensions/sandbox/package-lock.json +2 -2
- package/examples/extensions/sandbox/package.json +1 -1
- package/examples/extensions/with-deps/package-lock.json +2 -2
- package/examples/extensions/with-deps/package.json +1 -1
- package/npm-shrinkwrap.json +12 -12
- package/package.json +4 -4
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-escalation.js","sourceRoot":"","sources":["../../../src/core/model-router/tool-escalation.ts"],"names":[],"mappings":"AAEA,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACpC,MAAM;IACN,MAAM;IACN,MAAM;IACN,IAAI;IACJ,MAAM;IACN,QAAQ;IACR,MAAM;IACN,WAAW;IACX,UAAU;IACV,aAAa;IACb,YAAY;IACZ,kBAAkB;IAClB,mBAAmB;CACnB,CAAC,CAAC;AAEH,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC,CAAC;AAE7F,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC;IAClC,KAAK;IACL,KAAK;IACL,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,IAAI;IACJ,KAAK;IACL,MAAM;IACN,MAAM;IACN,KAAK;IACL,IAAI;IACJ,OAAO;IACP,MAAM;CACN,CAAC,CAAC;AAEH,MAAM,yBAAyB,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3G,MAAM,yBAAyB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;AAChG,MAAM,uBAAuB,GAC5B,qUAAqU,CAAC;AACvU,MAAM,qBAAqB,GAC1B,+IAA+I,CAAC;AAEjJ,SAAS,eAAe,CAAC,IAAa,EAAsB;IAC3D,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IACxD,MAAM,MAAM,GAAG,IAA+B,CAAC;IAC/C,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC;IACpE,OAAO,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;AAAA,CAChE;AAED,SAAS,WAAW,CAAC,OAAe,EAAsB;IACzD,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC7D,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC;AAAA,CAC9C;AAED,SAAS,UAAU,CAAC,OAAe,EAAE,KAAa,EAAsB;IACvE,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,EAAE,WAAW,EAAE,CAAC;AAAA,CACzD;AAED,SAAS,sBAAsB,CAAC,OAAe,EAAW;IACzD,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACzD,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACpB,MA
AM,UAAU,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,UAAU,IAAI,yBAAyB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;IACzE,CAAC;IACD,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1D,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,UAAU,IAAI,yBAAyB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;IACzE,CAAC;IACD,OAAO,IAAI,CAAC;AAAA,CACZ;AAED,SAAS,sBAAsB,CAAC,OAAe,EAAW;IACzD,IAAI,CAAC,OAAO,IAAI,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,KAAK,CAAC;IACpE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5E,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;AAAA,CACrE;AAED,MAAM,UAAU,6BAA6B,CAAC,
|
|
1
|
+
{"version":3,"file":"tool-escalation.js","sourceRoot":"","sources":["../../../src/core/model-router/tool-escalation.ts"],"names":[],"mappings":"AAEA,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACpC,MAAM;IACN,MAAM;IACN,MAAM;IACN,IAAI;IACJ,MAAM;IACN,QAAQ;IACR,MAAM;IACN,WAAW;IACX,UAAU;IACV,aAAa;IACb,YAAY;IACZ,kBAAkB;IAClB,mBAAmB;CACnB,CAAC,CAAC;AAEH,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC,CAAC;AAE7F,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC;IAClC,KAAK;IACL,KAAK;IACL,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,IAAI;IACJ,KAAK;IACL,MAAM;IACN,MAAM;IACN,KAAK;IACL,IAAI;IACJ,OAAO;IACP,MAAM;CACN,CAAC,CAAC;AAEH,MAAM,yBAAyB,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3G,MAAM,yBAAyB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;AAChG,MAAM,uBAAuB,GAC5B,qUAAqU,CAAC;AACvU,MAAM,qBAAqB,GAC1B,+IAA+I,CAAC;AAEjJ,SAAS,eAAe,CAAC,IAAa,EAAsB;IAC3D,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IACxD,MAAM,MAAM,GAAG,IAA+B,CAAC;IAC/C,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC;IACpE,OAAO,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;AAAA,CAChE;AAED,SAAS,WAAW,CAAC,OAAe,EAAsB;IACzD,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC7D,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC;AAAA,CAC9C;AAED,SAAS,UAAU,CAAC,OAAe,EAAE,KAAa,EAAsB;IACvE,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,EAAE,WAAW,EAAE,CAAC;AAAA,CACzD;AAED,SAAS,sBAAsB,CAAC,OAAe,EAAW;IACzD,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAClC,IAAI,CAAC,IAAI,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACzD,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACpB,MA
AM,UAAU,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,UAAU,IAAI,yBAAyB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;IACzE,CAAC;IACD,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1D,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,UAAU,IAAI,yBAAyB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;IACzE,CAAC;IACD,OAAO,IAAI,CAAC;AAAA,CACZ;AAED,SAAS,sBAAsB,CAAC,OAAe,EAAW;IACzD,IAAI,CAAC,OAAO,IAAI,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,KAAK,CAAC;IACpE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5E,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;AAAA,CACrE;AAED,MAAM,UAAU,6BAA6B,CAAC,OAM7C,EAAW;IACX,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,KAAK,CAAC;IAC3C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACvD,8FAA8F;IAC9F,8FAA8F;IAC9F,yFAAyF;IACzF,6CAA6C;IAC7C,IAAI,OAAO,CAAC,UAAU,KAAK,iBAAiB,IAAI,QAAQ,KAAK,oBAAoB;QAAE,OAAO,KAAK,CAAC;IAChG,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,IAAI,oBAAoB,CAAC,GAAG,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAC;IACrD,IAAI,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC9C,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1D,CAAC;IACD,OAAO,qBAAqB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;AAAA,CAC7E","sourcesContent":["import type { ModelTier } from \"../autonomy/contracts.ts\";\n\nconst READ_ONLY_TOOL_NAMES = new Set([\n\t\"read\",\n\t\"grep\",\n\t\"find\",\n\t\"ls\",\n\t\"list\",\n\t\"search\",\n\t\"glob\",\n\t\"view_file\",\n\t\"list_dir\",\n\t\"grep_search\",\n\t\"search_web\",\n\t\"read_url_content\",\n\t\"read_browser_page\",\n]);\n\nconst SHELL_TOOL_NAMES = new Set([\"bash\", \"exec\", \"execute\", \"run\", \"run_command\", \"shell\"]);\n\nconst READ_ONLY_COMMANDS = new 
Set([\n\t\"awk\",\n\t\"cat\",\n\t\"date\",\n\t\"df\",\n\t\"du\",\n\t\"env\",\n\t\"git\",\n\t\"grep\",\n\t\"head\",\n\t\"jq\",\n\t\"ls\",\n\t\"node\",\n\t\"npm\",\n\t\"pnpm\",\n\t\"pwd\",\n\t\"rg\",\n\t\"sed\",\n\t\"tail\",\n\t\"test\",\n\t\"tsc\",\n\t\"wc\",\n\t\"which\",\n\t\"yarn\",\n]);\n\nconst READ_ONLY_GIT_SUBCOMMANDS = new Set([\"branch\", \"diff\", \"log\", \"rev-parse\", \"show\", \"status\", \"tag\"]);\nconst READ_ONLY_NPM_SUBCOMMANDS = new Set([\"info\", \"list\", \"ls\", \"outdated\", \"view\", \"whoami\"]);\nconst MUTATING_SHELL_TOKEN_RE =\n\t/(^|\\s)(>|>>|2>|&>|tee\\b|rm\\b|mv\\b|cp\\b|mkdir\\b|touch\\b|chmod\\b|chown\\b|install\\b|commit\\b|push\\b|publish\\b|deploy\\b|apply\\b|add\\b|checkout\\b|switch\\b|reset\\b|clean\\b|stash\\b|merge\\b|rebase\\b|npm\\s+(?:i|install|ci|update|publish|run)\\b|pnpm\\s+(?:i|install|update|publish|run)\\b|yarn\\s+(?:add|install|upgrade|publish|run)\\b)/i;\nconst MUTATING_TOOL_NAME_RE =\n\t/(bash|exec|execute|run|shell|write|edit|patch|replace|delete|remove|move|rename|create|mkdir|touch|install|commit|push|publish|deploy|apply)/i;\n\nfunction getShellCommand(args: unknown): string | undefined {\n\tif (!args || typeof args !== \"object\") return undefined;\n\tconst record = args as Record<string, unknown>;\n\tconst command = record.command ?? record.cmd ?? record.shellCommand;\n\treturn typeof command === \"string\" ? 
command.trim() : undefined;\n}\n\nfunction commandName(segment: string): string | undefined {\n\tconst first = segment.trim().match(/^[A-Za-z0-9_./-]+/)?.[0];\n\tif (!first) return undefined;\n\tconst parts = first.split(\"/\");\n\treturn parts[parts.length - 1]?.toLowerCase();\n}\n\nfunction commandArg(segment: string, index: number): string | undefined {\n\treturn segment.trim().split(/\\s+/)[index]?.toLowerCase();\n}\n\nfunction isReadOnlyShellSegment(segment: string): boolean {\n\tconst name = commandName(segment);\n\tif (!name || !READ_ONLY_COMMANDS.has(name)) return false;\n\tif (name === \"git\") {\n\t\tconst subcommand = commandArg(segment, 1);\n\t\treturn Boolean(subcommand && READ_ONLY_GIT_SUBCOMMANDS.has(subcommand));\n\t}\n\tif (name === \"npm\" || name === \"pnpm\" || name === \"yarn\") {\n\t\tconst subcommand = commandArg(segment, 1);\n\t\treturn Boolean(subcommand && READ_ONLY_NPM_SUBCOMMANDS.has(subcommand));\n\t}\n\treturn true;\n}\n\nfunction isReadOnlyShellCommand(command: string): boolean {\n\tif (!command || MUTATING_SHELL_TOKEN_RE.test(command)) return false;\n\tconst segments = command.split(/\\s*&&\\s*/).map((segment) => segment.trim());\n\treturn segments.length > 0 && segments.every(isReadOnlyShellSegment);\n}\n\nexport function shouldEscalateModelRouterTool(options: {\n\ttier: ModelTier;\n\ttoolName: string;\n\targs?: unknown;\n\t/** The route's reasonCode; executor-lane turns carry \"executor_direct\". */\n\treasonCode?: string;\n}): boolean {\n\tif (options.tier !== \"cheap\") return false;\n\tconst toolName = options.toolName.trim().toLowerCase();\n\t// Executor-lane turns (G16) exist to run exactly one tool: run_toolkit_script, which enforces\n\t// its own safety (danger confirmation, structural exit-code contract). Escalating on it would\n\t// abort every executor turn at the moment it does its job. 
Any OTHER mutating tool still\n\t// escalates to the expensive model as usual.\n\tif (options.reasonCode === \"executor_direct\" && toolName === \"run_toolkit_script\") return false;\n\tif (!toolName) return true;\n\tif (READ_ONLY_TOOL_NAMES.has(toolName)) return false;\n\tif (SHELL_TOOL_NAMES.has(toolName)) {\n\t\tconst command = getShellCommand(options.args);\n\t\treturn command ? !isReadOnlyShellCommand(command) : true;\n\t}\n\treturn MUTATING_TOOL_NAME_RE.test(toolName) || !toolName.startsWith(\"read_\");\n}\n"]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { CapabilityEnvelope, EvidenceBundle, GateOutcome } from "../autonomy/contracts.ts";
|
|
1
|
+
import type { CapabilityEnvelope, EvidenceBundle, EvidenceRef, GateOutcome } from "../autonomy/contracts.ts";
|
|
2
2
|
/**
|
|
3
3
|
* Pure orchestration for one autonomous research pass: gate -> bounded isolated completion ->
|
|
4
4
|
* parse -> evidence bundle. The model executor is injected so this stays provider-free and
|
|
@@ -20,6 +20,12 @@ export interface ResearchRunnerOptions {
|
|
|
20
20
|
context?: string;
|
|
21
21
|
/** Stripped research envelope - never the foreground/architect envelope. */
|
|
22
22
|
envelope: CapabilityEnvelope;
|
|
23
|
+
/**
|
|
24
|
+
* Pointer-first workspace sources (repo-relative path + bounded excerpt, never file bodies) that
|
|
25
|
+
* inform the pass. They are rendered into the user prompt and carried into the evidence bundle;
|
|
26
|
+
* omitted / empty reproduces the pre-collector behavior exactly.
|
|
27
|
+
*/
|
|
28
|
+
sources?: readonly EvidenceRef[];
|
|
23
29
|
/** Budget for this pass; a post-hoc breach marks the run budget_exhausted (spend stays visible). */
|
|
24
30
|
maxUsd: number;
|
|
25
31
|
maxSources: number;
|
|
@@ -46,6 +52,7 @@ export interface ResearchRunResult {
|
|
|
46
52
|
export declare function buildResearchUserPrompt(args: {
|
|
47
53
|
query: string;
|
|
48
54
|
context?: string;
|
|
55
|
+
sources?: readonly EvidenceRef[];
|
|
49
56
|
maxFindings: number;
|
|
50
57
|
}): string;
|
|
51
58
|
export interface ParsedResearchFindings {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"research-runner.d.ts","sourceRoot":"","sources":["../../../src/core/research/research-runner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,kBAAkB,EAAE,cAAc,EAAwB,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAItH;;;;;;;GAOG;AAEH,2GAA2G;AAC3G,eAAO,MAAM,2BAA2B,QAM5B,CAAC;AAEb,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,qBAAqB;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,iGAAiG;IACjG,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4EAA4E;IAC5E,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,oGAAoG;IACpG,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,QAAQ,EAAE,CAAC,IAAI,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,WAAW,CAAA;KAAE,KAAK,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACpH,qDAAqD;IACrD,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,GAAG,kBAAkB,CAAC;AAErG,MAAM,WAAW,iBAAiB;IACjC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,CAAC,EAAE,cAAc,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAO9G;AAED,MAAM,WAAW,sBAAsB;IACtC,QAAQ,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC1D;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,sBAAsB,GAAG,SAAS,CAwC3G;AA+BD,wBAAsB,WAAW,CAAC,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAwD5F","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\nimport type { CapabilityEnvelope, EvidenceBundle, EvidenceRef, Finding, GateOutcome } from \"../autonomy/contracts.ts\";\nimport { createEvidenceBundle } from \"./evidence-bundle.ts\";\nimport { evaluateResearchRequest } from \"./research-gate.ts\";\n\n/**\n * Pure orchestration for one autonomous research pass: gate -> bounded isolated completion ->\n * parse -> evidence bundle. 
The model executor is injected so this stays provider-free and\n * session-free; production wires `AgentSession.runIsolatedCompletion` in.\n *\n * The lane is read-only by construction: the executor receives text prompts only, and the output\n * is an `EvidenceBundle` whose model-synthesized findings are marked untrusted.\n */\n\n/** Static across calls so callers can use `cacheRetention: \"short\"` and only pay for the variable tail. */\nexport const RESEARCH_LANE_SYSTEM_PROMPT = [\n\t\"You are a read-only research lane for a coding agent.\",\n\t\"You receive a research query plus bounded context and produce findings that help satisfy open goal requirements.\",\n\t\"Respond with STRICT JSON only - no prose, no markdown fences:\",\n\t'{\"findings\":[{\"summary\":\"<one concrete, actionable finding>\",\"confidence\":<0..1>}]}',\n\t\"Base findings only on the provided context. Never invent file paths, APIs, or facts.\",\n].join(\"\\n\");\n\nexport interface ResearchCompletion {\n\ttext: string;\n\tcostUsd: number;\n\tstopReason: string;\n}\n\nexport interface ResearchRunnerOptions {\n\tquery: string;\n\t/** Bounded, pre-redacted context handed to the research model (goal text, open requirements). */\n\tcontext?: string;\n\t/** Stripped research envelope - never the foreground/architect envelope. */\n\tenvelope: CapabilityEnvelope;\n\t/** Budget for this pass; a post-hoc breach marks the run budget_exhausted (spend stays visible). */\n\tmaxUsd: number;\n\tmaxSources: number;\n\tmaxFindings: number;\n\t/** Wall-clock budget in milliseconds; 0 disables. */\n\tmaxWallClockMs: number;\n\t/** Executes one isolated completion. Production: AgentSession.runIsolatedCompletion. */\n\tcomplete: (args: { systemPrompt: string; userPrompt: string; signal?: AbortSignal }) => Promise<ResearchCompletion>;\n\t/** External cancellation (e.g. session disposal). 
*/\n\tsignal?: AbortSignal;\n}\n\nexport type ResearchRunStatus = \"succeeded\" | \"failed\" | \"canceled\" | \"timeout\" | \"budget_exhausted\";\n\nexport interface ResearchRunResult {\n\tstatus: ResearchRunStatus;\n\treasonCode: string;\n\tgateOutcome: GateOutcome;\n\tbundle?: EvidenceBundle;\n\tcostUsd: number;\n}\n\nexport function buildResearchUserPrompt(args: { query: string; context?: string; maxFindings: number }): string {\n\tconst parts = [`Research query: ${args.query}`];\n\tif (args.context && args.context.length > 0) {\n\t\tparts.push(\"\", \"Context:\", args.context);\n\t}\n\tparts.push(\"\", `Return at most ${args.maxFindings} findings.`);\n\treturn parts.join(\"\\n\");\n}\n\nexport interface ParsedResearchFindings {\n\tfindings: Array<{ summary: string; confidence?: number }>;\n}\n\nexport function parseResearchFindings(text: string, maxFindings: number): ParsedResearchFindings | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\n\tfor (const candidate of candidates) {\n\t\tlet parsed: unknown;\n\t\ttry {\n\t\t\tparsed = JSON.parse(candidate);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\t\tif (!parsed || typeof parsed !== \"object\" || Array.isArray(parsed)) continue;\n\t\tconst findingsRaw = (parsed as { findings?: unknown }).findings;\n\t\tif (!Array.isArray(findingsRaw)) continue;\n\n\t\tconst findings: Array<{ summary: string; confidence?: number }> = [];\n\t\tfor (const item of findingsRaw) {\n\t\t\tif (!item || typeof item !== \"object\" || Array.isArray(item)) continue;\n\t\t\tconst summary = (item as { summary?: unknown }).summary;\n\t\t\tif (typeof summary !== \"string\" || summary.trim().length === 0) 
continue;\n\t\t\tconst confidenceRaw = (item as { confidence?: unknown }).confidence;\n\t\t\tconst confidence =\n\t\t\t\ttypeof confidenceRaw === \"number\" && Number.isFinite(confidenceRaw)\n\t\t\t\t\t? Math.min(Math.max(confidenceRaw, 0), 1)\n\t\t\t\t\t: undefined;\n\t\t\tfindings.push({ summary: summary.trim(), confidence });\n\t\t\tif (findings.length >= maxFindings) break;\n\t\t}\n\t\t// A well-formed-but-empty findings array is a valid \"nothing found\"; a findings array whose\n\t\t// every item is malformed is not.\n\t\tif (findings.length > 0 || findingsRaw.length === 0) {\n\t\t\treturn { findings };\n\t\t}\n\t}\n\treturn undefined;\n}\n\nfunction truncateExcerpt(text: string, maxChars: number): string {\n\tif (text.length <= maxChars) return text;\n\treturn `${text.slice(0, Math.max(0, maxChars - 1))}…`;\n}\n\nfunction buildBundle(options: ResearchRunnerOptions, parsed: ParsedResearchFindings): EvidenceBundle {\n\tconst contextRef: EvidenceRef = {\n\t\tid: \"src-context\",\n\t\tkind: \"user\",\n\t\ttitle: \"Goal/context provided to the research lane\",\n\t\ttrusted: true,\n\t\texcerpt: truncateExcerpt(options.context && options.context.length > 0 ? options.context : options.query, 2000),\n\t};\n\tconst synthesisRef: EvidenceRef = {\n\t\tid: \"src-synthesis\",\n\t\tkind: \"tool\",\n\t\ttitle: \"Research-model synthesis\",\n\t\ttrusted: false,\n\t};\n\tconst sources = [contextRef, synthesisRef].slice(0, Math.max(1, options.maxSources));\n\tconst findings: Finding[] = parsed.findings.slice(0, options.maxFindings).map((finding, index) => ({\n\t\tid: `finding-${index + 1}`,\n\t\tsummary: finding.summary,\n\t\tevidenceIds: [synthesisRef.id],\n\t\t...(finding.confidence !== undefined ? 
{ confidence: finding.confidence } : {}),\n\t}));\n\treturn createEvidenceBundle({ query: options.query, sources, findings });\n}\n\nexport async function runResearch(options: ResearchRunnerOptions): Promise<ResearchRunResult> {\n\tconst gateOutcome = evaluateResearchRequest({\n\t\tenvelope: options.envelope,\n\t\tsourceKind: \"tool\",\n\t\testimatedUsd: options.maxUsd,\n\t});\n\tif (gateOutcome.outcome !== \"allow\") {\n\t\t// Skip-and-record, never prompt: gate denials inform diagnostics instead of blocking anything.\n\t\tconst status: ResearchRunStatus = gateOutcome.reasonCode === \"over_budget\" ? \"budget_exhausted\" : \"failed\";\n\t\treturn { status, reasonCode: gateOutcome.reasonCode, gateOutcome, costUsd: 0 };\n\t}\n\n\tconst bounded = await runBoundedCompletion({\n\t\tmaxWallClockMs: options.maxWallClockMs,\n\t\tsignal: options.signal,\n\t\texecute: (signal) =>\n\t\t\toptions.complete({\n\t\t\t\tsystemPrompt: RESEARCH_LANE_SYSTEM_PROMPT,\n\t\t\t\tuserPrompt: buildResearchUserPrompt(options),\n\t\t\t\tsignal,\n\t\t\t}),\n\t});\n\tif (bounded.failure) {\n\t\treturn {\n\t\t\tstatus: bounded.failure.status,\n\t\t\treasonCode: bounded.failure.reasonCode,\n\t\t\tgateOutcome,\n\t\t\tcostUsd: bounded.completion?.costUsd ?? 
0,\n\t\t};\n\t}\n\tconst completion = bounded.completion;\n\tif (!completion) {\n\t\treturn { status: \"failed\", reasonCode: \"completion_error\", gateOutcome, costUsd: 0 };\n\t}\n\tif (completion.stopReason === \"error\" || completion.stopReason === \"aborted\") {\n\t\treturn { status: \"failed\", reasonCode: \"model_error\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst parsed = parseResearchFindings(completion.text, options.maxFindings);\n\tif (!parsed) {\n\t\treturn { status: \"failed\", reasonCode: \"unparseable_output\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst bundle = buildBundle(options, parsed);\n\tconst overBudget = options.maxUsd > 0 && completion.costUsd > options.maxUsd;\n\treturn {\n\t\tstatus: overBudget ? \"budget_exhausted\" : \"succeeded\",\n\t\treasonCode: overBudget\n\t\t\t? \"cost_budget_exceeded\"\n\t\t\t: parsed.findings.length === 0\n\t\t\t\t? \"no_findings\"\n\t\t\t\t: \"research_completed\",\n\t\tgateOutcome,\n\t\tbundle,\n\t\tcostUsd: completion.costUsd,\n\t};\n}\n"]}
|
|
1
|
+
{"version":3,"file":"research-runner.d.ts","sourceRoot":"","sources":["../../../src/core/research/research-runner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,kBAAkB,EAAE,cAAc,EAAE,WAAW,EAAW,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAItH;;;;;;;GAOG;AAEH,2GAA2G;AAC3G,eAAO,MAAM,2BAA2B,QAM5B,CAAC;AAEb,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,qBAAqB;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,iGAAiG;IACjG,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4EAA4E;IAC5E,QAAQ,EAAE,kBAAkB,CAAC;IAC7B;;;;OAIG;IACH,OAAO,CAAC,EAAE,SAAS,WAAW,EAAE,CAAC;IACjC,oGAAoG;IACpG,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,QAAQ,EAAE,CAAC,IAAI,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,WAAW,CAAA;KAAE,KAAK,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACpH,qDAAqD;IACrD,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,GAAG,kBAAkB,CAAC;AAErG,MAAM,WAAW,iBAAiB;IACjC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,CAAC,EAAE,cAAc,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE;IAC7C,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,SAAS,WAAW,EAAE,CAAC;IACjC,WAAW,EAAE,MAAM,CAAC;CACpB,GAAG,MAAM,CAcT;AAED,MAAM,WAAW,sBAAsB;IACtC,QAAQ,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC1D;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,sBAAsB,GAAG,SAAS,CAwC3G;AAoCD,wBAAsB,WAAW,CAAC,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAwD5F","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\nimport type { CapabilityEnvelope, EvidenceBundle, EvidenceRef, Finding, GateOutcome } from \"../autonomy/contracts.ts\";\nimport { createEvidenceBundle } from \"./evidence-bundle.ts\";\nimport { evaluateResearchRequest } from \"./research-gate.ts\";\n\n/**\n * Pure 
orchestration for one autonomous research pass: gate -> bounded isolated completion ->\n * parse -> evidence bundle. The model executor is injected so this stays provider-free and\n * session-free; production wires `AgentSession.runIsolatedCompletion` in.\n *\n * The lane is read-only by construction: the executor receives text prompts only, and the output\n * is an `EvidenceBundle` whose model-synthesized findings are marked untrusted.\n */\n\n/** Static across calls so callers can use `cacheRetention: \"short\"` and only pay for the variable tail. */\nexport const RESEARCH_LANE_SYSTEM_PROMPT = [\n\t\"You are a read-only research lane for a coding agent.\",\n\t\"You receive a research query plus bounded context and produce findings that help satisfy open goal requirements.\",\n\t\"Respond with STRICT JSON only - no prose, no markdown fences:\",\n\t'{\"findings\":[{\"summary\":\"<one concrete, actionable finding>\",\"confidence\":<0..1>}]}',\n\t\"Base findings only on the provided context. Never invent file paths, APIs, or facts.\",\n].join(\"\\n\");\n\nexport interface ResearchCompletion {\n\ttext: string;\n\tcostUsd: number;\n\tstopReason: string;\n}\n\nexport interface ResearchRunnerOptions {\n\tquery: string;\n\t/** Bounded, pre-redacted context handed to the research model (goal text, open requirements). */\n\tcontext?: string;\n\t/** Stripped research envelope - never the foreground/architect envelope. */\n\tenvelope: CapabilityEnvelope;\n\t/**\n\t * Pointer-first workspace sources (repo-relative path + bounded excerpt, never file bodies) that\n\t * inform the pass. They are rendered into the user prompt and carried into the evidence bundle;\n\t * omitted / empty reproduces the pre-collector behavior exactly.\n\t */\n\tsources?: readonly EvidenceRef[];\n\t/** Budget for this pass; a post-hoc breach marks the run budget_exhausted (spend stays visible). 
*/\n\tmaxUsd: number;\n\tmaxSources: number;\n\tmaxFindings: number;\n\t/** Wall-clock budget in milliseconds; 0 disables. */\n\tmaxWallClockMs: number;\n\t/** Executes one isolated completion. Production: AgentSession.runIsolatedCompletion. */\n\tcomplete: (args: { systemPrompt: string; userPrompt: string; signal?: AbortSignal }) => Promise<ResearchCompletion>;\n\t/** External cancellation (e.g. session disposal). */\n\tsignal?: AbortSignal;\n}\n\nexport type ResearchRunStatus = \"succeeded\" | \"failed\" | \"canceled\" | \"timeout\" | \"budget_exhausted\";\n\nexport interface ResearchRunResult {\n\tstatus: ResearchRunStatus;\n\treasonCode: string;\n\tgateOutcome: GateOutcome;\n\tbundle?: EvidenceBundle;\n\tcostUsd: number;\n}\n\nexport function buildResearchUserPrompt(args: {\n\tquery: string;\n\tcontext?: string;\n\tsources?: readonly EvidenceRef[];\n\tmaxFindings: number;\n}): string {\n\tconst parts = [`Research query: ${args.query}`];\n\tif (args.context && args.context.length > 0) {\n\t\tparts.push(\"\", \"Context:\", args.context);\n\t}\n\tif (args.sources && args.sources.length > 0) {\n\t\tparts.push(\"\", \"Workspace sources (pointer-first; open the file to read full context):\");\n\t\tfor (const source of args.sources) {\n\t\t\tconst pointer = source.title ?? source.uri ?? source.id;\n\t\t\tparts.push(source.excerpt ? 
`- ${pointer}: ${source.excerpt}` : `- ${pointer}`);\n\t\t}\n\t}\n\tparts.push(\"\", `Return at most ${args.maxFindings} findings.`);\n\treturn parts.join(\"\\n\");\n}\n\nexport interface ParsedResearchFindings {\n\tfindings: Array<{ summary: string; confidence?: number }>;\n}\n\nexport function parseResearchFindings(text: string, maxFindings: number): ParsedResearchFindings | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\n\tfor (const candidate of candidates) {\n\t\tlet parsed: unknown;\n\t\ttry {\n\t\t\tparsed = JSON.parse(candidate);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\t\tif (!parsed || typeof parsed !== \"object\" || Array.isArray(parsed)) continue;\n\t\tconst findingsRaw = (parsed as { findings?: unknown }).findings;\n\t\tif (!Array.isArray(findingsRaw)) continue;\n\n\t\tconst findings: Array<{ summary: string; confidence?: number }> = [];\n\t\tfor (const item of findingsRaw) {\n\t\t\tif (!item || typeof item !== \"object\" || Array.isArray(item)) continue;\n\t\t\tconst summary = (item as { summary?: unknown }).summary;\n\t\t\tif (typeof summary !== \"string\" || summary.trim().length === 0) continue;\n\t\t\tconst confidenceRaw = (item as { confidence?: unknown }).confidence;\n\t\t\tconst confidence =\n\t\t\t\ttypeof confidenceRaw === \"number\" && Number.isFinite(confidenceRaw)\n\t\t\t\t\t? 
Math.min(Math.max(confidenceRaw, 0), 1)\n\t\t\t\t\t: undefined;\n\t\t\tfindings.push({ summary: summary.trim(), confidence });\n\t\t\tif (findings.length >= maxFindings) break;\n\t\t}\n\t\t// A well-formed-but-empty findings array is a valid \"nothing found\"; a findings array whose\n\t\t// every item is malformed is not.\n\t\tif (findings.length > 0 || findingsRaw.length === 0) {\n\t\t\treturn { findings };\n\t\t}\n\t}\n\treturn undefined;\n}\n\nfunction truncateExcerpt(text: string, maxChars: number): string {\n\tif (text.length <= maxChars) return text;\n\treturn `${text.slice(0, Math.max(0, maxChars - 1))}…`;\n}\n\nfunction buildBundle(options: ResearchRunnerOptions, parsed: ParsedResearchFindings): EvidenceBundle {\n\tconst contextRef: EvidenceRef = {\n\t\tid: \"src-context\",\n\t\tkind: \"user\",\n\t\ttitle: \"Goal/context provided to the research lane\",\n\t\ttrusted: true,\n\t\texcerpt: truncateExcerpt(options.context && options.context.length > 0 ? options.context : options.query, 2000),\n\t};\n\tconst synthesisRef: EvidenceRef = {\n\t\tid: \"src-synthesis\",\n\t\tkind: \"tool\",\n\t\ttitle: \"Research-model synthesis\",\n\t\ttrusted: false,\n\t};\n\t// context + synthesis are the fixed provenance anchors (findings cite src-synthesis); workspace\n\t// sources fill whatever budget is left between them, so the anchors are never squeezed out.\n\tconst budget = Math.max(1, options.maxSources);\n\tconst workspaceRoom = Math.max(0, budget - 2);\n\tconst workspaceSources = (options.sources ?? []).slice(0, workspaceRoom);\n\tconst sources = [contextRef, ...workspaceSources, synthesisRef].slice(0, budget);\n\tconst findings: Finding[] = parsed.findings.slice(0, options.maxFindings).map((finding, index) => ({\n\t\tid: `finding-${index + 1}`,\n\t\tsummary: finding.summary,\n\t\tevidenceIds: [synthesisRef.id],\n\t\t...(finding.confidence !== undefined ? 
{ confidence: finding.confidence } : {}),\n\t}));\n\treturn createEvidenceBundle({ query: options.query, sources, findings });\n}\n\nexport async function runResearch(options: ResearchRunnerOptions): Promise<ResearchRunResult> {\n\tconst gateOutcome = evaluateResearchRequest({\n\t\tenvelope: options.envelope,\n\t\tsourceKind: \"tool\",\n\t\testimatedUsd: options.maxUsd,\n\t});\n\tif (gateOutcome.outcome !== \"allow\") {\n\t\t// Skip-and-record, never prompt: gate denials inform diagnostics instead of blocking anything.\n\t\tconst status: ResearchRunStatus = gateOutcome.reasonCode === \"over_budget\" ? \"budget_exhausted\" : \"failed\";\n\t\treturn { status, reasonCode: gateOutcome.reasonCode, gateOutcome, costUsd: 0 };\n\t}\n\n\tconst bounded = await runBoundedCompletion({\n\t\tmaxWallClockMs: options.maxWallClockMs,\n\t\tsignal: options.signal,\n\t\texecute: (signal) =>\n\t\t\toptions.complete({\n\t\t\t\tsystemPrompt: RESEARCH_LANE_SYSTEM_PROMPT,\n\t\t\t\tuserPrompt: buildResearchUserPrompt(options),\n\t\t\t\tsignal,\n\t\t\t}),\n\t});\n\tif (bounded.failure) {\n\t\treturn {\n\t\t\tstatus: bounded.failure.status,\n\t\t\treasonCode: bounded.failure.reasonCode,\n\t\t\tgateOutcome,\n\t\t\tcostUsd: bounded.completion?.costUsd ?? 
0,\n\t\t};\n\t}\n\tconst completion = bounded.completion;\n\tif (!completion) {\n\t\treturn { status: \"failed\", reasonCode: \"completion_error\", gateOutcome, costUsd: 0 };\n\t}\n\tif (completion.stopReason === \"error\" || completion.stopReason === \"aborted\") {\n\t\treturn { status: \"failed\", reasonCode: \"model_error\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst parsed = parseResearchFindings(completion.text, options.maxFindings);\n\tif (!parsed) {\n\t\treturn { status: \"failed\", reasonCode: \"unparseable_output\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst bundle = buildBundle(options, parsed);\n\tconst overBudget = options.maxUsd > 0 && completion.costUsd > options.maxUsd;\n\treturn {\n\t\tstatus: overBudget ? \"budget_exhausted\" : \"succeeded\",\n\t\treasonCode: overBudget\n\t\t\t? \"cost_budget_exceeded\"\n\t\t\t: parsed.findings.length === 0\n\t\t\t\t? \"no_findings\"\n\t\t\t\t: \"research_completed\",\n\t\tgateOutcome,\n\t\tbundle,\n\t\tcostUsd: completion.costUsd,\n\t};\n}\n"]}
|
|
@@ -22,6 +22,13 @@ export function buildResearchUserPrompt(args) {
|
|
|
22
22
|
if (args.context && args.context.length > 0) {
|
|
23
23
|
parts.push("", "Context:", args.context);
|
|
24
24
|
}
|
|
25
|
+
if (args.sources && args.sources.length > 0) {
|
|
26
|
+
parts.push("", "Workspace sources (pointer-first; open the file to read full context):");
|
|
27
|
+
for (const source of args.sources) {
|
|
28
|
+
const pointer = source.title ?? source.uri ?? source.id;
|
|
29
|
+
parts.push(source.excerpt ? `- ${pointer}: ${source.excerpt}` : `- ${pointer}`);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
25
32
|
parts.push("", `Return at most ${args.maxFindings} findings.`);
|
|
26
33
|
return parts.join("\n");
|
|
27
34
|
}
|
|
@@ -90,7 +97,12 @@ function buildBundle(options, parsed) {
|
|
|
90
97
|
title: "Research-model synthesis",
|
|
91
98
|
trusted: false,
|
|
92
99
|
};
|
|
93
|
-
|
|
100
|
+
// context + synthesis are the fixed provenance anchors (findings cite src-synthesis); workspace
|
|
101
|
+
// sources fill whatever budget is left between them, so the anchors are never squeezed out.
|
|
102
|
+
const budget = Math.max(1, options.maxSources);
|
|
103
|
+
const workspaceRoom = Math.max(0, budget - 2);
|
|
104
|
+
const workspaceSources = (options.sources ?? []).slice(0, workspaceRoom);
|
|
105
|
+
const sources = [contextRef, ...workspaceSources, synthesisRef].slice(0, budget);
|
|
94
106
|
const findings = parsed.findings.slice(0, options.maxFindings).map((finding, index) => ({
|
|
95
107
|
id: `finding-${index + 1}`,
|
|
96
108
|
summary: finding.summary,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"research-runner.js","sourceRoot":"","sources":["../../../src/core/research/research-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AAEzE,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAE7D;;;;;;;GAOG;AAEH,2GAA2G;AAC3G,MAAM,CAAC,MAAM,2BAA2B,GAAG;IAC1C,uDAAuD;IACvD,kHAAkH;IAClH,+DAA+D;IAC/D,qFAAqF;IACrF,sFAAsF;CACtF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAoCb,MAAM,UAAU,uBAAuB,CAAC,IAA8D,EAAU;IAC/G,MAAM,KAAK,GAAG,CAAC,mBAAmB,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;IAChD,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,kBAAkB,IAAI,CAAC,WAAW,YAAY,CAAC,CAAC;IAC/D,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAAA,CACxB;AAMD,MAAM,UAAU,qBAAqB,CAAC,IAAY,EAAE,WAAmB,EAAsC;IAC5G,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAa,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,8BAA8B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC5D,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC;QAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,IAAI,CAAC,IAAI,GAAG,GAAG,KAAK;QAAE,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IAE9E,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAChC,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QACD,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,SAAS;QAC7E,MAAM,WAAW,GAAI,MAAiC,CAAC,QAAQ,CAAC;QAChE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC;YAAE,SAAS;QAE1C,MAAM,QAAQ,GAAoD,EAAE,CAAC;QACrE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAChC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC;gBAAE,SAAS;YACvE,MAAM,OAAO,GAAI,IAA8B,CAAC,OAAO,CAAC;YACxD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,M
AAM,KAAK,CAAC;gBAAE,SAAS;YACzE,MAAM,aAAa,GAAI,IAAiC,CAAC,UAAU,CAAC;YACpE,MAAM,UAAU,GACf,OAAO,aAAa,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAClE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;gBACzC,CAAC,CAAC,SAAS,CAAC;YACd,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC;YACvD,IAAI,QAAQ,CAAC,MAAM,IAAI,WAAW;gBAAE,MAAM;QAC3C,CAAC;QACD,4FAA4F;QAC5F,kCAAkC;QAClC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrD,OAAO,EAAE,QAAQ,EAAE,CAAC;QACrB,CAAC;IACF,CAAC;IACD,OAAO,SAAS,CAAC;AAAA,CACjB;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAAgB,EAAU;IAChE,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,IAAI,CAAC;IACzC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,KAAG,CAAC;AAAA,CACtD;AAED,SAAS,WAAW,CAAC,OAA8B,EAAE,MAA8B,EAAkB;IACpG,MAAM,UAAU,GAAgB;QAC/B,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,4CAA4C;QACnD,OAAO,EAAE,IAAI;QACb,OAAO,EAAE,eAAe,CAAC,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;KAC/G,CAAC;IACF,MAAM,YAAY,GAAgB;QACjC,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,0BAA0B;QACjC,OAAO,EAAE,KAAK;KACd,CAAC;IACF,MAAM,OAAO,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IACrF,MAAM,QAAQ,GAAc,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QAClG,EAAE,EAAE,WAAW,KAAK,GAAG,CAAC,EAAE;QAC1B,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,WAAW,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC;QAC9B,GAAG,CAAC,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC/E,CAAC,CAAC,CAAC;IACJ,OAAO,oBAAoB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;AAAA,CACzE;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,OAA8B,EAA8B;IAC7F,MAAM,WAAW,GAAG,uBAAuB,CAAC;QAC3C,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,UAAU,EAAE,MAAM;QAClB,YAAY,EAA
E,OAAO,CAAC,MAAM;KAC5B,CAAC,CAAC;IACH,IAAI,WAAW,CAAC,OAAO,KAAK,OAAO,EAAE,CAAC;QACrC,+FAA+F;QAC/F,MAAM,MAAM,GAAsB,WAAW,CAAC,UAAU,KAAK,aAAa,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC3G,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,UAAU,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IAChF,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,oBAAoB,CAAC;QAC1C,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,CAAC,MAAM,EAAE,EAAE,CACnB,OAAO,CAAC,QAAQ,CAAC;YAChB,YAAY,EAAE,2BAA2B;YACzC,UAAU,EAAE,uBAAuB,CAAC,OAAO,CAAC;YAC5C,MAAM;SACN,CAAC;KACH,CAAC,CAAC;IACH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACrB,OAAO;YACN,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,MAAM;YAC9B,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,UAAU;YACtC,WAAW;YACX,OAAO,EAAE,OAAO,CAAC,UAAU,EAAE,OAAO,IAAI,CAAC;SACzC,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;IACtC,IAAI,CAAC,UAAU,EAAE,CAAC;QACjB,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,kBAAkB,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IACtF,CAAC;IACD,IAAI,UAAU,CAAC,UAAU,KAAK,OAAO,IAAI,UAAU,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QAC9E,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,WAAW,EAAE,OAAO,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;IAClG,CAAC;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;IACzG,CAAC;IAED,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7E,OAAO;QACN,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,WAAW;QACrD,UAAU,EAAE,UAAU;YACrB,CAAC,CAAC,sBAAsB;YACxB,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;gBAC7B,CAAC,CAAC,aAAa;gBACf,CAAC,CAAC,oBAAoB;QACxB,WAAW;QACX,MAAM;QACN,OAAO,EAAE,UAAU,CAAC,OAAO;KAC3B,CAAC;AAAA,CACF","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\nimport type { CapabilityEnvelope, EvidenceBundle, EvidenceRef, Finding, GateOutcome } from \"../autonomy/contracts.ts\";\nimport { 
createEvidenceBundle } from \"./evidence-bundle.ts\";\nimport { evaluateResearchRequest } from \"./research-gate.ts\";\n\n/**\n * Pure orchestration for one autonomous research pass: gate -> bounded isolated completion ->\n * parse -> evidence bundle. The model executor is injected so this stays provider-free and\n * session-free; production wires `AgentSession.runIsolatedCompletion` in.\n *\n * The lane is read-only by construction: the executor receives text prompts only, and the output\n * is an `EvidenceBundle` whose model-synthesized findings are marked untrusted.\n */\n\n/** Static across calls so callers can use `cacheRetention: \"short\"` and only pay for the variable tail. */\nexport const RESEARCH_LANE_SYSTEM_PROMPT = [\n\t\"You are a read-only research lane for a coding agent.\",\n\t\"You receive a research query plus bounded context and produce findings that help satisfy open goal requirements.\",\n\t\"Respond with STRICT JSON only - no prose, no markdown fences:\",\n\t'{\"findings\":[{\"summary\":\"<one concrete, actionable finding>\",\"confidence\":<0..1>}]}',\n\t\"Base findings only on the provided context. Never invent file paths, APIs, or facts.\",\n].join(\"\\n\");\n\nexport interface ResearchCompletion {\n\ttext: string;\n\tcostUsd: number;\n\tstopReason: string;\n}\n\nexport interface ResearchRunnerOptions {\n\tquery: string;\n\t/** Bounded, pre-redacted context handed to the research model (goal text, open requirements). */\n\tcontext?: string;\n\t/** Stripped research envelope - never the foreground/architect envelope. */\n\tenvelope: CapabilityEnvelope;\n\t/** Budget for this pass; a post-hoc breach marks the run budget_exhausted (spend stays visible). */\n\tmaxUsd: number;\n\tmaxSources: number;\n\tmaxFindings: number;\n\t/** Wall-clock budget in milliseconds; 0 disables. */\n\tmaxWallClockMs: number;\n\t/** Executes one isolated completion. Production: AgentSession.runIsolatedCompletion. 
*/\n\tcomplete: (args: { systemPrompt: string; userPrompt: string; signal?: AbortSignal }) => Promise<ResearchCompletion>;\n\t/** External cancellation (e.g. session disposal). */\n\tsignal?: AbortSignal;\n}\n\nexport type ResearchRunStatus = \"succeeded\" | \"failed\" | \"canceled\" | \"timeout\" | \"budget_exhausted\";\n\nexport interface ResearchRunResult {\n\tstatus: ResearchRunStatus;\n\treasonCode: string;\n\tgateOutcome: GateOutcome;\n\tbundle?: EvidenceBundle;\n\tcostUsd: number;\n}\n\nexport function buildResearchUserPrompt(args: { query: string; context?: string; maxFindings: number }): string {\n\tconst parts = [`Research query: ${args.query}`];\n\tif (args.context && args.context.length > 0) {\n\t\tparts.push(\"\", \"Context:\", args.context);\n\t}\n\tparts.push(\"\", `Return at most ${args.maxFindings} findings.`);\n\treturn parts.join(\"\\n\");\n}\n\nexport interface ParsedResearchFindings {\n\tfindings: Array<{ summary: string; confidence?: number }>;\n}\n\nexport function parseResearchFindings(text: string, maxFindings: number): ParsedResearchFindings | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\n\tfor (const candidate of candidates) {\n\t\tlet parsed: unknown;\n\t\ttry {\n\t\t\tparsed = JSON.parse(candidate);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\t\tif (!parsed || typeof parsed !== \"object\" || Array.isArray(parsed)) continue;\n\t\tconst findingsRaw = (parsed as { findings?: unknown }).findings;\n\t\tif (!Array.isArray(findingsRaw)) continue;\n\n\t\tconst findings: Array<{ summary: string; confidence?: number }> = [];\n\t\tfor (const item of findingsRaw) {\n\t\t\tif (!item || typeof item !== \"object\" || 
Array.isArray(item)) continue;\n\t\t\tconst summary = (item as { summary?: unknown }).summary;\n\t\t\tif (typeof summary !== \"string\" || summary.trim().length === 0) continue;\n\t\t\tconst confidenceRaw = (item as { confidence?: unknown }).confidence;\n\t\t\tconst confidence =\n\t\t\t\ttypeof confidenceRaw === \"number\" && Number.isFinite(confidenceRaw)\n\t\t\t\t\t? Math.min(Math.max(confidenceRaw, 0), 1)\n\t\t\t\t\t: undefined;\n\t\t\tfindings.push({ summary: summary.trim(), confidence });\n\t\t\tif (findings.length >= maxFindings) break;\n\t\t}\n\t\t// A well-formed-but-empty findings array is a valid \"nothing found\"; a findings array whose\n\t\t// every item is malformed is not.\n\t\tif (findings.length > 0 || findingsRaw.length === 0) {\n\t\t\treturn { findings };\n\t\t}\n\t}\n\treturn undefined;\n}\n\nfunction truncateExcerpt(text: string, maxChars: number): string {\n\tif (text.length <= maxChars) return text;\n\treturn `${text.slice(0, Math.max(0, maxChars - 1))}…`;\n}\n\nfunction buildBundle(options: ResearchRunnerOptions, parsed: ParsedResearchFindings): EvidenceBundle {\n\tconst contextRef: EvidenceRef = {\n\t\tid: \"src-context\",\n\t\tkind: \"user\",\n\t\ttitle: \"Goal/context provided to the research lane\",\n\t\ttrusted: true,\n\t\texcerpt: truncateExcerpt(options.context && options.context.length > 0 ? options.context : options.query, 2000),\n\t};\n\tconst synthesisRef: EvidenceRef = {\n\t\tid: \"src-synthesis\",\n\t\tkind: \"tool\",\n\t\ttitle: \"Research-model synthesis\",\n\t\ttrusted: false,\n\t};\n\tconst sources = [contextRef, synthesisRef].slice(0, Math.max(1, options.maxSources));\n\tconst findings: Finding[] = parsed.findings.slice(0, options.maxFindings).map((finding, index) => ({\n\t\tid: `finding-${index + 1}`,\n\t\tsummary: finding.summary,\n\t\tevidenceIds: [synthesisRef.id],\n\t\t...(finding.confidence !== undefined ? 
{ confidence: finding.confidence } : {}),\n\t}));\n\treturn createEvidenceBundle({ query: options.query, sources, findings });\n}\n\nexport async function runResearch(options: ResearchRunnerOptions): Promise<ResearchRunResult> {\n\tconst gateOutcome = evaluateResearchRequest({\n\t\tenvelope: options.envelope,\n\t\tsourceKind: \"tool\",\n\t\testimatedUsd: options.maxUsd,\n\t});\n\tif (gateOutcome.outcome !== \"allow\") {\n\t\t// Skip-and-record, never prompt: gate denials inform diagnostics instead of blocking anything.\n\t\tconst status: ResearchRunStatus = gateOutcome.reasonCode === \"over_budget\" ? \"budget_exhausted\" : \"failed\";\n\t\treturn { status, reasonCode: gateOutcome.reasonCode, gateOutcome, costUsd: 0 };\n\t}\n\n\tconst bounded = await runBoundedCompletion({\n\t\tmaxWallClockMs: options.maxWallClockMs,\n\t\tsignal: options.signal,\n\t\texecute: (signal) =>\n\t\t\toptions.complete({\n\t\t\t\tsystemPrompt: RESEARCH_LANE_SYSTEM_PROMPT,\n\t\t\t\tuserPrompt: buildResearchUserPrompt(options),\n\t\t\t\tsignal,\n\t\t\t}),\n\t});\n\tif (bounded.failure) {\n\t\treturn {\n\t\t\tstatus: bounded.failure.status,\n\t\t\treasonCode: bounded.failure.reasonCode,\n\t\t\tgateOutcome,\n\t\t\tcostUsd: bounded.completion?.costUsd ?? 
0,\n\t\t};\n\t}\n\tconst completion = bounded.completion;\n\tif (!completion) {\n\t\treturn { status: \"failed\", reasonCode: \"completion_error\", gateOutcome, costUsd: 0 };\n\t}\n\tif (completion.stopReason === \"error\" || completion.stopReason === \"aborted\") {\n\t\treturn { status: \"failed\", reasonCode: \"model_error\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst parsed = parseResearchFindings(completion.text, options.maxFindings);\n\tif (!parsed) {\n\t\treturn { status: \"failed\", reasonCode: \"unparseable_output\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst bundle = buildBundle(options, parsed);\n\tconst overBudget = options.maxUsd > 0 && completion.costUsd > options.maxUsd;\n\treturn {\n\t\tstatus: overBudget ? \"budget_exhausted\" : \"succeeded\",\n\t\treasonCode: overBudget\n\t\t\t? \"cost_budget_exceeded\"\n\t\t\t: parsed.findings.length === 0\n\t\t\t\t? \"no_findings\"\n\t\t\t\t: \"research_completed\",\n\t\tgateOutcome,\n\t\tbundle,\n\t\tcostUsd: completion.costUsd,\n\t};\n}\n"]}
|
|
1
|
+
{"version":3,"file":"research-runner.js","sourceRoot":"","sources":["../../../src/core/research/research-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AAEzE,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAE7D;;;;;;;GAOG;AAEH,2GAA2G;AAC3G,MAAM,CAAC,MAAM,2BAA2B,GAAG;IAC1C,uDAAuD;IACvD,kHAAkH;IAClH,+DAA+D;IAC/D,qFAAqF;IACrF,sFAAsF;CACtF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AA0Cb,MAAM,UAAU,uBAAuB,CAAC,IAKvC,EAAU;IACV,MAAM,KAAK,GAAG,CAAC,mBAAmB,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;IAChD,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IACD,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,wEAAwE,CAAC,CAAC;QACzF,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,EAAE,CAAC;YACxD,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,OAAO,KAAK,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,KAAK,OAAO,EAAE,CAAC,CAAC;QACjF,CAAC;IACF,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,kBAAkB,IAAI,CAAC,WAAW,YAAY,CAAC,CAAC;IAC/D,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAAA,CACxB;AAMD,MAAM,UAAU,qBAAqB,CAAC,IAAY,EAAE,WAAmB,EAAsC;IAC5G,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAa,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,8BAA8B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC5D,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC;QAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,IAAI,CAAC,IAAI,GAAG,GAAG,KAAK;QAAE,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IAE9E,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAChC,CAAC;QAAC,MAAM,CAAC;YACR,SAAS;QACV,CAAC;QACD,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,I
AAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,SAAS;QAC7E,MAAM,WAAW,GAAI,MAAiC,CAAC,QAAQ,CAAC;QAChE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC;YAAE,SAAS;QAE1C,MAAM,QAAQ,GAAoD,EAAE,CAAC;QACrE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAChC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC;gBAAE,SAAS;YACvE,MAAM,OAAO,GAAI,IAA8B,CAAC,OAAO,CAAC;YACxD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YACzE,MAAM,aAAa,GAAI,IAAiC,CAAC,UAAU,CAAC;YACpE,MAAM,UAAU,GACf,OAAO,aAAa,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAClE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;gBACzC,CAAC,CAAC,SAAS,CAAC;YACd,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC;YACvD,IAAI,QAAQ,CAAC,MAAM,IAAI,WAAW;gBAAE,MAAM;QAC3C,CAAC;QACD,4FAA4F;QAC5F,kCAAkC;QAClC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrD,OAAO,EAAE,QAAQ,EAAE,CAAC;QACrB,CAAC;IACF,CAAC;IACD,OAAO,SAAS,CAAC;AAAA,CACjB;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAAgB,EAAU;IAChE,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,IAAI,CAAC;IACzC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,KAAG,CAAC;AAAA,CACtD;AAED,SAAS,WAAW,CAAC,OAA8B,EAAE,MAA8B,EAAkB;IACpG,MAAM,UAAU,GAAgB;QAC/B,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,4CAA4C;QACnD,OAAO,EAAE,IAAI;QACb,OAAO,EAAE,eAAe,CAAC,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;KAC/G,CAAC;IACF,MAAM,YAAY,GAAgB;QACjC,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,0BAA0B;QACjC,OAAO,EAAE,KAAK;KACd,CAAC;IACF,gGAAgG;IAChG,4FAA4F;IAC5F,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAC/C,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,gBAAgB,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACzE,MAAM,OAAO,GAAG,CAAC,UAAU,EAAE,GAAG,gBAAgB,EAAE,YAAY,CAAC,CAAC,KAAK,C
AAC,CAAC,EAAE,MAAM,CAAC,CAAC;IACjF,MAAM,QAAQ,GAAc,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QAClG,EAAE,EAAE,WAAW,KAAK,GAAG,CAAC,EAAE;QAC1B,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,WAAW,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC;QAC9B,GAAG,CAAC,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC/E,CAAC,CAAC,CAAC;IACJ,OAAO,oBAAoB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;AAAA,CACzE;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,OAA8B,EAA8B;IAC7F,MAAM,WAAW,GAAG,uBAAuB,CAAC;QAC3C,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,UAAU,EAAE,MAAM;QAClB,YAAY,EAAE,OAAO,CAAC,MAAM;KAC5B,CAAC,CAAC;IACH,IAAI,WAAW,CAAC,OAAO,KAAK,OAAO,EAAE,CAAC;QACrC,+FAA+F;QAC/F,MAAM,MAAM,GAAsB,WAAW,CAAC,UAAU,KAAK,aAAa,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC3G,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,UAAU,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IAChF,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,oBAAoB,CAAC;QAC1C,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,CAAC,MAAM,EAAE,EAAE,CACnB,OAAO,CAAC,QAAQ,CAAC;YAChB,YAAY,EAAE,2BAA2B;YACzC,UAAU,EAAE,uBAAuB,CAAC,OAAO,CAAC;YAC5C,MAAM;SACN,CAAC;KACH,CAAC,CAAC;IACH,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACrB,OAAO;YACN,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,MAAM;YAC9B,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,UAAU;YACtC,WAAW;YACX,OAAO,EAAE,OAAO,CAAC,UAAU,EAAE,OAAO,IAAI,CAAC;SACzC,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;IACtC,IAAI,CAAC,UAAU,EAAE,CAAC;QACjB,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,kBAAkB,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IACtF,CAAC;IACD,IAAI,UAAU,CAAC,UAAU,KAAK,OAAO,IAAI,UAAU,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QAC9E,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,WAAW,EAAE,OAAO,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;IAClG,CAAC;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;IACzG,CAAC;IAED
,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7E,OAAO;QACN,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,WAAW;QACrD,UAAU,EAAE,UAAU;YACrB,CAAC,CAAC,sBAAsB;YACxB,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;gBAC7B,CAAC,CAAC,aAAa;gBACf,CAAC,CAAC,oBAAoB;QACxB,WAAW;QACX,MAAM;QACN,OAAO,EAAE,UAAU,CAAC,OAAO;KAC3B,CAAC;AAAA,CACF","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\nimport type { CapabilityEnvelope, EvidenceBundle, EvidenceRef, Finding, GateOutcome } from \"../autonomy/contracts.ts\";\nimport { createEvidenceBundle } from \"./evidence-bundle.ts\";\nimport { evaluateResearchRequest } from \"./research-gate.ts\";\n\n/**\n * Pure orchestration for one autonomous research pass: gate -> bounded isolated completion ->\n * parse -> evidence bundle. The model executor is injected so this stays provider-free and\n * session-free; production wires `AgentSession.runIsolatedCompletion` in.\n *\n * The lane is read-only by construction: the executor receives text prompts only, and the output\n * is an `EvidenceBundle` whose model-synthesized findings are marked untrusted.\n */\n\n/** Static across calls so callers can use `cacheRetention: \"short\"` and only pay for the variable tail. */\nexport const RESEARCH_LANE_SYSTEM_PROMPT = [\n\t\"You are a read-only research lane for a coding agent.\",\n\t\"You receive a research query plus bounded context and produce findings that help satisfy open goal requirements.\",\n\t\"Respond with STRICT JSON only - no prose, no markdown fences:\",\n\t'{\"findings\":[{\"summary\":\"<one concrete, actionable finding>\",\"confidence\":<0..1>}]}',\n\t\"Base findings only on the provided context. 
Never invent file paths, APIs, or facts.\",\n].join(\"\\n\");\n\nexport interface ResearchCompletion {\n\ttext: string;\n\tcostUsd: number;\n\tstopReason: string;\n}\n\nexport interface ResearchRunnerOptions {\n\tquery: string;\n\t/** Bounded, pre-redacted context handed to the research model (goal text, open requirements). */\n\tcontext?: string;\n\t/** Stripped research envelope - never the foreground/architect envelope. */\n\tenvelope: CapabilityEnvelope;\n\t/**\n\t * Pointer-first workspace sources (repo-relative path + bounded excerpt, never file bodies) that\n\t * inform the pass. They are rendered into the user prompt and carried into the evidence bundle;\n\t * omitted / empty reproduces the pre-collector behavior exactly.\n\t */\n\tsources?: readonly EvidenceRef[];\n\t/** Budget for this pass; a post-hoc breach marks the run budget_exhausted (spend stays visible). */\n\tmaxUsd: number;\n\tmaxSources: number;\n\tmaxFindings: number;\n\t/** Wall-clock budget in milliseconds; 0 disables. */\n\tmaxWallClockMs: number;\n\t/** Executes one isolated completion. Production: AgentSession.runIsolatedCompletion. */\n\tcomplete: (args: { systemPrompt: string; userPrompt: string; signal?: AbortSignal }) => Promise<ResearchCompletion>;\n\t/** External cancellation (e.g. session disposal). 
*/\n\tsignal?: AbortSignal;\n}\n\nexport type ResearchRunStatus = \"succeeded\" | \"failed\" | \"canceled\" | \"timeout\" | \"budget_exhausted\";\n\nexport interface ResearchRunResult {\n\tstatus: ResearchRunStatus;\n\treasonCode: string;\n\tgateOutcome: GateOutcome;\n\tbundle?: EvidenceBundle;\n\tcostUsd: number;\n}\n\nexport function buildResearchUserPrompt(args: {\n\tquery: string;\n\tcontext?: string;\n\tsources?: readonly EvidenceRef[];\n\tmaxFindings: number;\n}): string {\n\tconst parts = [`Research query: ${args.query}`];\n\tif (args.context && args.context.length > 0) {\n\t\tparts.push(\"\", \"Context:\", args.context);\n\t}\n\tif (args.sources && args.sources.length > 0) {\n\t\tparts.push(\"\", \"Workspace sources (pointer-first; open the file to read full context):\");\n\t\tfor (const source of args.sources) {\n\t\t\tconst pointer = source.title ?? source.uri ?? source.id;\n\t\t\tparts.push(source.excerpt ? `- ${pointer}: ${source.excerpt}` : `- ${pointer}`);\n\t\t}\n\t}\n\tparts.push(\"\", `Return at most ${args.maxFindings} findings.`);\n\treturn parts.join(\"\\n\");\n}\n\nexport interface ParsedResearchFindings {\n\tfindings: Array<{ summary: string; confidence?: number }>;\n}\n\nexport function parseResearchFindings(text: string, maxFindings: number): ParsedResearchFindings | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\n\tfor (const candidate of candidates) {\n\t\tlet parsed: unknown;\n\t\ttry {\n\t\t\tparsed = JSON.parse(candidate);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\t\tif (!parsed || typeof parsed !== \"object\" || Array.isArray(parsed)) continue;\n\t\tconst findingsRaw = (parsed as { findings?: unknown 
}).findings;\n\t\tif (!Array.isArray(findingsRaw)) continue;\n\n\t\tconst findings: Array<{ summary: string; confidence?: number }> = [];\n\t\tfor (const item of findingsRaw) {\n\t\t\tif (!item || typeof item !== \"object\" || Array.isArray(item)) continue;\n\t\t\tconst summary = (item as { summary?: unknown }).summary;\n\t\t\tif (typeof summary !== \"string\" || summary.trim().length === 0) continue;\n\t\t\tconst confidenceRaw = (item as { confidence?: unknown }).confidence;\n\t\t\tconst confidence =\n\t\t\t\ttypeof confidenceRaw === \"number\" && Number.isFinite(confidenceRaw)\n\t\t\t\t\t? Math.min(Math.max(confidenceRaw, 0), 1)\n\t\t\t\t\t: undefined;\n\t\t\tfindings.push({ summary: summary.trim(), confidence });\n\t\t\tif (findings.length >= maxFindings) break;\n\t\t}\n\t\t// A well-formed-but-empty findings array is a valid \"nothing found\"; a findings array whose\n\t\t// every item is malformed is not.\n\t\tif (findings.length > 0 || findingsRaw.length === 0) {\n\t\t\treturn { findings };\n\t\t}\n\t}\n\treturn undefined;\n}\n\nfunction truncateExcerpt(text: string, maxChars: number): string {\n\tif (text.length <= maxChars) return text;\n\treturn `${text.slice(0, Math.max(0, maxChars - 1))}…`;\n}\n\nfunction buildBundle(options: ResearchRunnerOptions, parsed: ParsedResearchFindings): EvidenceBundle {\n\tconst contextRef: EvidenceRef = {\n\t\tid: \"src-context\",\n\t\tkind: \"user\",\n\t\ttitle: \"Goal/context provided to the research lane\",\n\t\ttrusted: true,\n\t\texcerpt: truncateExcerpt(options.context && options.context.length > 0 ? 
options.context : options.query, 2000),\n\t};\n\tconst synthesisRef: EvidenceRef = {\n\t\tid: \"src-synthesis\",\n\t\tkind: \"tool\",\n\t\ttitle: \"Research-model synthesis\",\n\t\ttrusted: false,\n\t};\n\t// context + synthesis are the fixed provenance anchors (findings cite src-synthesis); workspace\n\t// sources fill whatever budget is left between them, so the anchors are never squeezed out.\n\tconst budget = Math.max(1, options.maxSources);\n\tconst workspaceRoom = Math.max(0, budget - 2);\n\tconst workspaceSources = (options.sources ?? []).slice(0, workspaceRoom);\n\tconst sources = [contextRef, ...workspaceSources, synthesisRef].slice(0, budget);\n\tconst findings: Finding[] = parsed.findings.slice(0, options.maxFindings).map((finding, index) => ({\n\t\tid: `finding-${index + 1}`,\n\t\tsummary: finding.summary,\n\t\tevidenceIds: [synthesisRef.id],\n\t\t...(finding.confidence !== undefined ? { confidence: finding.confidence } : {}),\n\t}));\n\treturn createEvidenceBundle({ query: options.query, sources, findings });\n}\n\nexport async function runResearch(options: ResearchRunnerOptions): Promise<ResearchRunResult> {\n\tconst gateOutcome = evaluateResearchRequest({\n\t\tenvelope: options.envelope,\n\t\tsourceKind: \"tool\",\n\t\testimatedUsd: options.maxUsd,\n\t});\n\tif (gateOutcome.outcome !== \"allow\") {\n\t\t// Skip-and-record, never prompt: gate denials inform diagnostics instead of blocking anything.\n\t\tconst status: ResearchRunStatus = gateOutcome.reasonCode === \"over_budget\" ? 
\"budget_exhausted\" : \"failed\";\n\t\treturn { status, reasonCode: gateOutcome.reasonCode, gateOutcome, costUsd: 0 };\n\t}\n\n\tconst bounded = await runBoundedCompletion({\n\t\tmaxWallClockMs: options.maxWallClockMs,\n\t\tsignal: options.signal,\n\t\texecute: (signal) =>\n\t\t\toptions.complete({\n\t\t\t\tsystemPrompt: RESEARCH_LANE_SYSTEM_PROMPT,\n\t\t\t\tuserPrompt: buildResearchUserPrompt(options),\n\t\t\t\tsignal,\n\t\t\t}),\n\t});\n\tif (bounded.failure) {\n\t\treturn {\n\t\t\tstatus: bounded.failure.status,\n\t\t\treasonCode: bounded.failure.reasonCode,\n\t\t\tgateOutcome,\n\t\t\tcostUsd: bounded.completion?.costUsd ?? 0,\n\t\t};\n\t}\n\tconst completion = bounded.completion;\n\tif (!completion) {\n\t\treturn { status: \"failed\", reasonCode: \"completion_error\", gateOutcome, costUsd: 0 };\n\t}\n\tif (completion.stopReason === \"error\" || completion.stopReason === \"aborted\") {\n\t\treturn { status: \"failed\", reasonCode: \"model_error\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst parsed = parseResearchFindings(completion.text, options.maxFindings);\n\tif (!parsed) {\n\t\treturn { status: \"failed\", reasonCode: \"unparseable_output\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst bundle = buildBundle(options, parsed);\n\tconst overBudget = options.maxUsd > 0 && completion.costUsd > options.maxUsd;\n\treturn {\n\t\tstatus: overBudget ? \"budget_exhausted\" : \"succeeded\",\n\t\treasonCode: overBudget\n\t\t\t? \"cost_budget_exceeded\"\n\t\t\t: parsed.findings.length === 0\n\t\t\t\t? \"no_findings\"\n\t\t\t\t: \"research_completed\",\n\t\tgateOutcome,\n\t\tbundle,\n\t\tcostUsd: completion.costUsd,\n\t};\n}\n"]}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { type ExecFileException } from "node:child_process";
|
|
2
|
+
/**
 * Dependency-injection seam for process spawning: the one callback-style `execFile` overload
 * the collector calls. Typing the seam as node's full `typeof execFile` (which also carries
 * `__promisify__`) would make plain test mocks unassignable, so only this shape is required.
 */
export type WorkspaceExecFileFn = (
    command: string,
    args: readonly string[],
    options: {
        cwd?: string;
        timeout?: number;
        maxBuffer?: number;
        encoding?: string;
        windowsHide?: boolean;
    },
    callback: (error: ExecFileException | null, stdout: string, stderr: string) => void,
) => unknown;
|
|
11
|
+
import type { EvidenceRef } from "../autonomy/contracts.ts";
|
|
12
|
+
/** Inputs accepted by `collectWorkspaceSources`. */
export interface CollectWorkspaceSourcesArgs {
    /**
     * Free text (goal plus requirement text); the collector derives its search terms from it.
     */
    query: string;
    /**
     * Session working directory. ripgrep is run here, and reported paths are relative to it.
     */
    cwd: string;
    /**
     * Hard upper bound on the number of sources returned; doubles as the lane's source budget.
     */
    maxSources: number;
    /**
     * Process-spawning seam, injected by tests. When omitted, node's `execFile` is used.
     */
    execFileFn?: WorkspaceExecFileFn;
}
|
|
22
|
+
/**
 * Derives search terms from free text: splits on runs of non-word characters, lowercases each
 * piece, discards stopwords, too-short pieces and duplicates, preserves first-seen order, and
 * stops once MAX_TERMS terms have been collected.
 */
export declare function deriveSearchTerms(query: string): string[];
|
|
24
|
+
/**
 * Collects pointer-style workspace sources (path, optional line and bounded excerpt) for a query
 * by running ripgrep under `args.cwd`. Resolves to at most `args.maxSources` entries, and to an
 * empty array when there is nothing to search for or the discovery pass fails.
 */
export declare function collectWorkspaceSources(args: CollectWorkspaceSourcesArgs): Promise<EvidenceRef[]>;
|
|
25
|
+
//# sourceMappingURL=workspace-collector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace-collector.d.ts","sourceRoot":"","sources":["../../../src/core/research/workspace-collector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,iBAAiB,EAAY,MAAM,oBAAoB,CAAC;AAEtE;+FAC+F;AAC/F,MAAM,MAAM,mBAAmB,GAAG,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,OAAO,CAAA;CAAE,EACzG,QAAQ,EAAE,CAAC,KAAK,EAAE,iBAAiB,GAAG,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,KAC/E,OAAO,CAAC;AAEb,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAuG5D,MAAM,WAAW,2BAA2B;IAC3C,8EAA8E;IAC9E,KAAK,EAAE,MAAM,CAAC;IACd,0FAA0F;IAC1F,GAAG,EAAE,MAAM,CAAC;IACZ,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;IACnB,yDAAyD;IACzD,UAAU,CAAC,EAAE,mBAAmB,CAAC;CACjC;AAED,0GAA0G;AAC1G,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAazD;AA6FD,wBAAsB,uBAAuB,CAAC,IAAI,EAAE,2BAA2B,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAmGvG","sourcesContent":["import { type ExecFileException, execFile } from \"node:child_process\";\n\n/** Structural DI seam: only the callback overload the collector actually uses — demanding\n * node's full `typeof execFile` (with `__promisify__`) makes plain test mocks unassignable. */\nexport type WorkspaceExecFileFn = (\n\tcommand: string,\n\targs: readonly string[],\n\toptions: { cwd?: string; timeout?: number; maxBuffer?: number; encoding?: string; windowsHide?: boolean },\n\tcallback: (error: ExecFileException | null, stdout: string, stderr: string) => void,\n) => unknown;\n\nimport type { EvidenceRef } from \"../autonomy/contracts.ts\";\n\n/**\n * Best-effort workspace research source collector.\n *\n * Feeds the autonomous research lane POINTER-FIRST sources: a repo-relative path, a bounded excerpt,\n * and (when known) a line number — never whole file bodies. It runs `rg` under the session cwd exactly\n * like the grep tool does, so it only surfaces content ripgrep already matched. 
Collection is bounded\n * (a shared wall-clock deadline, a candidate cap, ripgrep's own binary/oversize skipping) and never\n * throws: if `rg` is missing or errors, it returns `[]`, which is today's \"no collector\" behavior.\n *\n * The returned sources are `EvidenceRef`s (the runner's source type) tagged `kind: \"workspace\"`.\n */\n\n/** Search terms shorter than this are too noisy to be useful discriminators. */\nconst MIN_TERM_LEN = 3;\n/** Cap on derived search terms; keeps the discovery pattern small and the collector cheap. */\nconst MAX_TERMS = 4;\n/** Pointer excerpts are bounded so we never spill a whole line (or a whole file) into the prompt. */\nconst EXCERPT_MAX_CHARS = 200;\n/** Shared wall-clock budget for the whole collection pass (both ripgrep calls together). */\nconst COLLECTION_BUDGET_MS = 5000;\n/** Floor for any single ripgrep call so a nearly-spent budget still gives ripgrep a chance to run. */\nconst MIN_CALL_MS = 500;\n/** ripgrep skips files larger than this; oversized files never contribute a source. */\nconst MAX_FILESIZE = \"1M\";\n/** Upper bound on candidate files carried from discovery into the pointer pass. */\nconst CANDIDATE_CAP = 24;\n/** Generous stdout ceiling; overflow degrades to fewer sources rather than throwing. */\nconst MAX_BUFFER_BYTES = 8 * 1024 * 1024;\n\n/**\n * A small, deliberately conservative English stopword set plus the structural words that show up in\n * goal/requirement text. 
Anything not here that is >= MIN_TERM_LEN survives as a search term.\n */\nconst STOPWORDS = new Set<string>([\n\t\"the\",\n\t\"and\",\n\t\"for\",\n\t\"with\",\n\t\"that\",\n\t\"this\",\n\t\"from\",\n\t\"into\",\n\t\"are\",\n\t\"was\",\n\t\"were\",\n\t\"but\",\n\t\"not\",\n\t\"you\",\n\t\"your\",\n\t\"our\",\n\t\"all\",\n\t\"any\",\n\t\"can\",\n\t\"has\",\n\t\"have\",\n\t\"had\",\n\t\"will\",\n\t\"would\",\n\t\"should\",\n\t\"could\",\n\t\"its\",\n\t\"his\",\n\t\"her\",\n\t\"their\",\n\t\"them\",\n\t\"they\",\n\t\"she\",\n\t\"him\",\n\t\"who\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"how\",\n\t\"why\",\n\t\"use\",\n\t\"using\",\n\t\"used\",\n\t\"via\",\n\t\"per\",\n\t\"out\",\n\t\"off\",\n\t\"over\",\n\t\"under\",\n\t\"then\",\n\t\"than\",\n\t\"add\",\n\t\"adds\",\n\t\"get\",\n\t\"gets\",\n\t\"set\",\n\t\"sets\",\n\t\"new\",\n\t\"old\",\n\t\"one\",\n\t\"two\",\n\t\"let\",\n\t\"run\",\n\t\"runs\",\n]);\n\nexport interface CollectWorkspaceSourcesArgs {\n\t/** Free text (goal + requirement text) that search terms are derived from. */\n\tquery: string;\n\t/** Session working directory; ripgrep runs here and paths are reported relative to it. */\n\tcwd: string;\n\t/** Hard cap on returned sources; also the lane's source budget. */\n\tmaxSources: number;\n\t/** Injected for tests; defaults to node's `execFile`. */\n\texecFileFn?: WorkspaceExecFileFn;\n}\n\n/** Split on non-word runs, lowercase, drop stopwords/short/dupes, keep source order, cap at MAX_TERMS. 
*/\nexport function deriveSearchTerms(query: string): string[] {\n\tconst seen = new Set<string>();\n\tconst terms: string[] = [];\n\tfor (const raw of query.split(/[^\\w]+/)) {\n\t\tconst term = raw.toLowerCase();\n\t\tif (term.length < MIN_TERM_LEN) continue;\n\t\tif (STOPWORDS.has(term)) continue;\n\t\tif (seen.has(term)) continue;\n\t\tseen.add(term);\n\t\tterms.push(term);\n\t\tif (terms.length >= MAX_TERMS) break;\n\t}\n\treturn terms;\n}\n\n/** The most specific term (longest wins; ties keep the earliest) drives the line-level pointer pass. */\nfunction pickBestTerm(terms: readonly string[]): string {\n\treturn terms.reduce((best, term) => (term.length > best.length ? term : best), terms[0]);\n}\n\nfunction truncateExcerpt(text: string): string | undefined {\n\tconst trimmed = text.trim();\n\tif (trimmed.length === 0) return undefined;\n\tif (trimmed.length <= EXCERPT_MAX_CHARS) return trimmed;\n\treturn `${trimmed.slice(0, EXCERPT_MAX_CHARS - 1)}…`;\n}\n\ninterface RgOutcome {\n\t/** True when ripgrep ran to a usable result (matches found, or a clean \"no matches\"). */\n\tok: boolean;\n\tstdout: string;\n\t/** True when the `rg` binary could not be spawned at all — the collector bails entirely. */\n\tmissing: boolean;\n}\n\nfunction runRg(\n\texecFileFn: WorkspaceExecFileFn,\n\targs: readonly string[],\n\tcwd: string,\n\ttimeoutMs: number,\n): Promise<RgOutcome> {\n\treturn new Promise((resolve) => {\n\t\tlet settled = false;\n\t\tconst done = (outcome: RgOutcome): void => {\n\t\t\tif (settled) return;\n\t\t\tsettled = true;\n\t\t\tresolve(outcome);\n\t\t};\n\t\ttry {\n\t\t\texecFileFn(\n\t\t\t\t\"rg\",\n\t\t\t\t[...args],\n\t\t\t\t{ cwd, timeout: timeoutMs, maxBuffer: MAX_BUFFER_BYTES, encoding: \"utf8\", windowsHide: true },\n\t\t\t\t(error: ExecFileException | null, stdout: string) => {\n\t\t\t\t\tconst out = typeof stdout === \"string\" ? 
stdout : \"\";\n\t\t\t\t\tif (!error) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\t// Exit code 1 is ripgrep's \"no matches\" — a clean, usable result, not a failure.\n\t\t\t\t\tif (error.code === 1) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\tdone({ ok: false, stdout: \"\", missing: error.code === \"ENOENT\" });\n\t\t\t\t},\n\t\t\t);\n\t\t} catch {\n\t\t\t// A synchronous spawn failure (e.g. rg entirely absent) is treated as \"missing\".\n\t\t\tdone({ ok: false, stdout: \"\", missing: true });\n\t\t}\n\t});\n}\n\n/** rg prints `./foo` when the search root is `.`; keep sources cleanly repo-relative. */\nfunction normalizePath(path: string): string {\n\treturn path.startsWith(\"./\") ? path.slice(2) : path;\n}\n\nfunction parseFileList(stdout: string): string[] {\n\tconst files: string[] = [];\n\tfor (const line of stdout.split(/\\r?\\n/)) {\n\t\tconst path = line.trim();\n\t\tif (path.length > 0) files.push(normalizePath(path));\n\t}\n\treturn files;\n}\n\ninterface ContentMatch {\n\tpath: string;\n\tline: number;\n\ttext: string;\n}\n\nfunction parseContentMatches(stdout: string): ContentMatch[] {\n\tconst matches: ContentMatch[] = [];\n\tfor (const raw of stdout.split(/\\r?\\n/)) {\n\t\tif (raw.length === 0) continue;\n\t\t// `-H -n --no-heading` yields `path:line:text`; text may itself contain colons.\n\t\tconst parsed = /^(.+?):(\\d+):(.*)$/.exec(raw);\n\t\tif (!parsed) continue;\n\t\tmatches.push({ path: normalizePath(parsed[1]), line: Number(parsed[2]), text: parsed[3] });\n\t}\n\treturn matches;\n}\n\nexport async function collectWorkspaceSources(args: CollectWorkspaceSourcesArgs): Promise<EvidenceRef[]> {\n\tconst { query, cwd, maxSources } = args;\n\tconst execFileFn = args.execFileFn ?? 
(execFile as unknown as WorkspaceExecFileFn);\n\tif (!cwd || maxSources <= 0) return [];\n\n\tconst terms = deriveSearchTerms(query);\n\tif (terms.length === 0) return [];\n\n\tconst deadline = Date.now() + COLLECTION_BUDGET_MS;\n\tconst remainingBudget = (): number => Math.max(MIN_CALL_MS, deadline - Date.now());\n\n\t// Phase 1 (discovery): which files match ANY term. `--max-count 1` stops at the first hit per file;\n\t// ripgrep skips binary and oversized files by default / via --max-filesize.\n\tconst discoveryArgs = [\n\t\t\"--files-with-matches\",\n\t\t\"--max-count\",\n\t\t\"1\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--max-filesize\",\n\t\tMAX_FILESIZE,\n\t\t\"--color\",\n\t\t\"never\",\n\t\t...terms.flatMap((term) => [\"-e\", term]),\n\t\t// Explicit search root: execFile hands rg a piped stdin, and rg with no path argument would\n\t\t// read (and block on) that pipe instead of scanning the tree. \".\" keeps output repo-relative.\n\t\t\"--\",\n\t\t\".\",\n\t];\n\tconst discovery = await runRg(execFileFn, discoveryArgs, cwd, remainingBudget());\n\tif (discovery.missing || !discovery.ok) return [];\n\n\tconst candidateFiles = parseFileList(discovery.stdout).slice(0, CANDIDATE_CAP);\n\tif (candidateFiles.length === 0) return [];\n\n\t// Phase 2 (pointers): line-level hits for the single best term, scanned only over files discovery\n\t// already matched — so we never read a file ripgrep did not surface.\n\tconst bestTerm = pickBestTerm(terms);\n\tconst contentArgs = [\n\t\t\"-H\",\n\t\t\"-n\",\n\t\t\"--no-heading\",\n\t\t\"-m\",\n\t\t\"2\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--color\",\n\t\t\"never\",\n\t\t\"-e\",\n\t\tbestTerm,\n\t\t\"--\",\n\t\t...candidateFiles,\n\t];\n\tconst content = await runRg(execFileFn, contentArgs, cwd, remainingBudget());\n\n\tconst sources: EvidenceRef[] = [];\n\tconst seenPaths = new Set<string>();\n\tconst seenLineKeys = new 
Set<string>();\n\tlet counter = 0;\n\n\tif (content.ok) {\n\t\tfor (const match of parseContentMatches(content.stdout)) {\n\t\t\tif (sources.length >= maxSources) break;\n\t\t\tconst key = `${match.path}:${match.line}`;\n\t\t\tif (seenLineKeys.has(key)) continue;\n\t\t\tseenLineKeys.add(key);\n\t\t\tseenPaths.add(match.path);\n\t\t\tconst excerpt = truncateExcerpt(match.text);\n\t\t\tsources.push({\n\t\t\t\tid: `ws-${++counter}`,\n\t\t\t\tkind: \"workspace\",\n\t\t\t\ttitle: `${match.path}:${match.line}`,\n\t\t\t\turi: match.path,\n\t\t\t\ttrusted: true,\n\t\t\t\t...(excerpt !== undefined ? { excerpt } : {}),\n\t\t\t\tmetadata: { line: match.line, term: bestTerm },\n\t\t\t});\n\t\t}\n\t}\n\n\t// Fill the remaining budget with file-level pointers for candidates that matched a secondary term\n\t// (and so produced no best-term line). Still pointer-first: a path, never a body.\n\tfor (const file of candidateFiles) {\n\t\tif (sources.length >= maxSources) break;\n\t\tif (seenPaths.has(file)) continue;\n\t\tseenPaths.add(file);\n\t\tsources.push({\n\t\t\tid: `ws-${++counter}`,\n\t\t\tkind: \"workspace\",\n\t\t\ttitle: file,\n\t\t\turi: file,\n\t\t\ttrusted: true,\n\t\t\tmetadata: { matchedBy: \"discovery\" },\n\t\t});\n\t}\n\n\treturn sources.slice(0, maxSources);\n}\n"]}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
/**
|
|
3
|
+
* Best-effort workspace research source collector.
|
|
4
|
+
*
|
|
5
|
+
* Feeds the autonomous research lane POINTER-FIRST sources: a repo-relative path, a bounded excerpt,
|
|
6
|
+
* and (when known) a line number — never whole file bodies. It runs `rg` under the session cwd exactly
|
|
7
|
+
* like the grep tool does, so it only surfaces content ripgrep already matched. Collection is bounded
|
|
8
|
+
* (a shared wall-clock deadline, a candidate cap, ripgrep's own binary/oversize skipping) and never
|
|
9
|
+
* throws: if `rg` is missing or errors, it returns `[]`, which is today's "no collector" behavior.
|
|
10
|
+
*
|
|
11
|
+
* The returned sources are `EvidenceRef`s (the runner's source type) tagged `kind: "workspace"`.
|
|
12
|
+
*/
|
|
13
|
+
// --- Search-term derivation -------------------------------------------------------------------
/** Minimum length of a usable search term; anything shorter is too noisy to discriminate. */
const MIN_TERM_LEN = 3;
/** Maximum number of derived terms, keeping the discovery pattern small and the pass cheap. */
const MAX_TERMS = 4;

// --- Output bounds ----------------------------------------------------------------------------
/** Character ceiling for a pointer excerpt, so a long line never spills into the prompt. */
const EXCERPT_MAX_CHARS = 200;
/** Maximum number of discovered files handed on to the line-level pointer pass. */
const CANDIDATE_CAP = 24;

// --- Time budget ------------------------------------------------------------------------------
/** Wall-clock budget, in milliseconds, shared by both ripgrep calls of one collection pass. */
const COLLECTION_BUDGET_MS = 5000;
/** Per-call timeout floor, so ripgrep still gets a chance when the shared budget is nearly spent. */
const MIN_CALL_MS = 500;

// --- ripgrep limits ---------------------------------------------------------------------------
/** Value passed to `--max-filesize`; larger files are skipped and never become a source. */
const MAX_FILESIZE = "1M";
/** Stdout ceiling handed to execFile (8 MiB); overflow means fewer sources, not a throw. */
const MAX_BUFFER_BYTES = 8 * 1024 * 1024;
|
|
29
|
+
/**
 * Words never used as search terms: a small, intentionally conservative English stopword list
 * plus structural words common in goal/requirement text. Any other word of at least
 * MIN_TERM_LEN characters is kept as a search term.
 */
const STOPWORDS = new Set([
    "the", "and", "for", "with", "that", "this", "from", "into",
    "are", "was", "were", "but", "not", "you", "your", "our",
    "all", "any", "can", "has", "have", "had", "will", "would",
    "should", "could", "its", "his", "her", "their", "them", "they",
    "she", "him", "who", "what", "when", "where", "which", "how",
    "why", "use", "using", "used", "via", "per", "out", "off",
    "over", "under", "then", "than", "add", "adds", "get", "gets",
    "set", "sets", "new", "old", "one", "two", "let", "run",
    "runs",
]);
|
|
100
|
+
/**
 * Turn free text into a short list of search terms.
 *
 * The text is split on runs of non-word characters and lowercased. Pieces shorter than
 * MIN_TERM_LEN, stopwords and repeats are discarded. Terms come back in first-seen order and
 * collection stops as soon as MAX_TERMS terms have been gathered.
 */
export function deriveSearchTerms(query) {
    // A Set iterates in insertion order, so it serves as both the dedupe index and the result.
    const picked = new Set();
    const words = query.split(/[^\w]+/).map((piece) => piece.toLowerCase());
    for (const word of words) {
        const usable = word.length >= MIN_TERM_LEN && !STOPWORDS.has(word);
        if (!usable || picked.has(word)) {
            continue;
        }
        picked.add(word);
        if (picked.size >= MAX_TERMS) {
            break;
        }
    }
    return Array.from(picked);
}
|
|
119
|
+
/**
 * Choose the term that drives the line-level pointer pass: the longest one, with the earliest
 * term winning a tie. Yields `undefined` for an empty list.
 */
function pickBestTerm(terms) {
    let longest = terms[0];
    for (const candidate of terms) {
        // Strictly-greater keeps the earlier term when two share a length.
        if (candidate.length > longest.length) {
            longest = candidate;
        }
    }
    return longest;
}
|
|
123
|
+
/**
 * Bound a matched line for use as an excerpt. Surrounding whitespace is removed; a blank result
 * yields `undefined`. Text longer than EXCERPT_MAX_CHARS is cut and ended with an ellipsis so the
 * result is exactly EXCERPT_MAX_CHARS characters long.
 */
function truncateExcerpt(text) {
    const body = text.trim();
    if (body.length === 0) {
        return undefined;
    }
    return body.length > EXCERPT_MAX_CHARS ? `${body.slice(0, EXCERPT_MAX_CHARS - 1)}…` : body;
}
|
|
131
|
+
/**
 * Run `rg` once through the injected execFile seam and report the outcome without ever rejecting.
 *
 * Resolves to `{ ok, stdout, missing }`:
 * - `ok: true` with the captured stdout when ripgrep exits cleanly or with code 1 (no matches);
 * - `ok: false, missing: true` when the error code is "ENOENT" or the spawn call throws;
 * - `ok: false, missing: false` for every other error.
 */
function runRg(execFileFn, args, cwd, timeoutMs) {
    return new Promise((resolve) => {
        // Guard so that only the first reported outcome resolves the promise.
        let resolved = false;
        const settle = (ok, stdout, missing) => {
            if (resolved) {
                return;
            }
            resolved = true;
            resolve({ ok, stdout, missing });
        };
        const execOptions = {
            cwd,
            timeout: timeoutMs,
            maxBuffer: MAX_BUFFER_BYTES,
            encoding: "utf8",
            windowsHide: true,
        };
        const onExit = (error, stdout) => {
            const captured = typeof stdout === "string" ? stdout : "";
            // Exit code 1 is ripgrep's "no matches": a clean, usable result rather than a failure.
            if (!error || error.code === 1) {
                settle(true, captured, false);
                return;
            }
            settle(false, "", error.code === "ENOENT");
        };
        try {
            execFileFn("rg", [...args], execOptions, onExit);
        }
        catch {
            // A synchronous spawn failure (e.g. rg entirely absent) is treated as "missing".
            settle(false, "", true);
        }
    });
}
|
|
161
|
+
/**
 * Strip one leading `./` from a path. ripgrep prefixes its output with `./` when the search root
 * is `.`, and sources should stay cleanly repo-relative. Other paths are returned untouched.
 */
function normalizePath(path) {
    if (path.startsWith("./")) {
        return path.slice(2);
    }
    return path;
}
|
|
165
|
+
/**
 * Parse `--files-with-matches` output: one path per line (LF or CRLF), trimmed, blank lines
 * dropped, each path normalized to repo-relative form.
 */
function parseFileList(stdout) {
    return stdout
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((entry) => entry.length > 0)
        .map((entry) => normalizePath(entry));
}
|
|
174
|
+
/**
 * Parse `-H -n --no-heading` output into `{ path, line, text }` records. Each usable row has the
 * form `path:line:text`; the text part may itself contain colons. Empty rows and rows that do not
 * fit that form are skipped.
 */
function parseContentMatches(stdout) {
    return stdout.split(/\r?\n/).flatMap((row) => {
        if (row.length === 0) {
            return [];
        }
        const fields = /^(.+?):(\d+):(.*)$/.exec(row);
        if (!fields) {
            return [];
        }
        const [, rawPath, lineNumber, text] = fields;
        return [{ path: normalizePath(rawPath), line: Number(lineNumber), text }];
    });
}
|
|
187
|
+
/**
 * Collect pointer-first workspace sources for a query with two ripgrep passes.
 *
 * Pass 1 (discovery) lists files matching any derived term. Pass 2 (pointers) fetches up to two
 * line-level hits per candidate file for the single longest term. Remaining room is filled with
 * file-level pointers for candidates that produced no line hit.
 *
 * Resolves to at most `maxSources` entries of `kind: "workspace"`. Resolves to `[]` when `cwd` is
 * empty, `maxSources <= 0`, no search terms can be derived, discovery fails, or discovery finds
 * no files.
 */
export async function collectWorkspaceSources(args) {
    const { query, cwd, maxSources } = args;
    const execFileFn = args.execFileFn ?? execFile;
    if (!cwd || maxSources <= 0) {
        return [];
    }
    const terms = deriveSearchTerms(query);
    if (terms.length === 0) {
        return [];
    }
    // Both ripgrep calls share one deadline; each call still gets at least MIN_CALL_MS.
    const deadline = Date.now() + COLLECTION_BUDGET_MS;
    const timeLeft = () => Math.max(MIN_CALL_MS, deadline - Date.now());

    // Pass 1 (discovery): files matching ANY term. `--max-count 1` stops at the first hit per
    // file; binary and oversized files are skipped by ripgrep itself / --max-filesize.
    const patternFlags = [];
    for (const term of terms) {
        patternFlags.push("-e", term);
    }
    const discovery = await runRg(execFileFn, [
        "--files-with-matches",
        "--max-count",
        "1",
        "--fixed-strings",
        "--smart-case",
        "--no-messages",
        "--max-filesize",
        MAX_FILESIZE,
        "--color",
        "never",
        ...patternFlags,
        // Explicit search root: execFile hands rg a piped stdin, and rg with no path argument
        // would read (and block on) that pipe instead of scanning the tree. "." keeps output
        // repo-relative.
        "--",
        ".",
    ], cwd, timeLeft());
    if (!discovery.ok || discovery.missing) {
        return [];
    }
    const candidateFiles = parseFileList(discovery.stdout).slice(0, CANDIDATE_CAP);
    if (candidateFiles.length === 0) {
        return [];
    }

    // Pass 2 (pointers): line-level hits for the best term, restricted to the files discovery
    // already matched, so no file is read that ripgrep did not surface.
    const bestTerm = pickBestTerm(terms);
    const content = await runRg(execFileFn, [
        "-H",
        "-n",
        "--no-heading",
        "-m",
        "2",
        "--fixed-strings",
        "--smart-case",
        "--no-messages",
        "--color",
        "never",
        "-e",
        bestTerm,
        "--",
        ...candidateFiles,
    ], cwd, timeLeft());

    const sources = [];
    const coveredPaths = new Set();
    const coveredLines = new Set();
    const isFull = () => sources.length >= maxSources;

    if (content.ok) {
        for (const { path, line, text } of parseContentMatches(content.stdout)) {
            if (isFull()) {
                break;
            }
            const pointer = `${path}:${line}`;
            if (coveredLines.has(pointer)) {
                continue;
            }
            coveredLines.add(pointer);
            coveredPaths.add(path);
            const excerpt = truncateExcerpt(text);
            sources.push({
                // Ids are sequential across both loops: one per source pushed so far, plus one.
                id: `ws-${sources.length + 1}`,
                kind: "workspace",
                title: pointer,
                uri: path,
                trusted: true,
                ...(excerpt === undefined ? {} : { excerpt }),
                metadata: { line, term: bestTerm },
            });
        }
    }

    // Use any remaining room for file-level pointers: candidates that matched only a secondary
    // term and therefore yielded no best-term line. Still a path, never a file body.
    for (const file of candidateFiles) {
        if (isFull()) {
            break;
        }
        if (coveredPaths.has(file)) {
            continue;
        }
        coveredPaths.add(file);
        sources.push({
            id: `ws-${sources.length + 1}`,
            kind: "workspace",
            title: file,
            uri: file,
            trusted: true,
            metadata: { matchedBy: "discovery" },
        });
    }
    return sources.slice(0, maxSources);
}
|
|
286
|
+
//# sourceMappingURL=workspace-collector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace-collector.js","sourceRoot":"","sources":["../../../src/core/research/workspace-collector.ts"],"names":[],"mappings":"AAAA,OAAO,EAA0B,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAatE;;;;;;;;;;GAUG;AAEH,gFAAgF;AAChF,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,8FAA8F;AAC9F,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,qGAAqG;AACrG,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAC9B,4FAA4F;AAC5F,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC,sGAAsG;AACtG,MAAM,WAAW,GAAG,GAAG,CAAC;AACxB,uFAAuF;AACvF,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,mFAAmF;AACnF,MAAM,aAAa,GAAG,EAAE,CAAC;AACzB,wFAAwF;AACxF,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;AAEzC;;;GAGG;AACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAS;IACjC,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,OAAO;IACP,QAAQ;IACR,OAAO;IACP,KAAK;IACL,KAAK;IACL,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,KAAK;IACL,KAAK;IACL,KAAK;IACL,OAAO;IACP,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;CACN,CAAC,CAAC;AAaH,0GAA0G;AAC1G,MAAM,UAAU,iBAAiB,CAAC,KAAa,EAAY;IAC1D,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzC,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,YAAY;YAAE,SAAS;QACzC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAClC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjB,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS;YAAE,MAAM;IACtC,CAAC;IACD,OAAO,KAAK,CAAC;AAAA,CACb;AAED,wGAAwG;AACxG,SAAS,YAAY,CAAC,KAAwB,EAAU;IACvD,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAA
M,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AAAA,CACzF;AAED,SAAS,eAAe,CAAC,IAAY,EAAsB;IAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAC3C,IAAI,OAAO,CAAC,MAAM,IAAI,iBAAiB;QAAE,OAAO,OAAO,CAAC;IACxD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,KAAG,CAAC;AAAA,CACrD;AAUD,SAAS,KAAK,CACb,UAA+B,EAC/B,IAAuB,EACvB,GAAW,EACX,SAAiB,EACI;IACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/B,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,MAAM,IAAI,GAAG,CAAC,OAAkB,EAAQ,EAAE,CAAC;YAC1C,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,CAAC,OAAO,CAAC,CAAC;QAAA,CACjB,CAAC;QACF,IAAI,CAAC;YACJ,UAAU,CACT,IAAI,EACJ,CAAC,GAAG,IAAI,CAAC,EACT,EAAE,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,EAC7F,CAAC,KAA+B,EAAE,MAAc,EAAE,EAAE,CAAC;gBACpD,MAAM,GAAG,GAAG,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;gBACrD,IAAI,CAAC,KAAK,EAAE,CAAC;oBACZ,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;oBAChD,OAAO;gBACR,CAAC;gBACD,mFAAiF;gBACjF,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;oBACtB,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;oBAChD,OAAO;gBACR,CAAC;gBACD,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC,CAAC;YAAA,CAClE,CACD,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACR,iFAAiF;YACjF,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;IAAA,CACD,CAAC,CAAC;AAAA,CACH;AAED,yFAAyF;AACzF,SAAS,aAAa,CAAC,IAAY,EAAU;IAC5C,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAAA,CACpD;AAED,SAAS,aAAa,CAAC,MAAc,EAAY;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CA
AC,CAAC;IACtD,CAAC;IACD,OAAO,KAAK,CAAC;AAAA,CACb;AAQD,SAAS,mBAAmB,CAAC,MAAc,EAAkB;IAC5D,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC/B,gFAAgF;QAChF,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC5F,CAAC;IACD,OAAO,OAAO,CAAC;AAAA,CACf;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,IAAiC,EAA0B;IACxG,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IACxC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAK,QAA2C,CAAC;IACnF,IAAI,CAAC,GAAG,IAAI,UAAU,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,KAAK,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,oBAAoB,CAAC;IACnD,MAAM,eAAe,GAAG,GAAW,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAEnF,oGAAoG;IACpG,4EAA4E;IAC5E,MAAM,aAAa,GAAG;QACrB,sBAAsB;QACtB,aAAa;QACb,GAAG;QACH,iBAAiB;QACjB,cAAc;QACd,eAAe;QACf,gBAAgB;QAChB,YAAY;QACZ,SAAS;QACT,OAAO;QACP,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACxC,4FAA4F;QAC5F,8FAA8F;QAC9F,IAAI;QACJ,GAAG;KACH,CAAC;IACF,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE,aAAa,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC,CAAC;IACjF,IAAI,SAAS,CAAC,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE;QAAE,OAAO,EAAE,CAAC;IAElD,MAAM,cAAc,GAAG,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IAC/E,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE3C,kGAAkG;IAClG,uEAAqE;IACrE,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,WAAW,GAAG;QACnB,IAAI;QACJ,IAAI;QACJ,cAAc;QACd,IAAI;QACJ,GAAG;QACH,iBAAiB;QACjB,cAAc;QACd,eAAe;QACf,SAAS;QACT,OAAO;QACP,IAAI;QACJ,QAAQ;QACR,IAAI;QACJ,GAAG,cAAc;KACjB,CAAC;IACF,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE,WAAW,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC,CAAC;IAE7E,MAAM,OAAO,GAAkB,EA
AE,CAAC;IAClC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IACpC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACvC,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;QAChB,KAAK,MAAM,KAAK,IAAI,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACzD,IAAI,OAAO,CAAC,MAAM,IAAI,UAAU;gBAAE,MAAM;YACxC,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YACpC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACtB,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC1B,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,OAAO,CAAC,IAAI,CAAC;gBACZ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;gBACrB,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,KAAK,CAAC,IAAI;gBACf,OAAO,EAAE,IAAI;gBACb,GAAG,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7C,QAAQ,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;aAC9C,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED,kGAAkG;IAClG,kFAAkF;IAClF,KAAK,MAAM,IAAI,IAAI,cAAc,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,MAAM,IAAI,UAAU;YAAE,MAAM;QACxC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAClC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC;YACZ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;YACrB,IAAI,EAAE,WAAW;YACjB,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI;YACT,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,EAAE,SAAS,EAAE,WAAW,EAAE;SACpC,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;AAAA,CACpC","sourcesContent":["import { type ExecFileException, execFile } from \"node:child_process\";\n\n/** Structural DI seam: only the callback overload the collector actually uses — demanding\n * node's full `typeof execFile` (with `__promisify__`) makes plain test mocks unassignable. 
*/\nexport type WorkspaceExecFileFn = (\n\tcommand: string,\n\targs: readonly string[],\n\toptions: { cwd?: string; timeout?: number; maxBuffer?: number; encoding?: string; windowsHide?: boolean },\n\tcallback: (error: ExecFileException | null, stdout: string, stderr: string) => void,\n) => unknown;\n\nimport type { EvidenceRef } from \"../autonomy/contracts.ts\";\n\n/**\n * Best-effort workspace research source collector.\n *\n * Feeds the autonomous research lane POINTER-FIRST sources: a repo-relative path, a bounded excerpt,\n * and (when known) a line number — never whole file bodies. It runs `rg` under the session cwd exactly\n * like the grep tool does, so it only surfaces content ripgrep already matched. Collection is bounded\n * (a shared wall-clock deadline, a candidate cap, ripgrep's own binary/oversize skipping) and never\n * throws: if `rg` is missing or errors, it returns `[]`, which is today's \"no collector\" behavior.\n *\n * The returned sources are `EvidenceRef`s (the runner's source type) tagged `kind: \"workspace\"`.\n */\n\n/** Search terms shorter than this are too noisy to be useful discriminators. */\nconst MIN_TERM_LEN = 3;\n/** Cap on derived search terms; keeps the discovery pattern small and the collector cheap. */\nconst MAX_TERMS = 4;\n/** Pointer excerpts are bounded so we never spill a whole line (or a whole file) into the prompt. */\nconst EXCERPT_MAX_CHARS = 200;\n/** Shared wall-clock budget for the whole collection pass (both ripgrep calls together). */\nconst COLLECTION_BUDGET_MS = 5000;\n/** Floor for any single ripgrep call so a nearly-spent budget still gives ripgrep a chance to run. */\nconst MIN_CALL_MS = 500;\n/** ripgrep skips files larger than this; oversized files never contribute a source. */\nconst MAX_FILESIZE = \"1M\";\n/** Upper bound on candidate files carried from discovery into the pointer pass. */\nconst CANDIDATE_CAP = 24;\n/** Generous stdout ceiling; overflow degrades to fewer sources rather than throwing. 
*/\nconst MAX_BUFFER_BYTES = 8 * 1024 * 1024;\n\n/**\n * A small, deliberately conservative English stopword set plus the structural words that show up in\n * goal/requirement text. Anything not here that is >= MIN_TERM_LEN survives as a search term.\n */\nconst STOPWORDS = new Set<string>([\n\t\"the\",\n\t\"and\",\n\t\"for\",\n\t\"with\",\n\t\"that\",\n\t\"this\",\n\t\"from\",\n\t\"into\",\n\t\"are\",\n\t\"was\",\n\t\"were\",\n\t\"but\",\n\t\"not\",\n\t\"you\",\n\t\"your\",\n\t\"our\",\n\t\"all\",\n\t\"any\",\n\t\"can\",\n\t\"has\",\n\t\"have\",\n\t\"had\",\n\t\"will\",\n\t\"would\",\n\t\"should\",\n\t\"could\",\n\t\"its\",\n\t\"his\",\n\t\"her\",\n\t\"their\",\n\t\"them\",\n\t\"they\",\n\t\"she\",\n\t\"him\",\n\t\"who\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"how\",\n\t\"why\",\n\t\"use\",\n\t\"using\",\n\t\"used\",\n\t\"via\",\n\t\"per\",\n\t\"out\",\n\t\"off\",\n\t\"over\",\n\t\"under\",\n\t\"then\",\n\t\"than\",\n\t\"add\",\n\t\"adds\",\n\t\"get\",\n\t\"gets\",\n\t\"set\",\n\t\"sets\",\n\t\"new\",\n\t\"old\",\n\t\"one\",\n\t\"two\",\n\t\"let\",\n\t\"run\",\n\t\"runs\",\n]);\n\nexport interface CollectWorkspaceSourcesArgs {\n\t/** Free text (goal + requirement text) that search terms are derived from. */\n\tquery: string;\n\t/** Session working directory; ripgrep runs here and paths are reported relative to it. */\n\tcwd: string;\n\t/** Hard cap on returned sources; also the lane's source budget. */\n\tmaxSources: number;\n\t/** Injected for tests; defaults to node's `execFile`. */\n\texecFileFn?: WorkspaceExecFileFn;\n}\n\n/** Split on non-word runs, lowercase, drop stopwords/short/dupes, keep source order, cap at MAX_TERMS. 
*/\nexport function deriveSearchTerms(query: string): string[] {\n\tconst seen = new Set<string>();\n\tconst terms: string[] = [];\n\tfor (const raw of query.split(/[^\\w]+/)) {\n\t\tconst term = raw.toLowerCase();\n\t\tif (term.length < MIN_TERM_LEN) continue;\n\t\tif (STOPWORDS.has(term)) continue;\n\t\tif (seen.has(term)) continue;\n\t\tseen.add(term);\n\t\tterms.push(term);\n\t\tif (terms.length >= MAX_TERMS) break;\n\t}\n\treturn terms;\n}\n\n/** The most specific term (longest wins; ties keep the earliest) drives the line-level pointer pass. */\nfunction pickBestTerm(terms: readonly string[]): string {\n\treturn terms.reduce((best, term) => (term.length > best.length ? term : best), terms[0]);\n}\n\nfunction truncateExcerpt(text: string): string | undefined {\n\tconst trimmed = text.trim();\n\tif (trimmed.length === 0) return undefined;\n\tif (trimmed.length <= EXCERPT_MAX_CHARS) return trimmed;\n\treturn `${trimmed.slice(0, EXCERPT_MAX_CHARS - 1)}…`;\n}\n\ninterface RgOutcome {\n\t/** True when ripgrep ran to a usable result (matches found, or a clean \"no matches\"). */\n\tok: boolean;\n\tstdout: string;\n\t/** True when the `rg` binary could not be spawned at all — the collector bails entirely. */\n\tmissing: boolean;\n}\n\nfunction runRg(\n\texecFileFn: WorkspaceExecFileFn,\n\targs: readonly string[],\n\tcwd: string,\n\ttimeoutMs: number,\n): Promise<RgOutcome> {\n\treturn new Promise((resolve) => {\n\t\tlet settled = false;\n\t\tconst done = (outcome: RgOutcome): void => {\n\t\t\tif (settled) return;\n\t\t\tsettled = true;\n\t\t\tresolve(outcome);\n\t\t};\n\t\ttry {\n\t\t\texecFileFn(\n\t\t\t\t\"rg\",\n\t\t\t\t[...args],\n\t\t\t\t{ cwd, timeout: timeoutMs, maxBuffer: MAX_BUFFER_BYTES, encoding: \"utf8\", windowsHide: true },\n\t\t\t\t(error: ExecFileException | null, stdout: string) => {\n\t\t\t\t\tconst out = typeof stdout === \"string\" ? 
stdout : \"\";\n\t\t\t\t\tif (!error) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\t// Exit code 1 is ripgrep's \"no matches\" — a clean, usable result, not a failure.\n\t\t\t\t\tif (error.code === 1) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\tdone({ ok: false, stdout: \"\", missing: error.code === \"ENOENT\" });\n\t\t\t\t},\n\t\t\t);\n\t\t} catch {\n\t\t\t// A synchronous spawn failure (e.g. rg entirely absent) is treated as \"missing\".\n\t\t\tdone({ ok: false, stdout: \"\", missing: true });\n\t\t}\n\t});\n}\n\n/** rg prints `./foo` when the search root is `.`; keep sources cleanly repo-relative. */\nfunction normalizePath(path: string): string {\n\treturn path.startsWith(\"./\") ? path.slice(2) : path;\n}\n\nfunction parseFileList(stdout: string): string[] {\n\tconst files: string[] = [];\n\tfor (const line of stdout.split(/\\r?\\n/)) {\n\t\tconst path = line.trim();\n\t\tif (path.length > 0) files.push(normalizePath(path));\n\t}\n\treturn files;\n}\n\ninterface ContentMatch {\n\tpath: string;\n\tline: number;\n\ttext: string;\n}\n\nfunction parseContentMatches(stdout: string): ContentMatch[] {\n\tconst matches: ContentMatch[] = [];\n\tfor (const raw of stdout.split(/\\r?\\n/)) {\n\t\tif (raw.length === 0) continue;\n\t\t// `-H -n --no-heading` yields `path:line:text`; text may itself contain colons.\n\t\tconst parsed = /^(.+?):(\\d+):(.*)$/.exec(raw);\n\t\tif (!parsed) continue;\n\t\tmatches.push({ path: normalizePath(parsed[1]), line: Number(parsed[2]), text: parsed[3] });\n\t}\n\treturn matches;\n}\n\nexport async function collectWorkspaceSources(args: CollectWorkspaceSourcesArgs): Promise<EvidenceRef[]> {\n\tconst { query, cwd, maxSources } = args;\n\tconst execFileFn = args.execFileFn ?? 
(execFile as unknown as WorkspaceExecFileFn);\n\tif (!cwd || maxSources <= 0) return [];\n\n\tconst terms = deriveSearchTerms(query);\n\tif (terms.length === 0) return [];\n\n\tconst deadline = Date.now() + COLLECTION_BUDGET_MS;\n\tconst remainingBudget = (): number => Math.max(MIN_CALL_MS, deadline - Date.now());\n\n\t// Phase 1 (discovery): which files match ANY term. `--max-count 1` stops at the first hit per file;\n\t// ripgrep skips binary and oversized files by default / via --max-filesize.\n\tconst discoveryArgs = [\n\t\t\"--files-with-matches\",\n\t\t\"--max-count\",\n\t\t\"1\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--max-filesize\",\n\t\tMAX_FILESIZE,\n\t\t\"--color\",\n\t\t\"never\",\n\t\t...terms.flatMap((term) => [\"-e\", term]),\n\t\t// Explicit search root: execFile hands rg a piped stdin, and rg with no path argument would\n\t\t// read (and block on) that pipe instead of scanning the tree. \".\" keeps output repo-relative.\n\t\t\"--\",\n\t\t\".\",\n\t];\n\tconst discovery = await runRg(execFileFn, discoveryArgs, cwd, remainingBudget());\n\tif (discovery.missing || !discovery.ok) return [];\n\n\tconst candidateFiles = parseFileList(discovery.stdout).slice(0, CANDIDATE_CAP);\n\tif (candidateFiles.length === 0) return [];\n\n\t// Phase 2 (pointers): line-level hits for the single best term, scanned only over files discovery\n\t// already matched — so we never read a file ripgrep did not surface.\n\tconst bestTerm = pickBestTerm(terms);\n\tconst contentArgs = [\n\t\t\"-H\",\n\t\t\"-n\",\n\t\t\"--no-heading\",\n\t\t\"-m\",\n\t\t\"2\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--color\",\n\t\t\"never\",\n\t\t\"-e\",\n\t\tbestTerm,\n\t\t\"--\",\n\t\t...candidateFiles,\n\t];\n\tconst content = await runRg(execFileFn, contentArgs, cwd, remainingBudget());\n\n\tconst sources: EvidenceRef[] = [];\n\tconst seenPaths = new Set<string>();\n\tconst seenLineKeys = new 
Set<string>();\n\tlet counter = 0;\n\n\tif (content.ok) {\n\t\tfor (const match of parseContentMatches(content.stdout)) {\n\t\t\tif (sources.length >= maxSources) break;\n\t\t\tconst key = `${match.path}:${match.line}`;\n\t\t\tif (seenLineKeys.has(key)) continue;\n\t\t\tseenLineKeys.add(key);\n\t\t\tseenPaths.add(match.path);\n\t\t\tconst excerpt = truncateExcerpt(match.text);\n\t\t\tsources.push({\n\t\t\t\tid: `ws-${++counter}`,\n\t\t\t\tkind: \"workspace\",\n\t\t\t\ttitle: `${match.path}:${match.line}`,\n\t\t\t\turi: match.path,\n\t\t\t\ttrusted: true,\n\t\t\t\t...(excerpt !== undefined ? { excerpt } : {}),\n\t\t\t\tmetadata: { line: match.line, term: bestTerm },\n\t\t\t});\n\t\t}\n\t}\n\n\t// Fill the remaining budget with file-level pointers for candidates that matched a secondary term\n\t// (and so produced no best-term line). Still pointer-first: a path, never a body.\n\tfor (const file of candidateFiles) {\n\t\tif (sources.length >= maxSources) break;\n\t\tif (seenPaths.has(file)) continue;\n\t\tseenPaths.add(file);\n\t\tsources.push({\n\t\t\tid: `ws-${++counter}`,\n\t\t\tkind: \"workspace\",\n\t\t\ttitle: file,\n\t\t\turi: file,\n\t\t\ttrusted: true,\n\t\t\tmetadata: { matchedBy: \"discovery\" },\n\t\t});\n\t}\n\n\treturn sources.slice(0, maxSources);\n}\n"]}
|