npm - @caupulican/pi-adaptative - Versions diffs - 0.80.96 → 0.80.98 - Mend

@caupulican/pi-adaptative 0.80.96 → 0.80.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/dist/core/research/workspace-collector.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"workspace-collector.d.ts","sourceRoot":"","sources":["../../../src/core/research/workspace-collector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,iBAAiB,EAAY,MAAM,oBAAoB,CAAC;AAEtE;+FAC+F;AAC/F,MAAM,MAAM,mBAAmB,GAAG,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,OAAO,CAAA;CAAE,EACzG,QAAQ,EAAE,CAAC,KAAK,EAAE,iBAAiB,GAAG,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,KAC/E,OAAO,CAAC;AAEb,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAuG5D,MAAM,WAAW,2BAA2B;IAC3C,8EAA8E;IAC9E,KAAK,EAAE,MAAM,CAAC;IACd,0FAA0F;IAC1F,GAAG,EAAE,MAAM,CAAC;IACZ,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;IACnB,yDAAyD;IACzD,UAAU,CAAC,EAAE,mBAAmB,CAAC;CACjC;AAED,0GAA0G;AAC1G,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAazD;AA6FD,wBAAsB,uBAAuB,CAAC,IAAI,EAAE,2BAA2B,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAmGvG","sourcesContent":["import { type ExecFileException, execFile } from \"node:child_process\";\n\n/** Structural DI seam: only the callback overload the collector actually uses — demanding\n * node's full `typeof execFile` (with `__promisify__`) makes plain test mocks unassignable. */\nexport type WorkspaceExecFileFn = (\n\tcommand: string,\n\targs: readonly string[],\n\toptions: { cwd?: string; timeout?: number; maxBuffer?: number; encoding?: string; windowsHide?: boolean },\n\tcallback: (error: ExecFileException | null, stdout: string, stderr: string) => void,\n) => unknown;\n\nimport type { EvidenceRef } from \"../autonomy/contracts.ts\";\n\n/**\n * Best-effort workspace research source collector.\n *\n * Feeds the autonomous research lane POINTER-FIRST sources: a repo-relative path, a bounded excerpt,\n * and (when known) a line number — never whole file bodies. It runs `rg` under the session cwd exactly\n * like the grep tool does, so it only surfaces content ripgrep already matched. 
Collection is bounded\n * (a shared wall-clock deadline, a candidate cap, ripgrep's own binary/oversize skipping) and never\n * throws: if `rg` is missing or errors, it returns `[]`, which is today's \"no collector\" behavior.\n *\n * The returned sources are `EvidenceRef`s (the runner's source type) tagged `kind: \"workspace\"`.\n */\n\n/** Search terms shorter than this are too noisy to be useful discriminators. */\nconst MIN_TERM_LEN = 3;\n/** Cap on derived search terms; keeps the discovery pattern small and the collector cheap. */\nconst MAX_TERMS = 4;\n/** Pointer excerpts are bounded so we never spill a whole line (or a whole file) into the prompt. */\nconst EXCERPT_MAX_CHARS = 200;\n/** Shared wall-clock budget for the whole collection pass (both ripgrep calls together). */\nconst COLLECTION_BUDGET_MS = 5000;\n/** Floor for any single ripgrep call so a nearly-spent budget still gives ripgrep a chance to run. */\nconst MIN_CALL_MS = 500;\n/** ripgrep skips files larger than this; oversized files never contribute a source. */\nconst MAX_FILESIZE = \"1M\";\n/** Upper bound on candidate files carried from discovery into the pointer pass. */\nconst CANDIDATE_CAP = 24;\n/** Generous stdout ceiling; overflow degrades to fewer sources rather than throwing. */\nconst MAX_BUFFER_BYTES = 8 * 1024 * 1024;\n\n/**\n * A small, deliberately conservative English stopword set plus the structural words that show up in\n * goal/requirement text. 
Anything not here that is >= MIN_TERM_LEN survives as a search term.\n */\nconst STOPWORDS = new Set<string>([\n\t\"the\",\n\t\"and\",\n\t\"for\",\n\t\"with\",\n\t\"that\",\n\t\"this\",\n\t\"from\",\n\t\"into\",\n\t\"are\",\n\t\"was\",\n\t\"were\",\n\t\"but\",\n\t\"not\",\n\t\"you\",\n\t\"your\",\n\t\"our\",\n\t\"all\",\n\t\"any\",\n\t\"can\",\n\t\"has\",\n\t\"have\",\n\t\"had\",\n\t\"will\",\n\t\"would\",\n\t\"should\",\n\t\"could\",\n\t\"its\",\n\t\"his\",\n\t\"her\",\n\t\"their\",\n\t\"them\",\n\t\"they\",\n\t\"she\",\n\t\"him\",\n\t\"who\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"how\",\n\t\"why\",\n\t\"use\",\n\t\"using\",\n\t\"used\",\n\t\"via\",\n\t\"per\",\n\t\"out\",\n\t\"off\",\n\t\"over\",\n\t\"under\",\n\t\"then\",\n\t\"than\",\n\t\"add\",\n\t\"adds\",\n\t\"get\",\n\t\"gets\",\n\t\"set\",\n\t\"sets\",\n\t\"new\",\n\t\"old\",\n\t\"one\",\n\t\"two\",\n\t\"let\",\n\t\"run\",\n\t\"runs\",\n]);\n\nexport interface CollectWorkspaceSourcesArgs {\n\t/** Free text (goal + requirement text) that search terms are derived from. */\n\tquery: string;\n\t/** Session working directory; ripgrep runs here and paths are reported relative to it. */\n\tcwd: string;\n\t/** Hard cap on returned sources; also the lane's source budget. */\n\tmaxSources: number;\n\t/** Injected for tests; defaults to node's `execFile`. */\n\texecFileFn?: WorkspaceExecFileFn;\n}\n\n/** Split on non-word runs, lowercase, drop stopwords/short/dupes, keep source order, cap at MAX_TERMS. 
*/\nexport function deriveSearchTerms(query: string): string[] {\n\tconst seen = new Set<string>();\n\tconst terms: string[] = [];\n\tfor (const raw of query.split(/[^\\w]+/)) {\n\t\tconst term = raw.toLowerCase();\n\t\tif (term.length < MIN_TERM_LEN) continue;\n\t\tif (STOPWORDS.has(term)) continue;\n\t\tif (seen.has(term)) continue;\n\t\tseen.add(term);\n\t\tterms.push(term);\n\t\tif (terms.length >= MAX_TERMS) break;\n\t}\n\treturn terms;\n}\n\n/** The most specific term (longest wins; ties keep the earliest) drives the line-level pointer pass. */\nfunction pickBestTerm(terms: readonly string[]): string {\n\treturn terms.reduce((best, term) => (term.length > best.length ? term : best), terms[0]);\n}\n\nfunction truncateExcerpt(text: string): string | undefined {\n\tconst trimmed = text.trim();\n\tif (trimmed.length === 0) return undefined;\n\tif (trimmed.length <= EXCERPT_MAX_CHARS) return trimmed;\n\treturn `${trimmed.slice(0, EXCERPT_MAX_CHARS - 1)}…`;\n}\n\ninterface RgOutcome {\n\t/** True when ripgrep ran to a usable result (matches found, or a clean \"no matches\"). */\n\tok: boolean;\n\tstdout: string;\n\t/** True when the `rg` binary could not be spawned at all — the collector bails entirely. */\n\tmissing: boolean;\n}\n\nfunction runRg(\n\texecFileFn: WorkspaceExecFileFn,\n\targs: readonly string[],\n\tcwd: string,\n\ttimeoutMs: number,\n): Promise<RgOutcome> {\n\treturn new Promise((resolve) => {\n\t\tlet settled = false;\n\t\tconst done = (outcome: RgOutcome): void => {\n\t\t\tif (settled) return;\n\t\t\tsettled = true;\n\t\t\tresolve(outcome);\n\t\t};\n\t\ttry {\n\t\t\texecFileFn(\n\t\t\t\t\"rg\",\n\t\t\t\t[...args],\n\t\t\t\t{ cwd, timeout: timeoutMs, maxBuffer: MAX_BUFFER_BYTES, encoding: \"utf8\", windowsHide: true },\n\t\t\t\t(error: ExecFileException | null, stdout: string) => {\n\t\t\t\t\tconst out = typeof stdout === \"string\" ? 
stdout : \"\";\n\t\t\t\t\tif (!error) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\t// Exit code 1 is ripgrep's \"no matches\" — a clean, usable result, not a failure.\n\t\t\t\t\tif (error.code === 1) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\tdone({ ok: false, stdout: \"\", missing: error.code === \"ENOENT\" });\n\t\t\t\t},\n\t\t\t);\n\t\t} catch {\n\t\t\t// A synchronous spawn failure (e.g. rg entirely absent) is treated as \"missing\".\n\t\t\tdone({ ok: false, stdout: \"\", missing: true });\n\t\t}\n\t});\n}\n\n/** rg prints `./foo` when the search root is `.`; keep sources cleanly repo-relative. */\nfunction normalizePath(path: string): string {\n\treturn path.startsWith(\"./\") ? path.slice(2) : path;\n}\n\nfunction parseFileList(stdout: string): string[] {\n\tconst files: string[] = [];\n\tfor (const line of stdout.split(/\\r?\\n/)) {\n\t\tconst path = line.trim();\n\t\tif (path.length > 0) files.push(normalizePath(path));\n\t}\n\treturn files;\n}\n\ninterface ContentMatch {\n\tpath: string;\n\tline: number;\n\ttext: string;\n}\n\nfunction parseContentMatches(stdout: string): ContentMatch[] {\n\tconst matches: ContentMatch[] = [];\n\tfor (const raw of stdout.split(/\\r?\\n/)) {\n\t\tif (raw.length === 0) continue;\n\t\t// `-H -n --no-heading` yields `path:line:text`; text may itself contain colons.\n\t\tconst parsed = /^(.+?):(\\d+):(.*)$/.exec(raw);\n\t\tif (!parsed) continue;\n\t\tmatches.push({ path: normalizePath(parsed[1]), line: Number(parsed[2]), text: parsed[3] });\n\t}\n\treturn matches;\n}\n\nexport async function collectWorkspaceSources(args: CollectWorkspaceSourcesArgs): Promise<EvidenceRef[]> {\n\tconst { query, cwd, maxSources } = args;\n\tconst execFileFn = args.execFileFn ?? 
(execFile as unknown as WorkspaceExecFileFn);\n\tif (!cwd || maxSources <= 0) return [];\n\n\tconst terms = deriveSearchTerms(query);\n\tif (terms.length === 0) return [];\n\n\tconst deadline = Date.now() + COLLECTION_BUDGET_MS;\n\tconst remainingBudget = (): number => Math.max(MIN_CALL_MS, deadline - Date.now());\n\n\t// Phase 1 (discovery): which files match ANY term. `--max-count 1` stops at the first hit per file;\n\t// ripgrep skips binary and oversized files by default / via --max-filesize.\n\tconst discoveryArgs = [\n\t\t\"--files-with-matches\",\n\t\t\"--max-count\",\n\t\t\"1\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--max-filesize\",\n\t\tMAX_FILESIZE,\n\t\t\"--color\",\n\t\t\"never\",\n\t\t...terms.flatMap((term) => [\"-e\", term]),\n\t\t// Explicit search root: execFile hands rg a piped stdin, and rg with no path argument would\n\t\t// read (and block on) that pipe instead of scanning the tree. \".\" keeps output repo-relative.\n\t\t\"--\",\n\t\t\".\",\n\t];\n\tconst discovery = await runRg(execFileFn, discoveryArgs, cwd, remainingBudget());\n\tif (discovery.missing || !discovery.ok) return [];\n\n\tconst candidateFiles = parseFileList(discovery.stdout).slice(0, CANDIDATE_CAP);\n\tif (candidateFiles.length === 0) return [];\n\n\t// Phase 2 (pointers): line-level hits for the single best term, scanned only over files discovery\n\t// already matched — so we never read a file ripgrep did not surface.\n\tconst bestTerm = pickBestTerm(terms);\n\tconst contentArgs = [\n\t\t\"-H\",\n\t\t\"-n\",\n\t\t\"--no-heading\",\n\t\t\"-m\",\n\t\t\"2\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--color\",\n\t\t\"never\",\n\t\t\"-e\",\n\t\tbestTerm,\n\t\t\"--\",\n\t\t...candidateFiles,\n\t];\n\tconst content = await runRg(execFileFn, contentArgs, cwd, remainingBudget());\n\n\tconst sources: EvidenceRef[] = [];\n\tconst seenPaths = new Set<string>();\n\tconst seenLineKeys = new 
Set<string>();\n\tlet counter = 0;\n\n\tif (content.ok) {\n\t\tfor (const match of parseContentMatches(content.stdout)) {\n\t\t\tif (sources.length >= maxSources) break;\n\t\t\tconst key = `${match.path}:${match.line}`;\n\t\t\tif (seenLineKeys.has(key)) continue;\n\t\t\tseenLineKeys.add(key);\n\t\t\tseenPaths.add(match.path);\n\t\t\tconst excerpt = truncateExcerpt(match.text);\n\t\t\tsources.push({\n\t\t\t\tid: `ws-${++counter}`,\n\t\t\t\tkind: \"workspace\",\n\t\t\t\ttitle: `${match.path}:${match.line}`,\n\t\t\t\turi: match.path,\n\t\t\t\ttrusted: true,\n\t\t\t\t...(excerpt !== undefined ? { excerpt } : {}),\n\t\t\t\tmetadata: { line: match.line, term: bestTerm },\n\t\t\t});\n\t\t}\n\t}\n\n\t// Fill the remaining budget with file-level pointers for candidates that matched a secondary term\n\t// (and so produced no best-term line). Still pointer-first: a path, never a body.\n\tfor (const file of candidateFiles) {\n\t\tif (sources.length >= maxSources) break;\n\t\tif (seenPaths.has(file)) continue;\n\t\tseenPaths.add(file);\n\t\tsources.push({\n\t\t\tid: `ws-${++counter}`,\n\t\t\tkind: \"workspace\",\n\t\t\ttitle: file,\n\t\t\turi: file,\n\t\t\ttrusted: true,\n\t\t\tmetadata: { matchedBy: \"discovery\" },\n\t\t});\n\t}\n\n\treturn sources.slice(0, maxSources);\n}\n"]}

package/dist/core/research/workspace-collector.js ADDED Viewed

@@ -0,0 +1,286 @@
+import { execFile } from "node:child_process";
+/**
+ * Best-effort workspace research source collector.
+ *
+ * Feeds the autonomous research lane POINTER-FIRST sources: a repo-relative path, a bounded excerpt,
+ * and (when known) a line number — never whole file bodies. It runs `rg` under the session cwd exactly
+ * like the grep tool does, so it only surfaces content ripgrep already matched. Collection is bounded
+ * (a shared wall-clock deadline, a candidate cap, ripgrep's own binary/oversize skipping) and never
+ * throws: if `rg` is missing or errors, it returns `[]`, which is today's "no collector" behavior.
+ *
+ * The returned sources are `EvidenceRef`s (the runner's source type) tagged `kind: "workspace"`.
+ */
+/** Minimum length (in UTF-16 code units) a derived term must have; deriveSearchTerms drops shorter tokens as too noisy. */
+const MIN_TERM_LEN = 3;
+/** Maximum number of terms deriveSearchTerms returns; keeps the discovery pattern small and the collector cheap. */
+const MAX_TERMS = 4;
+/** Longest excerpt truncateExcerpt emits (trailing ellipsis included), so a whole line never spills into the prompt. */
+const EXCERPT_MAX_CHARS = 200;
+/** Wall-clock budget in milliseconds shared by the whole collection pass (both ripgrep calls together). */
+const COLLECTION_BUDGET_MS = 5000;
+/** Minimum timeout in milliseconds handed to any single ripgrep call, even when the shared budget is nearly spent. */
+const MIN_CALL_MS = 500;
+/** Value passed to ripgrep's `--max-filesize` in the discovery pass; larger files never contribute a source. */
+const MAX_FILESIZE = "1M";
+/** Upper bound on candidate files carried from discovery into the pointer pass. */
+const CANDIDATE_CAP = 24;
+/** stdout ceiling in bytes (8 MiB) handed to execFile as `maxBuffer` for every ripgrep call. */
+const MAX_BUFFER_BYTES = 8 * 1024 * 1024;
+/**
+ * A small, deliberately conservative English stopword set plus the structural words that show up in
+ * goal/requirement text. Anything not here that is >= MIN_TERM_LEN survives as a search term.
+ */
+const STOPWORDS = new Set([
+    "the",
+    "and",
+    "for",
+    "with",
+    "that",
+    "this",
+    "from",
+    "into",
+    "are",
+    "was",
+    "were",
+    "but",
+    "not",
+    "you",
+    "your",
+    "our",
+    "all",
+    "any",
+    "can",
+    "has",
+    "have",
+    "had",
+    "will",
+    "would",
+    "should",
+    "could",
+    "its",
+    "his",
+    "her",
+    "their",
+    "them",
+    "they",
+    "she",
+    "him",
+    "who",
+    "what",
+    "when",
+    "where",
+    "which",
+    "how",
+    "why",
+    "use",
+    "using",
+    "used",
+    "via",
+    "per",
+    "out",
+    "off",
+    "over",
+    "under",
+    "then",
+    "than",
+    "add",
+    "adds",
+    "get",
+    "gets",
+    "set",
+    "sets",
+    "new",
+    "old",
+    "one",
+    "two",
+    "let",
+    "run",
+    "runs",
+]);
+/** Split on non-word runs, lowercase, drop stopwords/short/dupes, keep source order, cap at MAX_TERMS. */
+export function deriveSearchTerms(query) {
+    const seen = new Set();
+    const terms = [];
+    for (const raw of query.split(/[^\w]+/)) {
+        const term = raw.toLowerCase();
+        if (term.length < MIN_TERM_LEN)
+            continue;
+        if (STOPWORDS.has(term))
+            continue;
+        if (seen.has(term))
+            continue;
+        seen.add(term);
+        terms.push(term);
+        if (terms.length >= MAX_TERMS)
+            break;
+    }
+    return terms;
+}
+/** The most specific term (longest wins; ties keep the earliest) drives the line-level pointer pass. */
+function pickBestTerm(terms) {
+    return terms.reduce((best, term) => (term.length > best.length ? term : best), terms[0]);
+}
+function truncateExcerpt(text) {
+    const trimmed = text.trim();
+    if (trimmed.length === 0)
+        return undefined;
+    if (trimmed.length <= EXCERPT_MAX_CHARS)
+        return trimmed;
+    return `${trimmed.slice(0, EXCERPT_MAX_CHARS - 1)}…`;
+}
+function runRg(execFileFn, args, cwd, timeoutMs) {
+    return new Promise((resolve) => {
+        let settled = false;
+        const done = (outcome) => {
+            if (settled)
+                return;
+            settled = true;
+            resolve(outcome);
+        };
+        try {
+            execFileFn("rg", [...args], { cwd, timeout: timeoutMs, maxBuffer: MAX_BUFFER_BYTES, encoding: "utf8", windowsHide: true }, (error, stdout) => {
+                const out = typeof stdout === "string" ? stdout : "";
+                if (!error) {
+                    done({ ok: true, stdout: out, missing: false });
+                    return;
+                }
+                // Exit code 1 is ripgrep's "no matches" — a clean, usable result, not a failure.
+                if (error.code === 1) {
+                    done({ ok: true, stdout: out, missing: false });
+                    return;
+                }
+                done({ ok: false, stdout: "", missing: error.code === "ENOENT" });
+            });
+        }
+        catch {
+            // A synchronous spawn failure (e.g. rg entirely absent) is treated as "missing".
+            done({ ok: false, stdout: "", missing: true });
+        }
+    });
+}
+/** rg prints `./foo` when the search root is `.`; keep sources cleanly repo-relative. */
+function normalizePath(path) {
+    return path.startsWith("./") ? path.slice(2) : path;
+}
+function parseFileList(stdout) {
+    const files = [];
+    for (const line of stdout.split(/\r?\n/)) {
+        const path = line.trim();
+        if (path.length > 0)
+            files.push(normalizePath(path));
+    }
+    return files;
+}
+function parseContentMatches(stdout) {
+    const matches = [];
+    for (const raw of stdout.split(/\r?\n/)) {
+        if (raw.length === 0)
+            continue;
+        // `-H -n --no-heading` yields `path:line:text`; text may itself contain colons.
+        const parsed = /^(.+?):(\d+):(.*)$/.exec(raw);
+        if (!parsed)
+            continue;
+        matches.push({ path: normalizePath(parsed[1]), line: Number(parsed[2]), text: parsed[3] });
+    }
+    return matches;
+}
+export async function collectWorkspaceSources(args) {
+    const { query, cwd, maxSources } = args;
+    const execFileFn = args.execFileFn ?? execFile;
+    if (!cwd || maxSources <= 0)
+        return [];
+    const terms = deriveSearchTerms(query);
+    if (terms.length === 0)
+        return [];
+    const deadline = Date.now() + COLLECTION_BUDGET_MS;
+    const remainingBudget = () => Math.max(MIN_CALL_MS, deadline - Date.now());
+    // Phase 1 (discovery): which files match ANY term. `--max-count 1` stops at the first hit per file;
+    // ripgrep skips binary and oversized files by default / via --max-filesize.
+    const discoveryArgs = [
+        "--files-with-matches",
+        "--max-count",
+        "1",
+        "--fixed-strings",
+        "--smart-case",
+        "--no-messages",
+        "--max-filesize",
+        MAX_FILESIZE,
+        "--color",
+        "never",
+        ...terms.flatMap((term) => ["-e", term]),
+        // Explicit search root: execFile hands rg a piped stdin, and rg with no path argument would
+        // read (and block on) that pipe instead of scanning the tree. "." keeps output repo-relative.
+        "--",
+        ".",
+    ];
+    const discovery = await runRg(execFileFn, discoveryArgs, cwd, remainingBudget());
+    if (discovery.missing || !discovery.ok)
+        return [];
+    const candidateFiles = parseFileList(discovery.stdout).slice(0, CANDIDATE_CAP);
+    if (candidateFiles.length === 0)
+        return [];
+    // Phase 2 (pointers): line-level hits for the single best term, scanned only over files discovery
+    // already matched — so we never read a file ripgrep did not surface.
+    const bestTerm = pickBestTerm(terms);
+    const contentArgs = [
+        "-H",
+        "-n",
+        "--no-heading",
+        "-m",
+        "2",
+        "--fixed-strings",
+        "--smart-case",
+        "--no-messages",
+        "--color",
+        "never",
+        "-e",
+        bestTerm,
+        "--",
+        ...candidateFiles,
+    ];
+    const content = await runRg(execFileFn, contentArgs, cwd, remainingBudget());
+    const sources = [];
+    const seenPaths = new Set();
+    const seenLineKeys = new Set();
+    let counter = 0;
+    if (content.ok) {
+        for (const match of parseContentMatches(content.stdout)) {
+            if (sources.length >= maxSources)
+                break;
+            const key = `${match.path}:${match.line}`;
+            if (seenLineKeys.has(key))
+                continue;
+            seenLineKeys.add(key);
+            seenPaths.add(match.path);
+            const excerpt = truncateExcerpt(match.text);
+            sources.push({
+                id: `ws-${++counter}`,
+                kind: "workspace",
+                title: `${match.path}:${match.line}`,
+                uri: match.path,
+                trusted: true,
+                ...(excerpt !== undefined ? { excerpt } : {}),
+                metadata: { line: match.line, term: bestTerm },
+            });
+        }
+    }
+    // Fill the remaining budget with file-level pointers for candidates that matched a secondary term
+    // (and so produced no best-term line). Still pointer-first: a path, never a body.
+    for (const file of candidateFiles) {
+        if (sources.length >= maxSources)
+            break;
+        if (seenPaths.has(file))
+            continue;
+        seenPaths.add(file);
+        sources.push({
+            id: `ws-${++counter}`,
+            kind: "workspace",
+            title: file,
+            uri: file,
+            trusted: true,
+            metadata: { matchedBy: "discovery" },
+        });
+    }
+    return sources.slice(0, maxSources);
+}
+//# sourceMappingURL=workspace-collector.js.map

package/dist/core/research/workspace-collector.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"workspace-collector.js","sourceRoot":"","sources":["../../../src/core/research/workspace-collector.ts"],"names":[],"mappings":"AAAA,OAAO,EAA0B,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAatE;;;;;;;;;;GAUG;AAEH,gFAAgF;AAChF,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,8FAA8F;AAC9F,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,qGAAqG;AACrG,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAC9B,4FAA4F;AAC5F,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC,sGAAsG;AACtG,MAAM,WAAW,GAAG,GAAG,CAAC;AACxB,uFAAuF;AACvF,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,mFAAmF;AACnF,MAAM,aAAa,GAAG,EAAE,CAAC;AACzB,wFAAwF;AACxF,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;AAEzC;;;GAGG;AACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAS;IACjC,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,OAAO;IACP,QAAQ;IACR,OAAO;IACP,KAAK;IACL,KAAK;IACL,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,KAAK;IACL,KAAK;IACL,KAAK;IACL,OAAO;IACP,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;CACN,CAAC,CAAC;AAaH,0GAA0G;AAC1G,MAAM,UAAU,iBAAiB,CAAC,KAAa,EAAY;IAC1D,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzC,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,YAAY;YAAE,SAAS;QACzC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAClC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjB,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS;YAAE,MAAM;IACtC,CAAC;IACD,OAAO,KAAK,CAAC;AAAA,CACb;AAED,wGAAwG;AACxG,SAAS,YAAY,CAAC,KAAwB,EAAU;IACvD,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,M
AAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AAAA,CACzF;AAED,SAAS,eAAe,CAAC,IAAY,EAAsB;IAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAC3C,IAAI,OAAO,CAAC,MAAM,IAAI,iBAAiB;QAAE,OAAO,OAAO,CAAC;IACxD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,KAAG,CAAC;AAAA,CACrD;AAUD,SAAS,KAAK,CACb,UAA+B,EAC/B,IAAuB,EACvB,GAAW,EACX,SAAiB,EACI;IACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/B,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,MAAM,IAAI,GAAG,CAAC,OAAkB,EAAQ,EAAE,CAAC;YAC1C,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,CAAC,OAAO,CAAC,CAAC;QAAA,CACjB,CAAC;QACF,IAAI,CAAC;YACJ,UAAU,CACT,IAAI,EACJ,CAAC,GAAG,IAAI,CAAC,EACT,EAAE,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,EAC7F,CAAC,KAA+B,EAAE,MAAc,EAAE,EAAE,CAAC;gBACpD,MAAM,GAAG,GAAG,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;gBACrD,IAAI,CAAC,KAAK,EAAE,CAAC;oBACZ,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;oBAChD,OAAO;gBACR,CAAC;gBACD,mFAAiF;gBACjF,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;oBACtB,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;oBAChD,OAAO;gBACR,CAAC;gBACD,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC,CAAC;YAAA,CAClE,CACD,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACR,iFAAiF;YACjF,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;IAAA,CACD,CAAC,CAAC;AAAA,CACH;AAED,yFAAyF;AACzF,SAAS,aAAa,CAAC,IAAY,EAAU;IAC5C,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAAA,CACpD;AAED,SAAS,aAAa,CAAC,MAAc,EAAY;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,
CAAC,CAAC;IACtD,CAAC;IACD,OAAO,KAAK,CAAC;AAAA,CACb;AAQD,SAAS,mBAAmB,CAAC,MAAc,EAAkB;IAC5D,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC/B,gFAAgF;QAChF,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC5F,CAAC;IACD,OAAO,OAAO,CAAC;AAAA,CACf;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,IAAiC,EAA0B;IACxG,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IACxC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAK,QAA2C,CAAC;IACnF,IAAI,CAAC,GAAG,IAAI,UAAU,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,KAAK,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,oBAAoB,CAAC;IACnD,MAAM,eAAe,GAAG,GAAW,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAEnF,oGAAoG;IACpG,4EAA4E;IAC5E,MAAM,aAAa,GAAG;QACrB,sBAAsB;QACtB,aAAa;QACb,GAAG;QACH,iBAAiB;QACjB,cAAc;QACd,eAAe;QACf,gBAAgB;QAChB,YAAY;QACZ,SAAS;QACT,OAAO;QACP,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACxC,4FAA4F;QAC5F,8FAA8F;QAC9F,IAAI;QACJ,GAAG;KACH,CAAC;IACF,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE,aAAa,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC,CAAC;IACjF,IAAI,SAAS,CAAC,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE;QAAE,OAAO,EAAE,CAAC;IAElD,MAAM,cAAc,GAAG,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IAC/E,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE3C,kGAAkG;IAClG,uEAAqE;IACrE,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,WAAW,GAAG;QACnB,IAAI;QACJ,IAAI;QACJ,cAAc;QACd,IAAI;QACJ,GAAG;QACH,iBAAiB;QACjB,cAAc;QACd,eAAe;QACf,SAAS;QACT,OAAO;QACP,IAAI;QACJ,QAAQ;QACR,IAAI;QACJ,GAAG,cAAc;KACjB,CAAC;IACF,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE,WAAW,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC,CAAC;IAE7E,MAAM,OAAO,GAAkB,
EAAE,CAAC;IAClC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IACpC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACvC,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;QAChB,KAAK,MAAM,KAAK,IAAI,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACzD,IAAI,OAAO,CAAC,MAAM,IAAI,UAAU;gBAAE,MAAM;YACxC,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YACpC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACtB,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC1B,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,OAAO,CAAC,IAAI,CAAC;gBACZ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;gBACrB,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,KAAK,CAAC,IAAI;gBACf,OAAO,EAAE,IAAI;gBACb,GAAG,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7C,QAAQ,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;aAC9C,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED,kGAAkG;IAClG,kFAAkF;IAClF,KAAK,MAAM,IAAI,IAAI,cAAc,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,MAAM,IAAI,UAAU;YAAE,MAAM;QACxC,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAClC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC;YACZ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;YACrB,IAAI,EAAE,WAAW;YACjB,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI;YACT,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,EAAE,SAAS,EAAE,WAAW,EAAE;SACpC,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;AAAA,CACpC","sourcesContent":["import { type ExecFileException, execFile } from \"node:child_process\";\n\n/** Structural DI seam: only the callback overload the collector actually uses — demanding\n * node's full `typeof execFile` (with `__promisify__`) makes plain test mocks unassignable. 
*/\nexport type WorkspaceExecFileFn = (\n\tcommand: string,\n\targs: readonly string[],\n\toptions: { cwd?: string; timeout?: number; maxBuffer?: number; encoding?: string; windowsHide?: boolean },\n\tcallback: (error: ExecFileException | null, stdout: string, stderr: string) => void,\n) => unknown;\n\nimport type { EvidenceRef } from \"../autonomy/contracts.ts\";\n\n/**\n * Best-effort workspace research source collector.\n *\n * Feeds the autonomous research lane POINTER-FIRST sources: a repo-relative path, a bounded excerpt,\n * and (when known) a line number — never whole file bodies. It runs `rg` under the session cwd exactly\n * like the grep tool does, so it only surfaces content ripgrep already matched. Collection is bounded\n * (a shared wall-clock deadline, a candidate cap, ripgrep's own binary/oversize skipping) and never\n * throws: if `rg` is missing or errors, it returns `[]`, which is today's \"no collector\" behavior.\n *\n * The returned sources are `EvidenceRef`s (the runner's source type) tagged `kind: \"workspace\"`.\n */\n\n/** Search terms shorter than this are too noisy to be useful discriminators. */\nconst MIN_TERM_LEN = 3;\n/** Cap on derived search terms; keeps the discovery pattern small and the collector cheap. */\nconst MAX_TERMS = 4;\n/** Pointer excerpts are bounded so we never spill a whole line (or a whole file) into the prompt. */\nconst EXCERPT_MAX_CHARS = 200;\n/** Shared wall-clock budget for the whole collection pass (both ripgrep calls together). */\nconst COLLECTION_BUDGET_MS = 5000;\n/** Floor for any single ripgrep call so a nearly-spent budget still gives ripgrep a chance to run. */\nconst MIN_CALL_MS = 500;\n/** ripgrep skips files larger than this; oversized files never contribute a source. */\nconst MAX_FILESIZE = \"1M\";\n/** Upper bound on candidate files carried from discovery into the pointer pass. */\nconst CANDIDATE_CAP = 24;\n/** Generous stdout ceiling; overflow degrades to fewer sources rather than throwing. 
*/\nconst MAX_BUFFER_BYTES = 8 * 1024 * 1024;\n\n/**\n * A small, deliberately conservative English stopword set plus the structural words that show up in\n * goal/requirement text. Anything not here that is >= MIN_TERM_LEN survives as a search term.\n */\nconst STOPWORDS = new Set<string>([\n\t\"the\",\n\t\"and\",\n\t\"for\",\n\t\"with\",\n\t\"that\",\n\t\"this\",\n\t\"from\",\n\t\"into\",\n\t\"are\",\n\t\"was\",\n\t\"were\",\n\t\"but\",\n\t\"not\",\n\t\"you\",\n\t\"your\",\n\t\"our\",\n\t\"all\",\n\t\"any\",\n\t\"can\",\n\t\"has\",\n\t\"have\",\n\t\"had\",\n\t\"will\",\n\t\"would\",\n\t\"should\",\n\t\"could\",\n\t\"its\",\n\t\"his\",\n\t\"her\",\n\t\"their\",\n\t\"them\",\n\t\"they\",\n\t\"she\",\n\t\"him\",\n\t\"who\",\n\t\"what\",\n\t\"when\",\n\t\"where\",\n\t\"which\",\n\t\"how\",\n\t\"why\",\n\t\"use\",\n\t\"using\",\n\t\"used\",\n\t\"via\",\n\t\"per\",\n\t\"out\",\n\t\"off\",\n\t\"over\",\n\t\"under\",\n\t\"then\",\n\t\"than\",\n\t\"add\",\n\t\"adds\",\n\t\"get\",\n\t\"gets\",\n\t\"set\",\n\t\"sets\",\n\t\"new\",\n\t\"old\",\n\t\"one\",\n\t\"two\",\n\t\"let\",\n\t\"run\",\n\t\"runs\",\n]);\n\nexport interface CollectWorkspaceSourcesArgs {\n\t/** Free text (goal + requirement text) that search terms are derived from. */\n\tquery: string;\n\t/** Session working directory; ripgrep runs here and paths are reported relative to it. */\n\tcwd: string;\n\t/** Hard cap on returned sources; also the lane's source budget. */\n\tmaxSources: number;\n\t/** Injected for tests; defaults to node's `execFile`. */\n\texecFileFn?: WorkspaceExecFileFn;\n}\n\n/** Split on non-word runs, lowercase, drop stopwords/short/dupes, keep source order, cap at MAX_TERMS. 
*/\nexport function deriveSearchTerms(query: string): string[] {\n\tconst seen = new Set<string>();\n\tconst terms: string[] = [];\n\tfor (const raw of query.split(/[^\\w]+/)) {\n\t\tconst term = raw.toLowerCase();\n\t\tif (term.length < MIN_TERM_LEN) continue;\n\t\tif (STOPWORDS.has(term)) continue;\n\t\tif (seen.has(term)) continue;\n\t\tseen.add(term);\n\t\tterms.push(term);\n\t\tif (terms.length >= MAX_TERMS) break;\n\t}\n\treturn terms;\n}\n\n/** The most specific term (longest wins; ties keep the earliest) drives the line-level pointer pass. */\nfunction pickBestTerm(terms: readonly string[]): string {\n\treturn terms.reduce((best, term) => (term.length > best.length ? term : best), terms[0]);\n}\n\nfunction truncateExcerpt(text: string): string | undefined {\n\tconst trimmed = text.trim();\n\tif (trimmed.length === 0) return undefined;\n\tif (trimmed.length <= EXCERPT_MAX_CHARS) return trimmed;\n\treturn `${trimmed.slice(0, EXCERPT_MAX_CHARS - 1)}…`;\n}\n\ninterface RgOutcome {\n\t/** True when ripgrep ran to a usable result (matches found, or a clean \"no matches\"). */\n\tok: boolean;\n\tstdout: string;\n\t/** True when the `rg` binary could not be spawned at all — the collector bails entirely. */\n\tmissing: boolean;\n}\n\nfunction runRg(\n\texecFileFn: WorkspaceExecFileFn,\n\targs: readonly string[],\n\tcwd: string,\n\ttimeoutMs: number,\n): Promise<RgOutcome> {\n\treturn new Promise((resolve) => {\n\t\tlet settled = false;\n\t\tconst done = (outcome: RgOutcome): void => {\n\t\t\tif (settled) return;\n\t\t\tsettled = true;\n\t\t\tresolve(outcome);\n\t\t};\n\t\ttry {\n\t\t\texecFileFn(\n\t\t\t\t\"rg\",\n\t\t\t\t[...args],\n\t\t\t\t{ cwd, timeout: timeoutMs, maxBuffer: MAX_BUFFER_BYTES, encoding: \"utf8\", windowsHide: true },\n\t\t\t\t(error: ExecFileException | null, stdout: string) => {\n\t\t\t\t\tconst out = typeof stdout === \"string\" ? 
stdout : \"\";\n\t\t\t\t\tif (!error) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\t// Exit code 1 is ripgrep's \"no matches\" — a clean, usable result, not a failure.\n\t\t\t\t\tif (error.code === 1) {\n\t\t\t\t\t\tdone({ ok: true, stdout: out, missing: false });\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t\tdone({ ok: false, stdout: \"\", missing: error.code === \"ENOENT\" });\n\t\t\t\t},\n\t\t\t);\n\t\t} catch {\n\t\t\t// A synchronous spawn failure (e.g. rg entirely absent) is treated as \"missing\".\n\t\t\tdone({ ok: false, stdout: \"\", missing: true });\n\t\t}\n\t});\n}\n\n/** rg prints `./foo` when the search root is `.`; keep sources cleanly repo-relative. */\nfunction normalizePath(path: string): string {\n\treturn path.startsWith(\"./\") ? path.slice(2) : path;\n}\n\nfunction parseFileList(stdout: string): string[] {\n\tconst files: string[] = [];\n\tfor (const line of stdout.split(/\\r?\\n/)) {\n\t\tconst path = line.trim();\n\t\tif (path.length > 0) files.push(normalizePath(path));\n\t}\n\treturn files;\n}\n\ninterface ContentMatch {\n\tpath: string;\n\tline: number;\n\ttext: string;\n}\n\nfunction parseContentMatches(stdout: string): ContentMatch[] {\n\tconst matches: ContentMatch[] = [];\n\tfor (const raw of stdout.split(/\\r?\\n/)) {\n\t\tif (raw.length === 0) continue;\n\t\t// `-H -n --no-heading` yields `path:line:text`; text may itself contain colons.\n\t\tconst parsed = /^(.+?):(\\d+):(.*)$/.exec(raw);\n\t\tif (!parsed) continue;\n\t\tmatches.push({ path: normalizePath(parsed[1]), line: Number(parsed[2]), text: parsed[3] });\n\t}\n\treturn matches;\n}\n\nexport async function collectWorkspaceSources(args: CollectWorkspaceSourcesArgs): Promise<EvidenceRef[]> {\n\tconst { query, cwd, maxSources } = args;\n\tconst execFileFn = args.execFileFn ?? 
(execFile as unknown as WorkspaceExecFileFn);\n\tif (!cwd || maxSources <= 0) return [];\n\n\tconst terms = deriveSearchTerms(query);\n\tif (terms.length === 0) return [];\n\n\tconst deadline = Date.now() + COLLECTION_BUDGET_MS;\n\tconst remainingBudget = (): number => Math.max(MIN_CALL_MS, deadline - Date.now());\n\n\t// Phase 1 (discovery): which files match ANY term. `--max-count 1` stops at the first hit per file;\n\t// ripgrep skips binary and oversized files by default / via --max-filesize.\n\tconst discoveryArgs = [\n\t\t\"--files-with-matches\",\n\t\t\"--max-count\",\n\t\t\"1\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--max-filesize\",\n\t\tMAX_FILESIZE,\n\t\t\"--color\",\n\t\t\"never\",\n\t\t...terms.flatMap((term) => [\"-e\", term]),\n\t\t// Explicit search root: execFile hands rg a piped stdin, and rg with no path argument would\n\t\t// read (and block on) that pipe instead of scanning the tree. \".\" keeps output repo-relative.\n\t\t\"--\",\n\t\t\".\",\n\t];\n\tconst discovery = await runRg(execFileFn, discoveryArgs, cwd, remainingBudget());\n\tif (discovery.missing || !discovery.ok) return [];\n\n\tconst candidateFiles = parseFileList(discovery.stdout).slice(0, CANDIDATE_CAP);\n\tif (candidateFiles.length === 0) return [];\n\n\t// Phase 2 (pointers): line-level hits for the single best term, scanned only over files discovery\n\t// already matched — so we never read a file ripgrep did not surface.\n\tconst bestTerm = pickBestTerm(terms);\n\tconst contentArgs = [\n\t\t\"-H\",\n\t\t\"-n\",\n\t\t\"--no-heading\",\n\t\t\"-m\",\n\t\t\"2\",\n\t\t\"--fixed-strings\",\n\t\t\"--smart-case\",\n\t\t\"--no-messages\",\n\t\t\"--color\",\n\t\t\"never\",\n\t\t\"-e\",\n\t\tbestTerm,\n\t\t\"--\",\n\t\t...candidateFiles,\n\t];\n\tconst content = await runRg(execFileFn, contentArgs, cwd, remainingBudget());\n\n\tconst sources: EvidenceRef[] = [];\n\tconst seenPaths = new Set<string>();\n\tconst seenLineKeys = new 
Set<string>();\n\tlet counter = 0;\n\n\tif (content.ok) {\n\t\tfor (const match of parseContentMatches(content.stdout)) {\n\t\t\tif (sources.length >= maxSources) break;\n\t\t\tconst key = `${match.path}:${match.line}`;\n\t\t\tif (seenLineKeys.has(key)) continue;\n\t\t\tseenLineKeys.add(key);\n\t\t\tseenPaths.add(match.path);\n\t\t\tconst excerpt = truncateExcerpt(match.text);\n\t\t\tsources.push({\n\t\t\t\tid: `ws-${++counter}`,\n\t\t\t\tkind: \"workspace\",\n\t\t\t\ttitle: `${match.path}:${match.line}`,\n\t\t\t\turi: match.path,\n\t\t\t\ttrusted: true,\n\t\t\t\t...(excerpt !== undefined ? { excerpt } : {}),\n\t\t\t\tmetadata: { line: match.line, term: bestTerm },\n\t\t\t});\n\t\t}\n\t}\n\n\t// Fill the remaining budget with file-level pointers for candidates that matched a secondary term\n\t// (and so produced no best-term line). Still pointer-first: a path, never a body.\n\tfor (const file of candidateFiles) {\n\t\tif (sources.length >= maxSources) break;\n\t\tif (seenPaths.has(file)) continue;\n\t\tseenPaths.add(file);\n\t\tsources.push({\n\t\t\tid: `ws-${++counter}`,\n\t\t\tkind: \"workspace\",\n\t\t\ttitle: file,\n\t\t\turi: file,\n\t\t\ttrusted: true,\n\t\t\tmetadata: { matchedBy: \"discovery\" },\n\t\t});\n\t}\n\n\treturn sources.slice(0, maxSources);\n}\n"]}

package/dist/core/settings-manager.d.ts CHANGED Viewed

@@ -141,6 +141,7 @@ export interface ModelRouterSettings {
     learningModel?: string;
     judgeEnabled?: boolean;
     judgeModel?: string;
+    executorModel?: string;
 }
 export declare const DEFAULT_RESEARCH_LANE_ENABLED = false;
 export declare const DEFAULT_RESEARCH_LANE_MAX_USD = 0.25;
@@ -524,6 +525,7 @@ export declare class SettingsManager {
         learningModel?: string;
         judgeEnabled: boolean;
         judgeModel?: string;
+        executorModel?: string;
     };
     setModelRouterSettings(settings: ModelRouterSettings, scope?: SettingsScope): void;
     /** Configured auxiliary summarizer model id, or "auto" (default) to pick the cheapest authed model. */