@x-code-cli/core 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/file-ingest.d.ts +62 -0
- package/dist/agent/file-ingest.d.ts.map +1 -0
- package/dist/agent/file-ingest.js +390 -0
- package/dist/agent/file-ingest.js.map +1 -0
- package/dist/agent/light-compact.d.ts +13 -0
- package/dist/agent/light-compact.d.ts.map +1 -0
- package/dist/agent/light-compact.js +106 -0
- package/dist/agent/light-compact.js.map +1 -0
- package/dist/agent/loop-guard.d.ts +50 -0
- package/dist/agent/loop-guard.d.ts.map +1 -0
- package/dist/agent/loop-guard.js +107 -0
- package/dist/agent/loop-guard.js.map +1 -0
- package/dist/agent/loop-state.d.ts +11 -0
- package/dist/agent/loop-state.d.ts.map +1 -1
- package/dist/agent/loop-state.js +2 -0
- package/dist/agent/loop-state.js.map +1 -1
- package/dist/agent/loop.d.ts +2 -2
- package/dist/agent/loop.d.ts.map +1 -1
- package/dist/agent/loop.js +65 -8
- package/dist/agent/loop.js.map +1 -1
- package/dist/agent/messages.d.ts +5 -2
- package/dist/agent/messages.d.ts.map +1 -1
- package/dist/agent/messages.js.map +1 -1
- package/dist/agent/provider-compat.d.ts +7 -0
- package/dist/agent/provider-compat.d.ts.map +1 -1
- package/dist/agent/provider-compat.js +122 -0
- package/dist/agent/provider-compat.js.map +1 -1
- package/dist/agent/system-prompt.js +3 -3
- package/dist/agent/system-prompt.js.map +1 -1
- package/dist/agent/tool-execution.d.ts.map +1 -1
- package/dist/agent/tool-execution.js +68 -26
- package/dist/agent/tool-execution.js.map +1 -1
- package/dist/agent/tool-result-sanitize.d.ts +8 -0
- package/dist/agent/tool-result-sanitize.d.ts.map +1 -0
- package/dist/agent/tool-result-sanitize.js +77 -0
- package/dist/agent/tool-result-sanitize.js.map +1 -0
- package/dist/agent/vision-fallback.d.ts +22 -0
- package/dist/agent/vision-fallback.d.ts.map +1 -0
- package/dist/agent/vision-fallback.js +127 -0
- package/dist/agent/vision-fallback.js.map +1 -0
- package/dist/index.d.ts +8 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/providers/cache-control.d.ts +29 -0
- package/dist/providers/cache-control.d.ts.map +1 -0
- package/dist/providers/cache-control.js +93 -0
- package/dist/providers/cache-control.js.map +1 -0
- package/dist/providers/capabilities.d.ts +15 -0
- package/dist/providers/capabilities.d.ts.map +1 -0
- package/dist/providers/capabilities.js +38 -0
- package/dist/providers/capabilities.js.map +1 -0
- package/dist/tools/index.d.ts +31 -5
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +1 -10
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/read-file.d.ts +29 -1
- package/dist/tools/read-file.d.ts.map +1 -1
- package/dist/tools/read-file.js +103 -10
- package/dist/tools/read-file.js.map +1 -1
- package/dist/tools/shell-provider.d.ts +13 -0
- package/dist/tools/shell-provider.d.ts.map +1 -0
- package/dist/tools/shell-provider.js +74 -0
- package/dist/tools/shell-provider.js.map +1 -0
- package/dist/tools/shell-utils.d.ts +1 -7
- package/dist/tools/shell-utils.d.ts.map +1 -1
- package/dist/tools/shell-utils.js +0 -17
- package/dist/tools/shell-utils.js.map +1 -1
- package/dist/tools/truncate.d.ts +36 -0
- package/dist/tools/truncate.d.ts.map +1 -0
- package/dist/tools/truncate.js +118 -0
- package/dist/tools/truncate.js.map +1 -0
- package/dist/tools/web-search.js +2 -2
- package/dist/tools/web-search.js.map +1 -1
- package/dist/utils/shell-error.d.ts +12 -0
- package/dist/utils/shell-error.d.ts.map +1 -0
- package/dist/utils/shell-error.js +73 -0
- package/dist/utils/shell-error.js.map +1 -0
- package/package.json +21 -12
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { FilePart, ImagePart, TextPart } from 'ai';
|
|
2
|
+
import type { ProviderCapabilities } from '../providers/capabilities.js';
|
|
3
|
+
/**
 * One content part produced from a referenced file.
 *
 * The union is made of the AI SDK's own user-message part types, so a value
 * of this type can be placed straight into the `content` array of a
 * UserModelMessage without conversion.
 */
export type IngestedPart =
    | TextPart
    | ImagePart
    | FilePart;
/**
 * Coarse category assigned to a file by `classifyFile`. The category decides
 * which extraction path `ingestFile` takes for the file.
 */
export type FileKind =
    | 'text'
    | 'image'
    | 'pdf'
    | 'office'
    | 'unknown';
/**
 * A file the user pointed at in their prompt, either through an `@file`
 * mention or by writing a bare absolute path.
 */
export interface FileReference {
    /** The token exactly as it appeared in the user's input (kept for echoing/UI). */
    raw: string;
    /** The same reference resolved to an absolute filesystem path. */
    absolutePath: string;
}
|
|
15
|
+
/**
 * Decide what kind of file `filePath` is.
 *
 * The (lower-cased) extension is consulted first; only when it is missing or
 * not recognized is the file's content inspected for a magic-byte signature.
 *
 * @param filePath Path of the file to classify.
 * @returns The detected {@link FileKind}.
 */
export declare function classifyFile(filePath: string): Promise<FileKind>;
/**
 * Find the file references contained in a user prompt.
 *
 * Two syntaxes are recognized:
 *
 *   1. `@path` — an explicit attachment. The token ends at the next
 *      whitespace character. Windows (`D:\foo\bar`) and POSIX (`/etc/foo`)
 *      absolute paths are accepted, as are relative paths.
 *
 *   2. Bare absolute paths — a token beginning with a drive prefix
 *      (`C:\…`, `D:\…`) or with `/`, and carrying a file extension. This
 *      form is intentionally stricter than the @-mention so that regexes,
 *      SQL and similar snippets are not mistaken for paths.
 *
 * References are keyed by absolute path, so a file mentioned several times
 * appears only once in the result.
 *
 * @param input The raw prompt text.
 * @returns The distinct references, in order of first appearance per syntax.
 */
export declare function extractFileReferences(input: string): FileReference[];
/**
 * Run OCR over an image with tesseract.js using the English and Simplified
 * Chinese language packs.
 *
 * Recognition quality is limited — handwriting and stylized text in
 * particular — so this is meant as a text-extraction fallback for providers
 * that cannot look at images themselves.
 *
 * @param filePath Path of the image to recognize.
 * @returns The recognized text, or a bracketed `[OCR failed: …]` message.
 */
export declare function ocrImage(filePath: string): Promise<string>;
/**
 * Turn one file reference into the content part(s) that represent it,
 * adapting to what the active provider can accept.
 *
 * Contract:
 *   - Text files, Office documents and PDFs with an extractable text layer
 *     become a single TextPart — the cheapest route, valid for any provider.
 *   - Images become an ImagePart when the provider accepts images, and an
 *     annotated fallback TextPart otherwise.
 *   - Scanned PDFs (little or no text from pdf-parse) become a FilePart for
 *     PDF-capable providers and an OCR'd TextPart otherwise.
 *   - A missing or unreadable file yields a TextPart describing the error,
 *     so the model can mention the failure instead of silently skipping it.
 *
 * @param ref      The reference to resolve.
 * @param caps     Multi-modal capabilities of the active provider.
 * @param onNotice Optional sink for user-facing progress/fallback notices.
 * @returns The parts to append to the user message.
 */
export declare function ingestFile(ref: FileReference, caps: ProviderCapabilities, onNotice?: (msg: string) => void): Promise<IngestedPart[]>;
/**
 * Build the `content` of a user message from prompt text.
 *
 * The original text always comes first, followed by the part(s) of every
 * referenced file. When the prompt references no files the text is returned
 * unchanged as a plain string, keeping simple prompts on the string fast
 * path (and provider behavior / caching semantics as they were).
 *
 * @param text     The raw prompt text.
 * @param caps     Multi-modal capabilities of the active provider.
 * @param onNotice Optional sink for user-facing progress/fallback notices.
 * @returns Either the untouched string or an array of content parts.
 */
export declare function buildUserContent(text: string, caps: ProviderCapabilities, onNotice?: (msg: string) => void): Promise<string | IngestedPart[]>;
|
|
62
|
+
//# sourceMappingURL=file-ingest.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-ingest.d.ts","sourceRoot":"","sources":["../../src/agent/file-ingest.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAA;AAEvD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAA;AAgBxE;;wCAEwC;AACxC,MAAM,MAAM,YAAY,GAAG,QAAQ,GAAG,SAAS,GAAG,QAAQ,CAAA;AAE1D,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,GAAG,QAAQ,GAAG,SAAS,CAAA;AAEtE,6EAA6E;AAC7E,MAAM,WAAW,aAAa;IAC5B,6DAA6D;IAC7D,GAAG,EAAE,MAAM,CAAA;IACX,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAA;CACrB;AAkBD;qDACqD;AACrD,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBtE;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,MAAM,GAAG,aAAa,EAAE,CAwBpE;AA8DD;;;oDAGoD;AACpD,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAgBhE;AAoDD;;;;;;;;;;;;;GAaG;AACH,wBAAsB,UAAU,CAC9B,GAAG,EAAE,aAAa,EAClB,IAAI,EAAE,oBAAoB,EAC1B,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC,CAsGzB;AAED;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,oBAAoB,EAC1B,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,GAC/B,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,QAAQ,GAAG,SAAS,GAAG,QAAQ,CAAC,CAAC,CAU1D"}
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
// @x-code-cli/core — Attach-file-to-message pipeline
|
|
2
|
+
//
|
|
3
|
+
// Given a raw user prompt that references files (via `@path` or bare
|
|
4
|
+
// absolute paths), resolve each reference into an AI-SDK content part:
|
|
5
|
+
//
|
|
6
|
+
// text / code → TextPart with file body
|
|
7
|
+
// PDF → TextPart with extracted text (local, no tokens wasted on binary)
|
|
8
|
+
// docx/xlsx/pptx → TextPart via officeparser/mammoth/xlsx
|
|
9
|
+
// image → ImagePart for multimodal providers; OCR'd TextPart for DeepSeek
|
|
10
|
+
//
|
|
11
|
+
// PDF is deliberately NOT sent as a FilePart even to multimodal providers
|
|
12
|
+
// when we can extract text locally — a 100-page text PDF becomes a few KB
|
|
13
|
+
// of prompt instead of tens of thousands of tokens of rendered pages.
|
|
14
|
+
import fs from 'node:fs/promises';
|
|
15
|
+
import path from 'node:path';
|
|
16
|
+
import { GLOBAL_XCODE_DIR } from '../utils.js';
|
|
17
|
+
import { captionImage, pickVisionProvider } from './vision-fallback.js';
|
|
18
|
+
/**
 * Resolve (and create if necessary) the directory handed to tesseract.js as
 * its `cachePath` for language model weights (`eng.traineddata`,
 * `chi_sim.traineddata`, ~7.6 MB total).
 *
 * Without an explicit cache path the worker drops those files into
 * process.cwd(), so every project `xc` is run in would download them again
 * and show untracked binaries in git status. Keeping them in a `tessdata`
 * folder under GLOBAL_XCODE_DIR makes the download a one-time cost shared by
 * all projects on the machine.
 *
 * @returns The cache directory path; the directory exists when the promise resolves.
 */
async function tesseractCacheDir() {
    const cacheRoot = path.join(GLOBAL_XCODE_DIR, 'tessdata');
    // `recursive: true` also makes this a no-op when the folder already exists.
    await fs.mkdir(cacheRoot, { recursive: true });
    return cacheRoot;
}
|
|
29
|
+
/**
 * File extensions (lower-case, leading dot) that are inlined as text without
 * looking at the file's content. Used purely for membership tests, so the
 * grouping below is for readers only.
 */
const TEXT_EXTENSIONS = new Set([
    // Prose, logs and tabular / structured data
    '.txt', '.md', '.mdx', '.rst', '.log', '.csv', '.tsv', '.json', '.jsonc',
    // Configuration
    '.yaml', '.yml', '.toml', '.ini', '.env', '.cfg', '.conf',
    // JavaScript / TypeScript
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
    // General-purpose languages
    '.py', '.rb', '.go', '.rs', '.java', '.kt', '.swift', '.c', '.h',
    '.cpp', '.cc', '.hpp', '.cs', '.php', '.pl', '.lua',
    // Shells
    '.sh', '.bash', '.zsh', '.fish', '.ps1',
    // Query / schema languages
    '.sql', '.graphql', '.gql', '.proto',
    // Web
    '.html', '.htm', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
    // Markup and tooling files
    '.xml', '.svg', '.dockerfile', '.makefile', '.gitignore', '.editorconfig',
]);
/** Extensions classified as images. */
const IMAGE_EXTENSIONS = new Set([
    '.png',
    '.jpg',
    '.jpeg',
    '.webp',
    '.gif',
    '.bmp',
]);
/** Extensions classified as Office documents (OOXML and OpenDocument). */
const OFFICE_EXTENSIONS = new Set([
    '.docx',
    '.xlsx',
    '.pptx',
    '.odt',
    '.ods',
    '.odp',
]);
|
|
43
|
+
/**
 * Decide what kind of file `filePath` is.
 *
 * The lower-cased extension is checked against the known text / image /
 * office sets and `.pdf`. Only when none of those match is the `file-type`
 * package loaded to sniff the file's magic bytes.
 *
 * @param filePath Path of the file to classify.
 * @returns One of 'text', 'image', 'pdf', 'office' or 'unknown'. 'unknown' is
 *   returned both for a detected-but-unhandled MIME type and when sniffing
 *   throws (package missing, unreadable file, ...).
 */
export async function classifyFile(filePath) {
    const suffix = path.extname(filePath).toLowerCase();
    // Fast path: a recognized extension settles the question without any I/O.
    const byExtension = TEXT_EXTENSIONS.has(suffix) ? 'text'
        : IMAGE_EXTENSIONS.has(suffix) ? 'image'
            : OFFICE_EXTENSIONS.has(suffix) ? 'office'
                : suffix === '.pdf' ? 'pdf'
                    : undefined;
    if (byExtension !== undefined) {
        return byExtension;
    }
    // Unknown extension — peek magic bytes.
    try {
        const { fileTypeFromFile: sniff } = await import('file-type');
        const signature = await sniff(filePath);
        if (!signature) {
            // No recognizable signature → assume plain text.
            return 'text';
        }
        const mime = signature.mime;
        if (mime.startsWith('image/')) {
            return 'image';
        }
        if (mime === 'application/pdf') {
            return 'pdf';
        }
        const looksLikeOffice = mime.includes('officedocument') || mime.includes('opendocument');
        if (looksLikeOffice) {
            return 'office';
        }
        return mime.startsWith('text/') ? 'text' : 'unknown';
    }
    catch {
        return 'unknown';
    }
}
|
|
76
|
+
/**
|
|
77
|
+
* Extract plain-text references from a user prompt. Two syntaxes are
|
|
78
|
+
* recognized:
|
|
79
|
+
*
|
|
80
|
+
* 1. `@path` — the `@` prefix marks an explicit attachment. Stops at
|
|
81
|
+
* whitespace. Honors Windows (`D:\foo\bar`) and POSIX (`/etc/foo`)
|
|
82
|
+
* absolute paths.
|
|
83
|
+
*
|
|
84
|
+
* 2. Bare absolute paths — any token that looks like `C:\…`, `D:\…`, or
|
|
85
|
+
* starts with `/` and contains at least one path separator, with an
|
|
86
|
+
* extension. Less aggressive than @-mention: only fires on tokens that
|
|
87
|
+
* clearly look like paths, to avoid hijacking regex/SQL/etc.
|
|
88
|
+
*
|
|
89
|
+
* Duplicates are de-duplicated by absolute path so a file referenced twice
|
|
90
|
+
* only gets ingested once.
|
|
91
|
+
*/
|
|
92
|
+
export function extractFileReferences(input) {
|
|
93
|
+
const refs = new Map();
|
|
94
|
+
// @path — one token, stops at whitespace. `@` must be at line start or
|
|
95
|
+
// preceded by whitespace so we don't eat `@user@host` email-ish tokens.
|
|
96
|
+
const atRegex = /(?:^|\s)@((?:[A-Za-z]:[\\/]|[\\/])[^\s]+|[^\s@][^\s]*)/g;
|
|
97
|
+
for (const m of input.matchAll(atRegex)) {
|
|
98
|
+
const raw = m[1] ?? '';
|
|
99
|
+
if (!raw)
|
|
100
|
+
continue;
|
|
101
|
+
const abs = path.isAbsolute(raw) ? path.normalize(raw) : path.resolve(raw);
|
|
102
|
+
refs.set(abs, { raw: `@${raw}`, absolutePath: abs });
|
|
103
|
+
}
|
|
104
|
+
// Bare absolute paths. Require a separator + extension so code snippets
|
|
105
|
+
// like `fs.readFile` don't match. Windows drive letters + POSIX roots only.
|
|
106
|
+
const bareRegex = /(?:^|\s)((?:[A-Za-z]:[\\/]|\/)[^\s]*\.[A-Za-z0-9]{1,8})/g;
|
|
107
|
+
for (const m of input.matchAll(bareRegex)) {
|
|
108
|
+
const raw = m[1] ?? '';
|
|
109
|
+
if (!raw)
|
|
110
|
+
continue;
|
|
111
|
+
const abs = path.normalize(raw);
|
|
112
|
+
if (!refs.has(abs))
|
|
113
|
+
refs.set(abs, { raw, absolutePath: abs });
|
|
114
|
+
}
|
|
115
|
+
return [...refs.values()];
|
|
116
|
+
}
|
|
117
|
+
/**
 * Load a UTF-8 text file and prefix every line with its 1-based line number
 * and a tab. The file header describes this as the read-file tool's layout,
 * intended to give the model one representation whether a file was inlined
 * up front or fetched later — NOTE(review): that tool is not visible here.
 *
 * Lines are split on `\n` only, so a trailing newline produces a final
 * numbered empty line.
 *
 * @param filePath Path of the file to read.
 * @returns The numbered text, lines joined with `\n`.
 * @throws Whatever `fs.readFile` rejects with (missing file, directory, ...).
 */
async function readTextFile(filePath) {
    const raw = await fs.readFile(filePath, 'utf-8');
    const numbered = [];
    let lineNo = 0;
    for (const line of raw.split('\n')) {
        lineNo += 1;
        numbered.push(`${lineNo}\t${line}`);
    }
    return numbered.join('\n');
}
|
|
125
|
+
/** Extract plain text from a PDF. Uses pdf-parse's class-based v2 API
|
|
126
|
+
* (PDFParse.getText). Returns an empty string on failure; the caller
|
|
127
|
+
* decides whether to fall back to OCR. */
|
|
128
|
+
async function extractPdfText(filePath) {
|
|
129
|
+
try {
|
|
130
|
+
const { PDFParse } = await import('pdf-parse');
|
|
131
|
+
const buffer = await fs.readFile(filePath);
|
|
132
|
+
const parser = new PDFParse({ data: new Uint8Array(buffer) });
|
|
133
|
+
try {
|
|
134
|
+
const result = await parser.getText();
|
|
135
|
+
return result.text ?? '';
|
|
136
|
+
}
|
|
137
|
+
finally {
|
|
138
|
+
await parser.destroy().catch(() => { });
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
catch {
|
|
142
|
+
return '';
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
/** Extract text from an Office document. Routes .docx through mammoth
|
|
146
|
+
* (best-in-class semantic extraction), .xlsx through SheetJS (CSV per
|
|
147
|
+
* sheet), everything else through officeparser. */
|
|
148
|
+
async function extractOfficeText(filePath) {
|
|
149
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
150
|
+
try {
|
|
151
|
+
if (ext === '.docx') {
|
|
152
|
+
const mammoth = await import('mammoth');
|
|
153
|
+
const result = await mammoth.extractRawText({ path: filePath });
|
|
154
|
+
return result.value;
|
|
155
|
+
}
|
|
156
|
+
if (ext === '.xlsx') {
|
|
157
|
+
const XLSX = await import('xlsx');
|
|
158
|
+
const wb = XLSX.readFile(filePath);
|
|
159
|
+
const parts = [];
|
|
160
|
+
for (const sheetName of wb.SheetNames) {
|
|
161
|
+
const sheet = wb.Sheets[sheetName];
|
|
162
|
+
if (!sheet)
|
|
163
|
+
continue;
|
|
164
|
+
parts.push(`--- Sheet: ${sheetName} ---\n${XLSX.utils.sheet_to_csv(sheet)}`);
|
|
165
|
+
}
|
|
166
|
+
return parts.join('\n\n');
|
|
167
|
+
}
|
|
168
|
+
// .pptx, .odt, .ods, .odp — officeparser handles these.
|
|
169
|
+
const { OfficeParser } = await import('officeparser');
|
|
170
|
+
const ast = await OfficeParser.parseOffice(filePath);
|
|
171
|
+
return ast.toText();
|
|
172
|
+
}
|
|
173
|
+
catch (err) {
|
|
174
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
175
|
+
return `[Failed to extract text from ${path.basename(filePath)}: ${msg}]`;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
/**
 * Run OCR over an image with tesseract.js, using the English (`eng`) and
 * Simplified Chinese (`chi_sim`) language packs.
 *
 * A fresh worker is created for each call and terminated afterwards; the
 * directory from `tesseractCacheDir()` is passed as the worker's `cachePath`.
 * Accuracy is limited, especially for handwriting or stylized text — this is
 * intended as a text-extraction fallback for providers that can't natively
 * see images.
 *
 * @param filePath Path of the image to recognize.
 * @returns The recognized text. Never rejects: failures are returned as a
 *   bracketed `[OCR failed: <reason>]` string.
 */
export async function ocrImage(filePath) {
    try {
        const { createWorker: spawnWorker } = await import('tesseract.js');
        const cachePath = await tesseractCacheDir();
        const worker = await spawnWorker(['eng', 'chi_sim'], 1, { cachePath });
        try {
            const recognition = await worker.recognize(filePath);
            return recognition.data.text ?? '';
        }
        finally {
            // Release the worker whether recognition succeeded or threw.
            await worker.terminate();
        }
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return `[OCR failed: ${reason}]`;
    }
}
|
|
201
|
+
/**
 * OCR a PDF page by page. Meant for scanned PDFs, where pdf-parse's text
 * extraction returns little or nothing.
 *
 * Pages are rasterized with pdf-parse's own `getScreenshot` (so no separate
 * pdf-to-image dependency is needed), the parser is destroyed, and each page
 * image is then fed to a single tesseract.js worker. Pages without image
 * data are skipped.
 *
 * @param filePath Path of the PDF.
 * @returns The recognized text, one `--- Page <n> ---` section per page,
 *   sections separated by a blank line. Never rejects: failures are returned
 *   as a bracketed `[PDF OCR failed: <reason>]` string.
 */
async function ocrPdf(filePath) {
    try {
        const { PDFParse: PdfParser } = await import('pdf-parse');
        const pdfBytes = new Uint8Array(await fs.readFile(filePath));
        const parser = new PdfParser({ data: pdfBytes });
        let rendered;
        try {
            rendered = await parser.getScreenshot({ scale: 2, imageBuffer: true });
        }
        finally {
            // The parser is only needed for rasterization; free it before OCR starts.
            await parser.destroy().catch(() => { });
        }
        const { createWorker: spawnWorker } = await import('tesseract.js');
        const cachePath = await tesseractCacheDir();
        const worker = await spawnWorker(['eng', 'chi_sim'], 1, { cachePath });
        try {
            const sections = [];
            for (const page of rendered.pages) {
                if (page.data) {
                    const recognition = await worker.recognize(Buffer.from(page.data));
                    sections.push(`--- Page ${page.pageNumber} ---\n${recognition.data.text ?? ''}`);
                }
            }
            return sections.join('\n\n');
        }
        finally {
            await worker.terminate();
        }
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return `[PDF OCR failed: ${reason}]`;
    }
}
|
|
240
|
+
/**
 * Translate a file's extension into the IANA media type used as the
 * `mediaType` hint of an ImagePart.
 *
 * The extension is matched case-insensitively. Anything that is not one of
 * the known image extensions falls back to `image/png`; the original author
 * notes this is safe because the SDK mostly treats the hint as advisory.
 *
 * @param filePath Path (or bare file name) of the image.
 * @returns The media type string.
 */
function mediaTypeFor(filePath) {
    switch (path.extname(filePath).toLowerCase()) {
        case '.jpg':
        case '.jpeg':
            return 'image/jpeg';
        case '.png':
            return 'image/png';
        case '.webp':
            return 'image/webp';
        case '.gif':
            return 'image/gif';
        case '.bmp':
            return 'image/bmp';
        default:
            // Unknown or missing extension.
            return 'image/png';
    }
}
|
|
257
|
+
/**
 * Turn one file reference into the content part(s) that represent it,
 * adapting to what the active provider can accept.
 *
 * Behavior by file kind (as reported by `classifyFile`):
 *   - 'text' / 'unknown': the file is read as numbered text and wrapped in a
 *     `<<file path="…">>` envelope — a single TextPart.
 *   - 'office': text is extracted locally — a single TextPart.
 *   - 'pdf': if local extraction yields more than 200 non-blank-trimmed
 *     characters the text is used. Otherwise the PDF is treated as scanned:
 *     a FilePart when `caps.pdf` is truthy, an OCR'd TextPart (with an
 *     accuracy note) when it is not.
 *   - anything else is handled as an image: text marker + ImagePart when
 *     `caps.image` is truthy; otherwise a caption from a vision sub-agent if
 *     `pickVisionProvider()` returns one, and OCR as the last resort (also
 *     used when captioning throws).
 *
 * Missing or unreadable files never reject: they produce a TextPart carrying
 * the error so the model can acknowledge the failure instead of ignoring it.
 *
 * @param ref      The reference to resolve (`raw` token + `absolutePath`).
 * @param caps     Provider capabilities; `caps.pdf` and `caps.image` are read.
 * @param onNotice Optional callback for user-facing notices (captioning
 *                 success, sub-agent failure).
 * @returns The parts to append to the user message.
 */
export async function ingestFile(ref, caps, onNotice) {
    const target = ref.absolutePath;
    /** Human-readable reason for a caught value. */
    const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
    /** Wrap a string as the one-element TextPart list most branches return. */
    const textOnly = (text) => [{ type: 'text', text }];
    let kind;
    try {
        // stat first so a missing path is reported before any classification work.
        await fs.stat(target);
        kind = await classifyFile(target);
    }
    catch (err) {
        return textOnly(`[Cannot read ${ref.raw}: ${reasonOf(err)}]`);
    }
    switch (kind) {
        case 'text':
        case 'unknown': {
            try {
                const body = await readTextFile(target);
                return textOnly(`<<file path="${target}">>\n${body}\n<</file>>`);
            }
            catch (err) {
                return textOnly(`[Failed to read ${ref.raw}: ${reasonOf(err)}]`);
            }
        }
        case 'office': {
            const officeText = await extractOfficeText(target);
            return textOnly(`<<file path="${target}" kind="office">>\n${officeText}\n<</file>>`);
        }
        case 'pdf': {
            const extracted = await extractPdfText(target);
            // Heuristic: a "real" text PDF yields at least a couple hundred chars.
            // Scanned PDFs typically yield empty strings or a few stray ligatures.
            if (extracted.trim().length > 200) {
                return textOnly(`<<file path="${target}" kind="pdf-text">>\n${extracted}\n<</file>>`);
            }
            // Scanned / image-based PDF from here on.
            if (!caps.pdf) {
                // Provider cannot take PDFs: OCR locally.
                const pdfOcr = await ocrPdf(target);
                return textOnly(`<<file path="${target}" kind="pdf-ocr">>\n${pdfOcr}\n<</file>>\n[Note: this PDF was OCR'd locally because the current model does not support PDF input; accuracy is limited.]`);
            }
            try {
                const buffer = await fs.readFile(target);
                return [
                    { type: 'file', data: buffer, mediaType: 'application/pdf', filename: path.basename(target) },
                ];
            }
            catch (err) {
                return textOnly(`[Failed to attach PDF ${ref.raw}: ${reasonOf(err)}]`);
            }
        }
        default:
            // Every remaining kind is handled as an image below.
            break;
    }
    if (caps.image) {
        try {
            const buffer = await fs.readFile(target);
            return [
                { type: 'text', text: `<<file path="${target}" kind="image">>` },
                { type: 'image', image: buffer, mediaType: mediaTypeFor(target) },
            ];
        }
        catch (err) {
            return textOnly(`[Failed to attach image ${ref.raw}: ${reasonOf(err)}]`);
        }
    }
    // Text-only provider. Prefer a vision sub-agent when one is available —
    // a caption covers both text and visual content, OCR only catches text.
    const sub = pickVisionProvider();
    if (sub) {
        try {
            const caption = await captionImage(target, sub);
            onNotice?.(`Captioned image via ${sub.modelId}`);
            return textOnly(`<<file path="${target}" kind="image-caption" via="${sub.modelId}">>\n${caption}\n<</file>>\n[Note: the current model cannot see images. The above description was generated by ${sub.label} (vision sub-agent), not the current model. For complex visual tasks, /model switch to a vision-capable model and ask follow-ups directly.]`);
        }
        catch (err) {
            // Report and continue with OCR below.
            onNotice?.(`Vision sub-agent (${sub.label}) failed: ${reasonOf(err)} — falling back to OCR`);
        }
    }
    // No sub-agent (or it failed): OCR. The note warns the model that this is
    // not true image understanding so it doesn't confidently describe
    // colors/layout/etc.
    const imageOcr = await ocrImage(target);
    return textOnly(`<<file path="${target}" kind="image-ocr">>\n${imageOcr}\n<</file>>\n[Note: the current model cannot natively see images. Only OCR text is available; visual content (layout, diagrams, photos) is NOT visible.]`);
}
|
|
373
|
+
/**
 * Build the `content` of a user message from prompt text.
 *
 * When the prompt references no files the text is handed back untouched as
 * a plain string, so simple prompts stay on the string fast path (existing
 * provider behavior / caching semantics are unaffected). Otherwise the
 * result is an array: the original text as the first part, followed by the
 * part(s) of each referenced file in reference order. Files are ingested one
 * after another, not concurrently.
 *
 * @param text     The raw prompt text.
 * @param caps     Provider capabilities, forwarded to `ingestFile`.
 * @param onNotice Optional notice callback, forwarded to `ingestFile`.
 * @returns The plain string or the array of content parts.
 */
export async function buildUserContent(text, caps, onNotice) {
    const references = extractFileReferences(text);
    if (references.length === 0) {
        return text;
    }
    const content = [{ type: 'text', text }];
    for (const reference of references) {
        const fileParts = await ingestFile(reference, caps, onNotice);
        for (const part of fileParts) {
            content.push(part);
        }
    }
    return content;
}
|
|
390
|
+
//# sourceMappingURL=file-ingest.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-ingest.js","sourceRoot":"","sources":["../../src/agent/file-ingest.ts"],"names":[],"mappings":"AAAA,qDAAqD;AACrD,EAAE;AACF,qEAAqE;AACrE,uEAAuE;AACvE,EAAE;AACF,2CAA2C;AAC3C,oFAAoF;AACpF,4DAA4D;AAC5D,mFAAmF;AACnF,EAAE;AACF,0EAA0E;AAC1E,0EAA0E;AAC1E,sEAAsE;AACtE,OAAO,EAAE,MAAM,kBAAkB,CAAA;AACjC,OAAO,IAAI,MAAM,WAAW,CAAA;AAK5B,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAA;AAC9C,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAA;AAEvE;;;;;uDAKuD;AACvD,KAAK,UAAU,iBAAiB;IAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAA;IACnD,MAAM,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACxC,OAAO,GAAG,CAAA;AACZ,CAAC;AAiBD;+CAC+C;AAC/C,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ;IACxE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACzD,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC5C,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI;IAChE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO;IACnE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ;IAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS;IACrE,MAAM,EAAE,MAAM,EAAE,aAAa,EAAE,WAAW,EAAE,YAAY,EAAE,eAAe;CAC1E,CAAC,CAAA;AAEF,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;AACpF,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;AAEtF;qDACqD;AACrD,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;IAChD,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,MAAM,CAAA;IAC3C,IAAI,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,OAAO,CAAA;IAC7C,IAAI,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAA;IAC/C,IAAI,GAAG,KAAK,MAAM;QAAE,OAAO,KAAK,CAAA;IAEhC,wCAAwC;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,gBAAgB,EAAE,GAAG,MAAM,MAAM,CAAC,
WAAW,CAAC,CAAA;QACtD,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QACjD,IAAI,CAAC,QAAQ;YAAE,OAAO,MAAM,CAAA,CAAC,uCAAuC;QACpE,IAAI,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;YAAE,OAAO,OAAO,CAAA;QACtD,IAAI,QAAQ,CAAC,IAAI,KAAK,iBAAiB;YAAE,OAAO,KAAK,CAAA;QACrD,IACE,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC;YACxC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC;YACtC,OAAO,QAAQ,CAAA;QACjB,IAAI,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;YAAE,OAAO,MAAM,CAAA;QACpD,OAAO,SAAS,CAAA;IAClB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAA;IAClB,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAa;IACjD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAyB,CAAA;IAE7C,uEAAuE;IACvE,wEAAwE;IACxE,MAAM,OAAO,GAAG,yDAAyD,CAAA;IACzE,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QACtB,IAAI,CAAC,GAAG;YAAE,SAAQ;QAClB,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;QAC1E,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAA;IACtD,CAAC;IAED,wEAAwE;IACxE,4EAA4E;IAC5E,MAAM,SAAS,GAAG,0DAA0D,CAAA;IAC5E,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC1C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QACtB,IAAI,CAAC,GAAG;YAAE,SAAQ;QAClB,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAA;QAC/B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAA;IAC/D,CAAC;IAED,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,CAAA;AAC3B,CAAC;AAED;;0DAE0D;AAC1D,KAAK,UAAU,YAAY,CAAC,QAAgB;IAC1C,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;IACpD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IACjC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAC/D,CAAC;AAED;;2CAE2C;AAC3C,KAAK,UAAU,cAAc,CAAC,QAAgB;IAC5C,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAA
A;QAC9C,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;QAC7D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,CAAA;YACrC,OAAO,MAAM,CAAC,IAAI,IAAI,EAAE,CAAA;QAC1B,CAAC;gBAAS,CAAC;YACT,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;QACxC,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED;;oDAEoD;AACpD,KAAK,UAAU,iBAAiB,CAAC,QAAgB;IAC/C,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;IAChD,IAAI,CAAC;QACH,IAAI,GAAG,KAAK,OAAO,EAAE,CAAC;YACpB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAA;YACvC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;YAC/D,OAAO,MAAM,CAAC,KAAK,CAAA;QACrB,CAAC;QACD,IAAI,GAAG,KAAK,OAAO,EAAE,CAAC;YACpB,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAA;YACjC,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;YAClC,MAAM,KAAK,GAAa,EAAE,CAAA;YAC1B,KAAK,MAAM,SAAS,IAAI,EAAE,CAAC,UAAU,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBAClC,IAAI,CAAC,KAAK;oBAAE,SAAQ;gBACpB,KAAK,CAAC,IAAI,CAAC,cAAc,SAAS,SAAS,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;YAC9E,CAAC;YACD,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC3B,CAAC;QACD,wDAAwD;QACxD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAA;QACrD,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAA;QACpD,OAAO,GAAG,CAAC,MAAM,EAAE,CAAA;IACrB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;QAC5D,OAAO,gCAAgC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,GAAG,GAAG,CAAA;IAC3E,CAAC;AACH,CAAC;AAED;;;oDAGoD;AACpD,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,QAAgB;IAC7C,IAAI,CAAC;QACH,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAA;QACrD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,EAAE,CAAC,EAAE;YACvD,SAAS,EAAE,MAAM,iBAAiB,EAAE;SACrC,CAAC,CAAA;QACF,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;YAC
jD,OAAO,IAAI,CAAC,IAAI,IAAI,EAAE,CAAA;QACxB,CAAC;gBAAS,CAAC;YACT,MAAM,MAAM,CAAC,SAAS,EAAE,CAAA;QAC1B,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;QAC5D,OAAO,gBAAgB,GAAG,GAAG,CAAA;IAC/B,CAAC;AACH,CAAC;AAED;;;wCAGwC;AACxC,KAAK,UAAU,MAAM,CAAC,QAAgB;IACpC,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAA;QAC9C,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;QAC7D,IAAI,WAAwE,CAAA;QAC5E,IAAI,CAAC;YACH,WAAW,GAAG,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAuB,CAAA;QACnG,CAAC;gBAAS,CAAC;YACT,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;QACxC,CAAC;QAED,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAA;QACrD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,EAAE,CAAC,EAAE;YACvD,SAAS,EAAE,MAAM,iBAAiB,EAAE;SACrC,CAAC,CAAA;QACF,IAAI,CAAC;YACH,MAAM,GAAG,GAAa,EAAE,CAAA;YACxB,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;gBACrC,IAAI,CAAC,IAAI,CAAC,IAAI;oBAAE,SAAQ;gBACxB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;gBAC/D,GAAG,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,UAAU,SAAS,IAAI,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC,CAAA;YACjE,CAAC;YACD,OAAO,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACzB,CAAC;gBAAS,CAAC;YACT,MAAM,MAAM,CAAC,SAAS,EAAE,CAAA;QAC1B,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;QAC5D,OAAO,oBAAoB,GAAG,GAAG,CAAA;IACnC,CAAC;AACH,CAAC;AAED;;2CAE2C;AAC3C,SAAS,YAAY,CAAC,QAAgB;IACpC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;IAChD,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,YAAY,CAAA;IAC1D,IAAI,GAAG,KAAK,MAAM;QAAE,OAAO,WAAW,CAAA;IACtC,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,YAAY,CAAA;IACxC,IAAI,GAAG,KAAK,MAAM;QAAE,OAAO,WAAW,CAAA;IACtC,IAAI,GAAG,KA
AK,MAAM;QAAE,OAAO,WAAW,CAAA;IACtC,OAAO,WAAW,CAAA;AACpB,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,GAAkB,EAClB,IAA0B,EAC1B,QAAgC;IAEhC,IAAI,IAAc,CAAA;IAClB,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;QAC/B,IAAI,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;QAC5D,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,CAAC,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,CAAA;IACrE,CAAC;IAED,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QAC1C,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;YACjD,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,QAAQ,IAAI,aAAa,EAAE,CAAC,CAAA;QAC5F,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC5D,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,mBAAmB,GAAG,CAAC,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,CAAA;QACxE,CAAC;IACH,CAAC;IAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;QACtD,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,sBAAsB,IAAI,aAAa,EAAE,CAAC,CAAA;IAC1G,CAAC;IAED,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;QACxD,uEAAuE;QACvE,uEAAuE;QACvE,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAClC,OAAO;gBACL,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,wBAAwB,SAAS,aAAa,EAAE;aACvG,CAAA;QACH,CAAC;QACD,6BAA6B;QAC7B,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;gBAClD,OAAO;oBACL,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,EAAE;iBACxG,CAAA;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBAC5D,OA
AO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,yBAAyB,GAAG,CAAC,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,CAAA;YAC9E,CAAC;QACH,CAAC;QACD,uCAAuC;QACvC,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;QAC1C,OAAO;YACL;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,uBAAuB,GAAG,4HAA4H;aAC7L;SACF,CAAA;IACH,CAAC;IAED,SAAS;IACT,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;YAClD,OAAO;gBACL,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,kBAAkB,EAAE;gBAC1E,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,CAAC,GAAG,CAAC,YAAY,CAAC,EAAE;aAC5E,CAAA;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC5D,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,2BAA2B,GAAG,CAAC,GAAG,KAAK,GAAG,GAAG,EAAE,CAAC,CAAA;QAChF,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,yEAAyE;IACzE,uEAAuE;IACvE,oEAAoE;IACpE,MAAM,GAAG,GAAG,kBAAkB,EAAE,CAAA;IAChC,IAAI,GAAG,EAAE,CAAC;QACR,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC,YAAY,EAAE,GAAG,CAAC,CAAA;YACzD,QAAQ,EAAE,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAA;YAChD,OAAO;gBACL;oBACE,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,+BAA+B,GAAG,CAAC,OAAO,QAAQ,OAAO,mGAAmG,GAAG,CAAC,KAAK,6IAA6I;iBACzV;aACF,CAAA;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC5D,QAAQ,EAAE,CAAC,qBAAqB,GAAG,CAAC,KAAK,aAAa,GAAG,wBAAwB,CAAC,CAAA;YAClF,sBAAsB;QACxB,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,sEAAsE;IACtE,8BAA8B;IAC9B,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;IAC5C,OAAO;QACL;YACE,IAAI,EAAE,MAAM;YACZ,IAAI,EAAE,gBAAgB,GAAG,CAAC,YAAY,yBAAyB,GAAG,0JAA0J;SAC7N;KACF,CAAA;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,IAA0B,EAC1B,QAAgC;IAEhC,MAAM,IAAI,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAA;IACxC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IAElC,MAAM,KAAK,GAAmB,CAAC,EAAE,IAAI,EAAE,MAAM
,EAAE,IAAI,EAAE,CAAC,CAAA;IACtD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,GAAG,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAA;QACtD,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAA;IACzB,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ModelMessage } from 'ai';
|
|
2
|
+
/** Outcome of a light (no-LLM) compaction pass over the message history. */
export interface LightCompactResult {
    /** The message list produced by the pass. */
    messages: Array<ModelMessage>;
    /**
     * How many messages were dropped. Handy for UI / telemetry; when this is
     * zero the caller may still want to fall through to the LLM summariser.
     */
    dropped: number;
}
|
|
8
|
+
/**
 * Remove loop-guard tool-call/result pairs from `messages`, leaving every
 * other message as it was. The input array itself is never mutated.
 *
 * @param messages Conversation history to compact.
 * @returns The resulting messages plus the number of messages dropped.
 */
export declare function lightCompactMessages(messages: Array<ModelMessage>): LightCompactResult;
|
|
13
|
+
//# sourceMappingURL=light-compact.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"light-compact.d.ts","sourceRoot":"","sources":["../../src/agent/light-compact.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,IAAI,CAAA;AAmEtC,MAAM,WAAW,kBAAkB;IACjC,QAAQ,EAAE,YAAY,EAAE,CAAA;IACxB;uEACmE;IACnE,OAAO,EAAE,MAAM,CAAA;CAChB;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,YAAY,EAAE,GAAG,kBAAkB,CAmBjF"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// @x-code-cli/core — Light-weight message compaction (no LLM call)
|
|
2
|
+
//
|
|
3
|
+
// The main compression path (`compressMessages` in loop.ts) summarises old
|
|
4
|
+
// turns by making a separate `generateText` call — that's a network round
|
|
5
|
+
// trip plus a full pass over the messages, which is wasteful when the bulk
|
|
6
|
+
// of the context comes from a narrow, obvious source: repeated tool-call
|
|
7
|
+
// failures that the loop guard already flagged.
|
|
8
|
+
//
|
|
9
|
+
// This module runs a cheap O(n) pass that drops the messages we can safely
|
|
10
|
+
// throw away without losing signal:
|
|
11
|
+
// - tool-call + tool-result pairs whose result is a `[loop-guard]` notice
|
|
12
|
+
// (the model has already been told to stop; the blocked calls don't
|
|
13
|
+
// teach it anything new on replay)
|
|
14
|
+
// - (NOTE: described here, but not implemented by the code in this module) tool-result payloads that are PowerShell noise stacks older than the
|
|
15
|
+
// most recent one (keep at most the latest so the model can still see
|
|
16
|
+
// the current error shape, drop older duplicates)
|
|
17
|
+
//
|
|
18
|
+
// Callers should run this BEFORE invoking the LLM summariser so the
|
|
19
|
+
// summariser operates on the signal-rich remainder.
|
|
20
|
+
/** Prefix that marks a tool-result as a loop-guard notice. Results starting
 *  with it — and the tool calls they answer — are dropped by this module. */
const LOOP_GUARD_SENTINEL = '[loop-guard]';

/**
 * Tell whether `part` is a tool-result carrying a loop-guard notice.
 *
 * Only outputs shaped like `{ type: 'text', value: string }` are inspected;
 * every other part or output shape yields `false`.
 */
function isToolResultDropTarget(part) {
    if (part?.type !== 'tool-result')
        return false;
    const output = part.output;
    return output?.type === 'text'
        && typeof output.value === 'string'
        && output.value.startsWith(LOOP_GUARD_SENTINEL);
}

/**
 * Remove loop-guard tool-result parts from a `tool` message.
 *
 * Returns the message as-is when nothing was removed, a shallow copy with the
 * filtered content when some parts remain, or `null` when every part was a
 * loop-guard notice so the caller can drop the whole message.
 *
 * Filtering per part (instead of discarding the message as soon as one part
 * matches) matters when a single tool message carries several results:
 * the unrelated results must survive, otherwise their tool calls would be
 * left in the history without a matching result.
 */
function stripLoopGuardResults(msg) {
    if (msg.role !== 'tool')
        return msg;
    const content = msg.content;
    if (!Array.isArray(content))
        return msg;
    const kept = content.filter((part) => !isToolResultDropTarget(part));
    if (kept.length === content.length)
        return msg;
    if (kept.length === 0)
        return null;
    return { ...msg, content: kept };
}

/**
 * Remove an assistant message's tool-call parts whose id is in `idsToRemove`.
 *
 * Returns the message as-is when nothing was removed, a shallow copy with the
 * filtered content otherwise, or `null` when every part was removed so the
 * caller can drop the whole message.
 */
function stripToolCallParts(msg, idsToRemove) {
    if (msg.role !== 'assistant')
        return msg;
    const content = msg.content;
    if (!Array.isArray(content))
        return msg;
    const kept = content.filter((part) => !(part?.type === 'tool-call'
        && typeof part.toolCallId === 'string'
        && idsToRemove.has(part.toolCallId)));
    if (kept.length === content.length)
        return msg;
    if (kept.length === 0)
        return null;
    return { ...msg, content: kept };
}

/** Collect the toolCallIds whose tool-result was a loop-guard notice. */
function collectLoopGuardedIds(messages) {
    const ids = new Set();
    for (const msg of messages) {
        if (msg.role !== 'tool' || !Array.isArray(msg.content))
            continue;
        for (const part of msg.content) {
            if (isToolResultDropTarget(part) && typeof part.toolCallId === 'string') {
                ids.add(part.toolCallId);
            }
        }
    }
    return ids;
}

/**
 * Drop loop-guard tool-call/result pairs from the message array. Leaves
 * everything else untouched. Does not mutate the input array or its messages.
 *
 * @param messages Conversation history to compact.
 * @returns `messages` — the compacted list (the very same array when no
 *   loop-guard result with a string id was found) — and `dropped`, the number
 *   of messages removed entirely. Messages that merely lost some of their
 *   parts are kept (as shallow copies) and are not counted.
 */
export function lightCompactMessages(messages) {
    const idsToRemove = collectLoopGuardedIds(messages);
    if (idsToRemove.size === 0)
        return { messages, dropped: 0 };
    const out = [];
    let dropped = 0;
    for (const msg of messages) {
        // Tool messages lose only their loop-guard results; assistant messages
        // lose only the matching tool calls. Other roles pass through as-is.
        const compacted = msg.role === 'tool'
            ? stripLoopGuardResults(msg)
            : stripToolCallParts(msg, idsToRemove);
        if (compacted === null) {
            dropped++;
            continue;
        }
        out.push(compacted);
    }
    return { messages: out, dropped };
}
|
|
106
|
+
//# sourceMappingURL=light-compact.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"light-compact.js","sourceRoot":"","sources":["../../src/agent/light-compact.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,EAAE;AACF,2EAA2E;AAC3E,0EAA0E;AAC1E,2EAA2E;AAC3E,yEAAyE;AACzE,gDAAgD;AAChD,EAAE;AACF,2EAA2E;AAC3E,oCAAoC;AACpC,4EAA4E;AAC5E,wEAAwE;AACxE,uCAAuC;AACvC,2EAA2E;AAC3E,0EAA0E;AAC1E,sDAAsD;AACtD,EAAE;AACF,oEAAoE;AACpE,oDAAoD;AAIpD,kEAAkE;AAClE,MAAM,mBAAmB,GAAG,cAAc,CAAA;AAQ1C,SAAS,sBAAsB,CAAC,IAAwB;IACtD,IAAI,IAAI,EAAE,IAAI,KAAK,aAAa;QAAE,OAAO,KAAK,CAAA;IAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAC1B,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAA;IACzB,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC/D,OAAO,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,mBAAmB,CAAC,CAAA;IACrD,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAiB;IAC5C,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM;QAAE,OAAO,KAAK,CAAA;IACrC,MAAM,KAAK,GAAG,GAAG,CAAC,OAA0C,CAAA;IAC5D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACvC,OAAO,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAA;AAC3C,CAAC;AAED;;;yCAGyC;AACzC,SAAS,kBAAkB,CAAC,GAAiB,EAAE,WAAwB;IACrE,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW;QAAE,OAAO,GAAG,CAAA;IACxC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAmE,CAAA;IACvF,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;QAAE,OAAO,GAAG,CAAA;IAEvC,IAAI,OAAO,GAAG,KAAK,CAAA;IACnB,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACvC,IAAI,IAAI,EAAE,IAAI,KAAK,WAAW,IAAI,OAAO,IAAI,CAAC,UAAU,KAAK,QAAQ,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1G,OAAO,GAAG,IAAI,CAAA;YACd,OAAO,KAAK,CAAA;QACd,CAAC;QACD,OAAO,IAAI,CAAA;IACb,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,OAAO;QAAE,OAAO,GAAG,CAAA;IACxB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IACtC,OAAO,EAAE,GAAG,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAkB,CAAA;AACtD,CAAC;AAED,yEAAyE;AACzE,SAAS,qBAAqB,CAAC,QAAwB;IACrD,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;IAC7B,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM;YAAE,SAAQ;QACjC,MAAM,KAAK,GAAG,GAAG,CAAC,OAA0C,CAAA;QAC5D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;YAAE,SAAQ;QACnC,KAAK,MAAM,IAAI,IAAI,KAAK,
EAAE,CAAC;YACzB,IAAI,sBAAsB,CAAC,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;gBACxE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAA;AACZ,CAAC;AASD;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,QAAwB;IAC3D,MAAM,WAAW,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAA;IACnD,IAAI,WAAW,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,EAAE,CAAA;IAE3D,MAAM,GAAG,GAAmB,EAAE,CAAA;IAC9B,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,IAAI,mBAAmB,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,EAAE,CAAA;YACT,SAAQ;QACV,CAAC;QACD,MAAM,QAAQ,GAAG,kBAAkB,CAAC,GAAG,EAAE,WAAW,CAAC,CAAA;QACrD,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;YACrB,OAAO,EAAE,CAAA;YACT,SAAQ;QACV,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IACpB,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,CAAA;AACnC,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { LoopState } from './loop-state.js';
|
|
2
|
+
/**
 * Soft threshold: once this many (or more) matching tool calls sit in the
 * rolling window, the synthetic nudge is triggered.
 */
export declare const SOFT_LOOP_THRESHOLD: 3;
/**
 * Hard threshold: at this count (or more) the turn is aborted and the user
 * is prompted.
 */
export declare const HARD_LOOP_THRESHOLD: 5;
/** Number of entries in the rolling window that is scanned for duplicates. */
export declare const LOOP_WINDOW_SIZE: 8;
|
|
9
|
+
/**
 * Produce the hash used for duplicate tool-call detection.
 *
 * The digest is truncated to 16 hex characters; at that length the chance of
 * a collision is vanishingly small for the 8-entry window it is compared
 * against.
 *
 * @param toolName Name of the tool being called.
 * @param input The call's input payload.
 * @returns The truncated hash string.
 */
export declare function hashToolCall(toolName: string, input: unknown): string;
|
|
13
|
+
/** Verdict of a loop check, discriminated by `kind`. */
export type LoopCheck =
    /** No loop detected — dispatch this tool call normally. */
    | { kind: 'ok' }
    /**
     * Soft threshold reached — inject a synthetic tool-result telling the
     * model to stop, and SKIP actually running the tool this round.
     * `toolCallId` is the id of the current call, so the synthetic result
     * reads as the response to it.
     */
    | { kind: 'soft-block'; toolCallId: string; message: string }
    /** Hard threshold reached — abort the turn and prompt the user. */
    | { kind: 'hard-block'; toolName: string; message: string };
|
|
33
|
+
/**
 * Decide whether the incoming tool call duplicates recent calls in the
 * rolling window and tell the caller how to proceed.
 *
 * This function does NOT mutate `state`; the caller commits the hash through
 * {@link recordToolCall} once the call actually proceeds.
 *
 * A match requires both the same hash AND the same toolName, so a fresh
 * command with identical-looking args under a different tool never trips
 * the guard.
 *
 * @param state Loop state holding the rolling window.
 * @param toolName Name of the tool about to be called.
 * @param input Input payload of the call.
 * @param toolCallId Id of the current call.
 * @returns The verdict for this call.
 */
export declare function checkForLoop(state: LoopState, toolName: string, input: unknown, toolCallId: string): LoopCheck;
|
|
43
|
+
/**
 * Commit a tool call to the rolling window. The window size is bounded so
 * the array does not keep growing over long sessions.
 *
 * @param state Loop state holding the rolling window.
 * @param toolName Name of the tool that was called.
 * @param input Input payload of the call.
 */
export declare function recordToolCall(state: LoopState, toolName: string, input: unknown): void;
|
|
46
|
+
/**
 * Build the synthetic tool-result message that tells the model its call was
 * blocked by the loop guard. To the model it looks as if the tool itself
 * returned it, and the model usually adjusts on the next turn.
 *
 * @param toolName Name of the blocked tool.
 * @param toolCallId Id of the blocked call.
 * @param message Text to show the model.
 * @returns The synthetic message.
 */
export declare function syntheticLoopBlockResult(toolName: string, toolCallId: string, message: string): import("ai").ModelMessage;
|
|
50
|
+
//# sourceMappingURL=loop-guard.d.ts.map
|