fast-context-skill 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/NOTICE.md +12 -0
- package/README.md +172 -0
- package/SKILL.md +116 -0
- package/package.json +34 -0
- package/references/script-contract.md +70 -0
- package/src/cli.mjs +348 -0
- package/src/config.mjs +40 -0
- package/src/core.mjs +2246 -0
- package/src/directory-scorer.mjs +1086 -0
- package/src/executor.mjs +659 -0
- package/src/extract-key.mjs +93 -0
- package/src/project-path.mjs +47 -0
- package/src/protobuf.mjs +235 -0
package/src/core.mjs
ADDED
|
@@ -0,0 +1,2246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Windsurf Fast Context — core protocol implementation (Node.js).
|
|
3
|
+
*
|
|
4
|
+
* Reverse-engineered Windsurf SWE-grep Connect-RPC/Protobuf protocol
|
|
5
|
+
* for standalone AI-driven semantic code search.
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* query + tree → Windsurf Devstral API
|
|
9
|
+
* → Devstral returns tool_calls (rg/readfile/tree/ls/glob, up to 8 parallel)
|
|
10
|
+
* → execute locally → send results back → repeat for N rounds
|
|
11
|
+
* → ANSWER: file paths + line ranges + suggested rg patterns
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { readdirSync, existsSync, statSync, readFileSync } from "node:fs";
|
|
15
|
+
import { resolve, join, relative, sep, isAbsolute, extname } from "node:path";
|
|
16
|
+
import { execFileSync } from "node:child_process";
|
|
17
|
+
import { gzipSync } from "node:zlib";
|
|
18
|
+
import { randomUUID } from "node:crypto";
|
|
19
|
+
import { platform, arch, release, version as osVersion, hostname, cpus, totalmem } from "node:os";
|
|
20
|
+
import treeNodeCli from "tree-node-cli";
|
|
21
|
+
|
|
22
|
+
import {
|
|
23
|
+
ProtobufEncoder,
|
|
24
|
+
extractStrings,
|
|
25
|
+
connectFrameEncode,
|
|
26
|
+
connectFrameDecode,
|
|
27
|
+
} from "./protobuf.mjs";
|
|
28
|
+
import { ToolExecutor } from "./executor.mjs";
|
|
29
|
+
import { rgPath } from "@vscode/ripgrep";
|
|
30
|
+
import { extractKey } from "./extract-key.mjs";
|
|
31
|
+
import { scoreDirectories, tokenize as tokenizeBM25 } from "./directory-scorer.mjs";
|
|
32
|
+
|
|
33
|
+
// ─── Error Classification ──────────────────────────────────
|
|
34
|
+
|
|
35
|
+
/**
 * Error subtype that carries a machine-readable classification code.
 *
 * Besides the standard `message`, instances expose `name`
 * ("FastContextError"), `code`, and `details`.
 */
class FastContextError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - TIMEOUT | PAYLOAD_TOO_LARGE | RATE_LIMITED | AUTH_ERROR | SERVER_ERROR | NETWORK_ERROR
   * @param {Object} [details] - Extra context; defaults to an empty object.
   */
  constructor(message, code, details = {}) {
    super(message);
    Object.assign(this, { name: "FastContextError", code, details });
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Classify a raw fetch/HTTP failure into a FastContextError.
 *
 * Rules, in order:
 * 1. An existing FastContextError is returned unchanged.
 * 2. A truthy `status` maps 413 → PAYLOAD_TOO_LARGE, 429 → RATE_LIMITED,
 *    401/403 → AUTH_ERROR, anything else → SERVER_ERROR (details: `{ status }`).
 * 3. A name of "AbortError"/"TimeoutError", or a message containing
 *    "timeout" (case-insensitive), maps to TIMEOUT.
 * 4. Everything else is NETWORK_ERROR.
 *
 * Thrown values that are not objects (strings, numbers, null, undefined) are
 * handled too instead of crashing on property access. When a non-nullish
 * value was thrown, it is attached to the result as a non-enumerable `cause`
 * so the original stack is not lost.
 *
 * @param {unknown} err - The caught value (normally an Error).
 * @returns {FastContextError}
 */
function _classifyError(err) {
  if (err instanceof FastContextError) return err;

  // Anything can be thrown in JavaScript; only read properties off real objects.
  const isObjectLike = err !== null && typeof err === "object";
  let message;
  if (isObjectLike) {
    message = err.message;
  } else {
    message = err == null ? "Unknown error" : String(err);
  }
  const status = isObjectLike ? err.status : undefined;
  const name = isObjectLike ? err.name : undefined;

  const classify = (code, details) => {
    // Passing `undefined` lets the constructor's `details = {}` default apply.
    const classified = new FastContextError(message, code, details);
    if (err != null) {
      // Non-enumerable (like the native Error `cause`) so serialized output is unchanged.
      Object.defineProperty(classified, "cause", {
        value: err,
        writable: true,
        configurable: true,
        enumerable: false,
      });
    }
    return classified;
  };

  // HTTP status-based classification
  if (status) {
    if (status === 413) return classify("PAYLOAD_TOO_LARGE", { status });
    if (status === 429) return classify("RATE_LIMITED", { status });
    if (status === 401 || status === 403) return classify("AUTH_ERROR", { status });
    return classify("SERVER_ERROR", { status });
  }

  // Timeout (AbortSignal.timeout throws AbortError or TimeoutError)
  if (name === "AbortError" || name === "TimeoutError" || /timeout/i.test(message)) {
    return classify("TIMEOUT");
  }

  // Everything else is a network-level issue
  return classify("NETWORK_ERROR");
}
|
|
77
|
+
|
|
78
|
+
// ─── Protocol Constants ────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
// Base URLs of the two Windsurf backend services this module talks to
// (API server and auth service). Method paths are appended elsewhere in the file.
const API_BASE = "https://server.self-serve.windsurf.com/exa.api_server_pb.ApiServerService";
const AUTH_BASE = "https://server.self-serve.windsurf.com/exa.auth_pb.AuthService";
// Client identity values. Each version/model value can be overridden through
// the environment variable of the same name; an unset or empty variable falls
// back to the literal default.
// NOTE(review): presumably embedded in request metadata — the usage is outside this view.
const WS_APP = "windsurf";
const WS_APP_VER = process.env.WS_APP_VER || "1.48.2";
const WS_LS_VER = process.env.WS_LS_VER || "1.9544.35";
const WS_MODEL = process.env.WS_MODEL || "MODEL_SWE_1_6_FAST";
// Debug switch: true only when FAST_CONTEXT_DEBUG is exactly "1" or "true".
const DEBUG_MODE = process.env.FAST_CONTEXT_DEBUG === "1" || process.env.FAST_CONTEXT_DEBUG === "true";
|
|
87
|
+
|
|
88
|
+
// Default excludes — directories/patterns that are almost never source code.
// Aligned with Windsurf fast-search guidance + ace-tool-rs defaults.
// Users can add more via the exclude_paths parameter; these are always applied
// (_mergeExcludePaths starts from this list and appends caller entries).
const DEFAULT_EXCLUDE_PATHS = [
  // Dependency directories
  "node_modules",
  "vendor",
  ".venv",
  "venv",
  // Version control
  ".git",
  ".svn",
  ".hg",
  // Build output
  "dist",
  "build",
  "out",
  "target",
  ".next",
  ".nuxt",
  ".output",
  // Caches
  "__pycache__",
  ".cache",
  ".pytest_cache",
  // Minified / obfuscated artifacts
  "*.min.*",
  // Common irrelevant directories
  "coverage",
  ".idea",
  ".vscode",
];
|
|
120
|
+
|
|
121
|
+
// Repo-map optimization defaults (tunable via MCP params).
// NOTE(review): the consumers of these fields are outside this view; the
// per-field notes below are inferred from the names — confirm against the caller.
const REPO_MAP_OPTIMIZER_DEFAULTS = {
  mode: "bootstrap_hotspot", // classic | bootstrap_hotspot
  bootstrapTreeDepth: 1, // presumably the tree depth of the mini repo map used in the bootstrap phase
  hotspotTopK: 4, // presumably how many hotspot directories are expanded
  hotspotTreeDepth: 2, // presumably the tree depth used for each hotspot directory
  maxBytes: 120 * 1024, // 122880; presumably a size cap on the generated repo map
};
|
|
129
|
+
|
|
130
|
+
/**
 * Combine the built-in exclude list with caller-supplied entries.
 *
 * The defaults come first, followed by each extra entry in input order.
 * Entries that are not non-empty strings, or that are already present,
 * are skipped.
 *
 * @param {string[]} [excludePaths] - Additional patterns; null/undefined is treated as empty.
 * @returns {string[]} A new array; DEFAULT_EXCLUDE_PATHS itself is not modified.
 */
function _mergeExcludePaths(excludePaths = []) {
  const combined = [...DEFAULT_EXCLUDE_PATHS];
  // Membership index kept alongside the ordered result.
  const known = new Set(combined);

  for (const entry of excludePaths || []) {
    const usable = typeof entry === "string" && entry !== "";
    if (!usable || known.has(entry)) continue;
    known.add(entry);
    combined.push(entry);
  }

  return combined;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Expand one exclude pattern into the glob variants to hand to ripgrep.
 *
 * The pattern is trimmed and backslashes are converted to forward slashes.
 * Unless it already starts with a recursive prefix or a leading slash, a
 * second variant prefixed with the recursive-directory glob is added.
 *
 * @param {unknown} pattern
 * @returns {string[]} Zero, one, or two globs; empty for non-strings and blank input.
 */
function _expandExcludeGlobsForRg(pattern) {
  if (typeof pattern !== "string") return [];

  const glob = pattern.trim().replaceAll("\\", "/");
  if (glob === "") return [];

  const alreadyAnchored = glob.startsWith("**/") || glob.startsWith("/");
  return alreadyAnchored ? [glob] : [glob, `**/${glob}`];
}
|
|
150
|
+
|
|
151
|
+
// ─── System Prompt Template ────────────────────────────────
|
|
152
|
+
|
|
153
|
+
const SYSTEM_PROMPT_TEMPLATE = `You are an expert software engineer, responsible for providing context \
|
|
154
|
+
to another engineer to solve a code issue in the current codebase. \
|
|
155
|
+
The user will present you with a description of the issue, and it is \
|
|
156
|
+
your job to provide a series of file paths with associated line ranges \
|
|
157
|
+
that contain ALL the information relevant to understand and correctly \
|
|
158
|
+
address the issue.
|
|
159
|
+
|
|
160
|
+
# IMPORTANT:
|
|
161
|
+
- A relevant file does not mean only the files that must be modified to \
|
|
162
|
+
solve the task. It means any file that contains information relevant to \
|
|
163
|
+
planning and implementing the fix, such as the definitions of classes \
|
|
164
|
+
and functions that are relevant to the pieces of code that will have to \
|
|
165
|
+
be modified.
|
|
166
|
+
- You should include enough context around the relevant lines to allow \
|
|
167
|
+
the engineer to understand the task correctly. You must include ENTIRE \
|
|
168
|
+
semantic blocks (functions, classes, definitions, etc). For example:
|
|
169
|
+
If addressing the issue requires modifying a method within a class, then \
|
|
170
|
+
you should include the entire class definition, not just the lines around \
|
|
171
|
+
the method we want to modify.
|
|
172
|
+
- NEVER truncate these blocks unless they are very large (hundreds of \
|
|
173
|
+
lines or more, in which case providing only a relevant portion of the \
|
|
174
|
+
block is acceptable).
|
|
175
|
+
- Your job is to essentially alleviate the job of the other engineer by \
|
|
176
|
+
giving them a clean starting context from which to start working. More \
|
|
177
|
+
precisely, you should minimize the number of files the engineer has to \
|
|
178
|
+
read to understand and solve the task correctly (while not providing \
|
|
179
|
+
irrelevant code snippets).
|
|
180
|
+
|
|
181
|
+
# ENVIRONMENT
|
|
182
|
+
- Working directory: /codebase. Make sure to run commands in this \
|
|
183
|
+
directory, not \`.
|
|
184
|
+
- Tool access: use the restricted_exec tool ONLY
|
|
185
|
+
- Allowed sub-commands (schema-enforced):
|
|
186
|
+
- rg: Search for patterns in files using ripgrep
|
|
187
|
+
- Required: pattern (string), path (string)
|
|
188
|
+
- Optional: include (array of globs), exclude (array of globs)
|
|
189
|
+
- readfile: Read contents of a file with optional line range
|
|
190
|
+
- Required: file (string)
|
|
191
|
+
- Optional: start_line (int), end_line (int) — 1-indexed, inclusive
|
|
192
|
+
- tree: Display directory structure as a tree
|
|
193
|
+
- Required: path (string)
|
|
194
|
+
- Optional: levels (int)
|
|
195
|
+
- ls: List files in a directory
|
|
196
|
+
- Required: path (string)
|
|
197
|
+
- Optional: long_format (bool), all (bool)
|
|
198
|
+
- glob: Find files matching a glob pattern
|
|
199
|
+
- Required: pattern (string), path (string)
|
|
200
|
+
- Optional: type_filter (file|directory|all)
|
|
201
|
+
|
|
202
|
+
# THINKING RULES
|
|
203
|
+
- Think step-by-step. Plan, reason, and reflect before each tool call.
|
|
204
|
+
- Use tool calls liberally and purposefully to ground every conclusion \
|
|
205
|
+
in real code, not assumptions.
|
|
206
|
+
- If a command fails, rethink and try something different; do not \
|
|
207
|
+
complain to the user.
|
|
208
|
+
- AVOID REDUNDANT SEARCHES: Do not search for the same pattern multiple \
|
|
209
|
+
times with slightly different paths or excludes. One well-targeted search \
|
|
210
|
+
is better than multiple overlapping ones.
|
|
211
|
+
- PRIORITIZE READING over searching: Once you find a file path, read it \
|
|
212
|
+
directly instead of searching for more variations of the same pattern.
|
|
213
|
+
|
|
214
|
+
# FAST-SEARCH DEFAULTS (optimize rg/tree on large repos)
|
|
215
|
+
- Start NARROW, then widen only if needed. Prefer searching likely code \
|
|
216
|
+
roots first (e.g., \`src/\`, \`lib/\`, \`app/\`, \`packages/\`, \`services/\`) \
|
|
217
|
+
instead of \`/codebase\`.
|
|
218
|
+
- Prefer fixed-string search for literals: escape patterns or keep regex \
|
|
219
|
+
simple. Use smart case; avoid case-insensitive unless necessary.
|
|
220
|
+
- Prefer file-type filters and globs (in include) over full-repo scans.
|
|
221
|
+
- Default EXCLUDES for speed (apply via the exclude array): \
|
|
222
|
+
node_modules, .git, dist, build, coverage, .venv, venv, target, out, \
|
|
223
|
+
.cache, __pycache__, vendor, deps, third_party, logs, data, *.min.*
|
|
224
|
+
- Skip huge files where possible; when opening files, prefer reading \
|
|
225
|
+
only relevant ranges with readfile.
|
|
226
|
+
- Limit directory traversal with tree levels to quickly orient before \
|
|
227
|
+
deeper inspection.
|
|
228
|
+
|
|
229
|
+
# SOME EXAMPLES OF WORKFLOWS
|
|
230
|
+
- MAP – Use \`tree\` with small levels; \`rg\` on likely roots to grasp \
|
|
231
|
+
structure and hotspots.
|
|
232
|
+
- ANCHOR – \`rg\` for problem keywords and anchor symbols; restrict by \
|
|
233
|
+
language globs via include.
|
|
234
|
+
- TRACE – Follow imports with targeted \`rg\` in narrowed roots; open \
|
|
235
|
+
files with \`readfile\` scoped to entire semantic blocks.
|
|
236
|
+
- VERIFY – Confirm each candidate path exists by reading or additional \
|
|
237
|
+
searches; drop false positives (tests, vendored, generated) unless they \
|
|
238
|
+
must change.
|
|
239
|
+
|
|
240
|
+
# TOOL USE GUIDELINES
|
|
241
|
+
- You must use a SINGLE restricted_exec call in your answer, that lets \
|
|
242
|
+
you execute at most {max_commands} commands in a single turn. Each command must be \
|
|
243
|
+
an object with a \`type\` field of \`rg\`, \`readfile\`, or \`tree\` and the appropriate fields for that type.
|
|
244
|
+
- Example restricted_exec usage:
|
|
245
|
+
[TOOL_CALLS]restricted_exec[ARGS]{{
|
|
246
|
+
"command1": {{
|
|
247
|
+
"type": "rg",
|
|
248
|
+
"pattern": "Controller",
|
|
249
|
+
"path": "/codebase/slime",
|
|
250
|
+
"include": ["**/*.py"],
|
|
251
|
+
"exclude": ["**/node_modules/**", "**/.git/**", "**/dist/**", \
|
|
252
|
+
"**/build/**", "**/.venv/**", "**/__pycache__/**"]
|
|
253
|
+
}},
|
|
254
|
+
"command2": {{
|
|
255
|
+
"type": "readfile",
|
|
256
|
+
"file": "/codebase/slime/train.py",
|
|
257
|
+
"start_line": 1,
|
|
258
|
+
"end_line": 200
|
|
259
|
+
}},
|
|
260
|
+
"command3": {{
|
|
261
|
+
"type": "tree",
|
|
262
|
+
"path": "/codebase/slime/",
|
|
263
|
+
"levels": 2
|
|
264
|
+
}}
|
|
265
|
+
}}
|
|
266
|
+
- You have at most {max_turns} turns to interact with the environment by calling \
|
|
267
|
+
tools, so issuing multiple commands at once is necessary and encouraged \
|
|
268
|
+
to speed up your research.
|
|
269
|
+
- Each command result may be truncated to 50 lines; prefer multiple \
|
|
270
|
+
targeted reads/searches to build complete context.
|
|
271
|
+
- DO NOT EVER USE MORE THAN {max_commands} commands in a single turn, or you will \
|
|
272
|
+
be penalized.
|
|
273
|
+
|
|
274
|
+
# ANSWER FORMAT (strict format, including tags)
|
|
275
|
+
- You will output an XML structure with a root element "ANSWER" \
|
|
276
|
+
containing "file" elements. Each "file" element will have a "path" \
|
|
277
|
+
attribute and contain "range" elements.
|
|
278
|
+
- You will output this as your final response.
|
|
279
|
+
- The line ranges must be inclusive.
|
|
280
|
+
|
|
281
|
+
Output example inside the "answer" tool argument:
|
|
282
|
+
<ANSWER>
|
|
283
|
+
<file path="/codebase/info_theory/formulas/entropy.py">
|
|
284
|
+
<range>10-60</range>
|
|
285
|
+
<range>150-210</range>
|
|
286
|
+
</file>
|
|
287
|
+
<file path="/codebase/info_theory/data_structures/bits.py">
|
|
288
|
+
<range>1-40</range>
|
|
289
|
+
<range>110-170</range>
|
|
290
|
+
</file>
|
|
291
|
+
</ANSWER>
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
Remember: Prefer narrow, fixed-string, and type-filtered searches with \
|
|
295
|
+
aggressive excludes and size/depth limits. Widen scope only as needed. \
|
|
296
|
+
Use the restricted tools available to you, and output your answer in \
|
|
297
|
+
exactly the specified format.
|
|
298
|
+
|
|
299
|
+
# NO RESULTS POLICY
|
|
300
|
+
If after thorough searching you are confident that NO relevant files exist \
|
|
301
|
+
for the given query (e.g., the function/class/concept does not exist in the \
|
|
302
|
+
codebase), you MUST return an empty ANSWER:
|
|
303
|
+
<ANSWER></ANSWER>
|
|
304
|
+
Do NOT return irrelevant files (such as entry points or config files) just \
|
|
305
|
+
to provide some output. An empty answer is always better than a misleading one.
|
|
306
|
+
|
|
307
|
+
# RESULT COUNT
|
|
308
|
+
Aim to return at most {max_results} files in your answer. Focus on the most \
|
|
309
|
+
relevant files first. If fewer files are relevant, return fewer.
|
|
310
|
+
`;
|
|
311
|
+
|
|
312
|
+
// Message text telling the model that its turn budget is exhausted and it must
// answer now. Where it is injected into the conversation is outside this view.
const FINAL_FORCE_ANSWER =
  "You have no turns left. Now you MUST provide your final ANSWER, even if it's not complete.";
|
|
314
|
+
|
|
315
|
+
const BOOTSTRAP_PROMPT_TEMPLATE = `You are a bootstrap planning agent for codebase hotspot discovery.
|
|
316
|
+
Your ONLY goal is to discover high-signal search keywords and hotspot directories for a later full search phase.
|
|
317
|
+
|
|
318
|
+
# OUTPUT CONTRACT
|
|
319
|
+
- Use the restricted_exec tool ONLY.
|
|
320
|
+
- Prefer rg + tree commands. Avoid deep readfile unless absolutely necessary.
|
|
321
|
+
- Do NOT output final <ANSWER> for code fixes in this phase.
|
|
322
|
+
- Keep commands focused and broad enough to identify likely relevant modules quickly.
|
|
323
|
+
|
|
324
|
+
# TOOL BUDGET
|
|
325
|
+
- You have at most {max_turns} turns.
|
|
326
|
+
- You may use up to {max_commands} commands per turn.
|
|
327
|
+
|
|
328
|
+
# STRATEGY
|
|
329
|
+
1) Start from the provided mini repo map.
|
|
330
|
+
2) Use targeted rg patterns derived from the user problem.
|
|
331
|
+
3) Use tree on likely top-level directories to identify hotspots.
|
|
332
|
+
4) Stop once you have enough keyword and hotspot coverage for phase-2.
|
|
333
|
+
`;
|
|
334
|
+
|
|
335
|
+
/**
 * Shrink the accumulated conversation in place so the next request is smaller.
 *
 * What survives a trim:
 * - messages[0] (system prompt), unchanged;
 * - messages[1] (user message); when its content mentions "Repo Map" it is
 *   swapped for a short "problem statement + map omitted" text, provided that
 *   text is shorter than the original;
 * - a freshly built role-1 summary listing recent commands, files and rg
 *   patterns, so the model keeps the thread;
 * - the tail starting at the tool call (role 2) whose tool_call_id matches the
 *   newest tool result (role 4), plus everything after it. Keeping the pair
 *   together preserves the tool_call_id/ref_call_id linkage.
 *
 * Oversized role-2 and role-4 contents in the kept tail are truncated directly
 * on the message objects.
 *
 * @param {Array} messages - Conversation; mutated in place.
 * @param {Object} [state]
 * @param {string} [state.query] - Preferred source for the problem statement.
 * @param {string[]} [state.recentFiles]
 * @param {string[]} [state.recentPatterns]
 * @param {Array<{type:string, desc:string}>} [state.recentCommands]
 * @param {number} [state.turn]
 * @returns {boolean} true if the array was rewritten; false if there was
 *   nothing to compact or drop (the array is then left untouched).
 */
function _trimMessages(messages, state = {}) {
  if (!Array.isArray(messages) || messages.length < 2) return false;

  const TOOL_RESULTS_CUT = "\n...[tool results truncated]...";
  const systemMsg = messages[0];
  const userMsg = messages[1];

  // Cap each "<commandN_result>…</commandN_result>" section individually so the
  // per-command framing survives, then cap the overall size.
  function shrinkToolResults(text, maxPerBlock = 4000, maxTotal = 20000) {
    if (typeof text !== "string" || text.length <= maxTotal) return text;

    const blockRe = /<(command\d+)_result>\n([\s\S]*?)\n<\/\1_result>/g;
    const kept = [];
    let keptLength = 0;
    for (const match of text.matchAll(blockRe)) {
      const tag = match[1];
      const raw = match[2] || "";
      const body = raw.length > maxPerBlock ? raw.slice(0, maxPerBlock) + "\n...[truncated]..." : raw;
      const block = `<${tag}_result>\n${body}\n</${tag}_result>`;
      kept.push(block);
      keptLength += block.length;
      if (keptLength > maxTotal) break;
    }

    // No recognizable sections: fall back to a plain prefix cut.
    if (kept.length === 0) return text.slice(0, maxTotal) + TOOL_RESULTS_CUT;

    const joined = kept.join("");
    return joined.length <= maxTotal ? joined : joined.slice(0, maxTotal) + TOOL_RESULTS_CUT;
  }

  // Newest tool result (role 4) that carries a non-empty ref_call_id; -1 if none.
  let resultIdx = messages.length - 1;
  while (resultIdx >= 0) {
    const msg = messages[resultIdx];
    if (msg && msg.role === 4 && typeof msg.ref_call_id === "string" && msg.ref_call_id) break;
    resultIdx -= 1;
  }
  const refId = resultIdx >= 0 ? messages[resultIdx].ref_call_id : null;

  // The tool call (role 2) that produced that result, if it is still present.
  let callIdx = -1;
  if (refId) {
    for (let i = resultIdx - 1; i >= 0 && callIdx === -1; i--) {
      const msg = messages[i];
      if (msg && msg.role === 2 && msg.tool_call_id === refId) callIdx = i;
    }
  }

  // Tail = call/result pair plus whatever follows (e.g. a force-answer prompt).
  let tailStart;
  if (resultIdx === -1) {
    // No tool results yet: keep the last few messages only.
    tailStart = Math.max(2, messages.length - 4);
  } else if (callIdx !== -1) {
    tailStart = callIdx;
  } else {
    tailStart = Math.max(2, resultIdx - 1);
  }
  const tail = messages.slice(tailStart);

  // The repo map is usually the largest chunk, so replace it with a stub.
  let compactedUser = userMsg;
  let didCompactUser = false;
  const carriesRepoMap =
    userMsg && typeof userMsg.content === "string" && userMsg.content.includes("Repo Map");
  if (carriesRepoMap) {
    const fromState = typeof state.query === "string" && state.query;
    const problem =
      fromState || userMsg.content.match(/Problem Statement:\s*([^\n]+)/)?.[1]?.trim() || "";
    const stub = `Problem Statement: ${problem}\n\nRepo Map: (omitted to reduce payload). Use tree/rg to explore structure if needed.`;
    if (stub.length < userMsg.content.length) {
      compactedUser = { ...userMsg, content: stub };
      didCompactUser = true;
    }
  }

  // Progress summary that stands in for the dropped history.
  const asList = (value) => (Array.isArray(value) ? value : []);
  const recentCommands = asList(state.recentCommands);
  const recentFiles = asList(state.recentFiles);
  const recentPatterns = asList(state.recentPatterns);
  const turnNote = Number.isInteger(state.turn) ? ` turn=${state.turn}` : "";

  const summaryLines = [`[Context trimmed to reduce payload size.${turnNote}]`];
  if (recentCommands.length) {
    summaryLines.push(`recent_commands: ${recentCommands.slice(-6).map((c) => c.desc).join(" | ")}`);
  }
  if (recentFiles.length) {
    summaryLines.push(`recent_files: ${recentFiles.slice(-12).join(", ")}`);
  }
  if (recentPatterns.length) {
    summaryLines.push(`rg_patterns: ${recentPatterns.slice(-20).join(", ")}`);
  }
  summaryLines.push("Continue from the most recent tool results kept below.");
  const summaryMsg = { role: 1, content: summaryLines.join("\n") };

  // Only worth doing if the user message shrank or some history is dropped.
  if (!didCompactUser && tailStart <= 2) return false;

  // Cut oversized assistant/tool contents in the tail to avoid immediate re-overflow.
  for (const msg of tail) {
    if (!msg || typeof msg.content !== "string") continue;
    if (msg.role === 2 && msg.content.length > 8000) {
      msg.content = msg.content.slice(0, 8000) + "\n...[assistant content truncated]...";
    } else if (msg.role === 4 && msg.content.length > 20000) {
      msg.content = shrinkToolResults(msg.content, 4000, 20000);
    }
  }

  // When the tail already begins at or before index 1 it contains the user
  // message, so it is not pushed a second time.
  const head = tailStart > 1 ? [systemMsg, compactedUser] : [systemMsg];
  messages.splice(0, messages.length, ...head, summaryMsg, ...tail);
  return true;
}
|
|
477
|
+
|
|
478
|
+
/**
 * Render the main-phase system prompt by filling the template placeholders.
 *
 * @param {number} maxTurns - Replaces every "{max_turns}".
 * @param {number} maxCommands - Replaces every "{max_commands}".
 * @param {number} maxResults - Replaces every "{max_results}".
 * @returns {string}
 */
function buildSystemPrompt(maxTurns = 3, maxCommands = 8, maxResults = 10) {
  const substitutions = [
    ["{max_turns}", maxTurns],
    ["{max_commands}", maxCommands],
    ["{max_results}", maxResults],
  ];
  return substitutions.reduce(
    (prompt, [placeholder, value]) => prompt.replaceAll(placeholder, String(value)),
    SYSTEM_PROMPT_TEMPLATE,
  );
}
|
|
490
|
+
|
|
491
|
+
/**
 * Render the bootstrap-phase system prompt by filling the template placeholders.
 *
 * @param {number} maxTurns - Replaces every "{max_turns}".
 * @param {number} maxCommands - Replaces every "{max_commands}".
 * @returns {string}
 */
function buildBootstrapPrompt(maxTurns = 2, maxCommands = 6) {
  const withTurns = BOOTSTRAP_PROMPT_TEMPLATE.replaceAll("{max_turns}", String(maxTurns));
  const withCommands = withTurns.replaceAll("{max_commands}", String(maxCommands));
  return withCommands;
}
|
|
496
|
+
|
|
497
|
+
/**
 * Return the first directory component below the virtual "/codebase" root.
 *
 * Backslashes are treated as path separators. Empty and "." segments directly
 * after the root are skipped, so "/codebase//src" and "/codebase/./src" both
 * yield "src".
 *
 * Returns null when:
 * - the path is not "/codebase" itself or something under "/codebase/"
 *   (a sibling such as "/codebasefoo/bar" does not count);
 * - nothing follows the root;
 * - the first real segment is "..", which would point outside the root.
 *
 * @param {string} [path]
 * @returns {string|null}
 */
function _extractTopDirFromCodebasePath(path = "") {
  const normalized = String(path || "").replace(/\\/g, "/");

  // Require a segment boundary after the root so "/codebasefoo" is rejected.
  const match = /^\/codebase(?:\/(.*))?$/s.exec(normalized);
  if (!match) return null;

  const firstSegment = (match[1] ?? "")
    .split("/")
    .find((segment) => segment !== "" && segment !== ".");
  if (!firstSegment || firstSegment === "..") return null;
  return firstSegment;
}
|
|
504
|
+
|
|
505
|
+
/**
 * Best-effort bootstrap pass: lets the model issue a few rg/tree commands
 * against a shallow repo map and records what it asked for.
 *
 * The rg patterns and the top-level directories of tree commands requested by
 * the model are collected as hints. Any failure ends the phase early and the
 * hints gathered so far are returned; this function does not throw for errors
 * raised inside the phase.
 *
 * @param {Object} opts
 * @param {string} opts.query - Problem statement shown to the model.
 * @param {string} opts.projectRoot - Passed to getRepoMap and ToolExecutor.
 * @param {string} opts.apiKey
 * @param {string} opts.jwt
 * @param {number} opts.timeoutMs - Passed to each streaming request.
 * @param {string[]} opts.excludePaths - Passed to getRepoMap.
 * @param {number} opts.bootstrapTreeDepth - Depth requested for the mini repo map.
 * @param {number} opts.bootstrapMaxTurns - Upper bound on request rounds.
 * @param {number} opts.bootstrapMaxCommands - Commands allowed per round.
 * @param {(msg: string) => void} [opts.onProgress] - Receives "[bootstrap] …" lines.
 * @returns {Promise<{rgPatterns: string[], hotDirs: string[]}>} De-duplicated
 *   hints, limited to the last 30 patterns and the last 12 directories.
 */
async function _runBootstrapPhase({
  query,
  projectRoot,
  apiKey,
  jwt,
  timeoutMs,
  excludePaths,
  bootstrapTreeDepth,
  bootstrapMaxTurns,
  bootstrapMaxCommands,
  onProgress,
}) {
  const report = (msg) => onProgress?.(`[bootstrap] ${msg}`);
  const seenPatterns = [];
  const seenDirs = [];

  // Record the search pattern / inspected top-level directory of one command.
  const noteCommand = (cmd) => {
    if (!cmd || typeof cmd !== "object") return;
    if (cmd.type === "rg" && typeof cmd.pattern === "string" && cmd.pattern) {
      seenPatterns.push(cmd.pattern);
    }
    if (cmd.type === "tree" && typeof cmd.path === "string") {
      const topDir = _extractTopDirFromCodebasePath(cmd.path);
      if (topDir) seenDirs.push(topDir);
    }
  };

  // De-duplicate, then keep only the trailing `limit` entries.
  const lastUnique = (items, limit) => [...new Set(items)].slice(-limit);

  try {
    const { tree: miniMap, depth } = getRepoMap(projectRoot, bootstrapTreeDepth, excludePaths);
    const conversation = [
      { role: 5, content: buildBootstrapPrompt(bootstrapMaxTurns, bootstrapMaxCommands) },
      {
        role: 1,
        content: `Problem Statement: ${query}\n\nRepo Map (tree -L ${depth} /codebase):\n\`\`\`text\n${miniMap}\n\`\`\``,
      },
    ];

    const toolDefs = getToolDefinitions(bootstrapMaxCommands);
    const executor = new ToolExecutor(projectRoot);

    for (let turn = 0; turn < bootstrapMaxTurns; turn++) {
      report(`Turn ${turn + 1}/${bootstrapMaxTurns}`);

      const request = _buildRequest(apiKey, jwt, conversation, toolDefs);
      let response;
      try {
        response = await _streamingRequest(request, timeoutMs);
      } catch (err) {
        report(`request failed: ${err.code || "UNKNOWN"}`);
        break;
      }

      // The phase ends as soon as the model stops asking for restricted_exec.
      const [thinking, toolInfo] = _parseResponse(response);
      if (!toolInfo) break;
      const [toolName, toolArgs] = toolInfo;
      if (toolName !== "restricted_exec") break;

      const callId = randomUUID();
      const argsJson = JSON.stringify(toolArgs);

      Object.keys(toolArgs)
        .filter((key) => key.startsWith("command"))
        .forEach((key) => noteCommand(toolArgs[key]));

      const results = await executor.execToolCallAsync(toolArgs);
      conversation.push(
        {
          role: 2,
          content: thinking,
          tool_call_id: callId,
          tool_name: "restricted_exec",
          tool_args_json: argsJson,
        },
        { role: 4, content: results, ref_call_id: callId },
      );
    }
  } catch {
    // Bootstrap is best-effort. Fall back silently.
  }

  return {
    rgPatterns: lastUnique(seenPatterns, 30),
    hotDirs: lastUnique(seenDirs, 12),
  };
}
|
|
585
|
+
|
|
586
|
+
// ─── Tool Schema ───────────────────────────────────────────
|
|
587
|
+
|
|
588
|
+
/**
 * Build the JSON-schema fragment for one numbered command slot of the
 * restricted_exec tool.
 *
 * The result is an object schema whose `oneOf` lists five variants,
 * distinguished by the `type` constant: rg, readfile, tree, ls, glob.
 * Key order is kept stable because the schema is later serialized.
 *
 * NOTE(review): the description text names only rg/readfile/tree although
 * five variants are offered — confirm whether that wording is intentional.
 *
 * @param {number} n - Slot number, used only in the description text.
 * @returns {Object}
 */
function _buildCommandSchema(n) {
  // Small builders for the property shapes that recur below.
  const kind = (value, description) => ({ type: "string", const: value, description });
  const text = (description) => ({ type: "string", description });
  const int = (description) => ({ type: "integer", description });
  const globList = (description) => ({ type: "array", items: { type: "string" }, description });
  const variant = (properties, required) => ({ properties, required });

  const rg = variant(
    {
      type: kind("rg", "Search for patterns in files using ripgrep."),
      pattern: text("The regex pattern to search for."),
      path: text("The path to search in."),
      include: globList("File patterns to include."),
      exclude: globList("File patterns to exclude."),
    },
    ["type", "pattern", "path"],
  );

  const readfile = variant(
    {
      type: kind("readfile", "Read contents of a file with optional line range."),
      file: text("Path to the file to read."),
      start_line: int("Starting line number (1-indexed)."),
      end_line: int("Ending line number (1-indexed)."),
    },
    ["type", "file"],
  );

  const tree = variant(
    {
      type: kind("tree", "Display directory structure as a tree."),
      path: text("Path to the directory."),
      levels: int("Number of directory levels."),
    },
    ["type", "path"],
  );

  const ls = variant(
    {
      type: kind("ls", "List files in a directory."),
      path: text("Path to the directory."),
      long_format: { type: "boolean" },
      all: { type: "boolean" },
    },
    ["type", "path"],
  );

  const glob = variant(
    {
      type: kind("glob", "Find files matching a glob pattern."),
      pattern: { type: "string" },
      path: { type: "string" },
      type_filter: { type: "string", enum: ["file", "directory", "all"] },
    },
    ["type", "pattern", "path"],
  );

  return {
    type: "object",
    description: `Command ${n} to execute. Must be one of: rg, readfile, or tree.`,
    oneOf: [rg, readfile, tree, ls, glob],
  };
}
|
|
641
|
+
|
|
642
|
+
/**
 * Build the serialized tool list offered to the model.
 *
 * Two function tools are defined: "restricted_exec", with one schema per
 * command slot (command1 … command<maxCommands>, only command1 required),
 * and "answer", which takes a single string argument.
 *
 * @param {number} maxCommands - Number of command slots to expose.
 * @returns {string} JSON text of the tool array.
 */
function getToolDefinitions(maxCommands = 8) {
  const commandSlots = {};
  let slot = 1;
  while (slot <= maxCommands) {
    commandSlots[`command${slot}`] = _buildCommandSchema(slot);
    slot += 1;
  }

  const asFunctionTool = (name, description, parameters) => ({
    type: "function",
    function: { name, description, parameters },
  });

  const execTool = asFunctionTool(
    "restricted_exec",
    "Execute restricted commands (rg, readfile, tree, ls, glob) in parallel.",
    { type: "object", properties: commandSlots, required: ["command1"] },
  );

  const answerTool = asFunctionTool(
    "answer",
    "Final answer with relevant files and line ranges.",
    {
      type: "object",
      properties: { answer: { type: "string", description: "The final answer in XML format." } },
      required: ["answer"],
    },
  );

  return JSON.stringify([execTool, answerTool]);
}
|
|
675
|
+
|
|
676
|
+
// ─── Credentials ───────────────────────────────────────────
|
|
677
|
+
|
|
678
|
+
/**
 * Decide whether a locally extracted key is usable.
 *
 * No prefix is assumed: Windsurf historically used `sk-ws-`, later switched to
 * `devin-session-token`, and may change again. extractKey takes the value
 * straight from the `windsurfAuthStatus.apiKey` field, so any non-blank
 * string is accepted.
 *
 * @param {unknown} key
 * @returns {boolean} true for a string that is non-empty after trimming.
 */
export function isAcceptableApiKey(key) {
  if (typeof key !== "string") return false;
  return key.trim() !== "";
}
|
|
688
|
+
|
|
689
|
+
/**
 * Detect whether a manually supplied key looks truncated.
 *
 * Background: the current Windsurf key format is `devin-session-token$<JWT>`,
 * and the real credential is the JWT after the `$`. In a shell or config
 * loader (e.g. Codex's TOML env) `$` triggers variable expansion, so `$eyJ...`
 * is treated as an undefined variable and replaced with nothing. The key then
 * degrades to `devin-session-token` (possibly with a bare `$`), and the server
 * answers HTTP 401 "invalid api key" when exchanging it for a JWT (confirmed
 * in practice).
 *
 * Rule: the key starts with `devin-session-token` but lacks the JWT body
 * (starting with `eyJ`) after the `$`.
 *
 * @param {unknown} key
 * @returns {boolean} true when the key looks truncated.
 */
export function looksTruncated(key) {
  if (typeof key !== "string") return false;

  const trimmed = key.trim();
  if (!trimmed.startsWith("devin-session-token")) return false;

  // Complete form: devin-session-token$<JWT>. No `$` at all means everything
  // after the prefix was swallowed; a `$` not followed by `eyJ` is a remnant.
  const separatorAt = trimmed.indexOf("$");
  return separatorAt === -1 || !trimmed.startsWith("eyJ", separatorAt + 1);
}
|
|
711
|
+
|
|
712
|
+
/**
 * Best-effort lookup of the API key through extractKey().
 *
 * @returns {Promise<string|null>} The extracted key, or null when extraction
 *   throws or the extracted value fails isAcceptableApiKey.
 */
async function autoDiscoverApiKey() {
  let candidate;
  try {
    const extraction = await extractKey();
    candidate = extraction.api_key;
  } catch {
    // Extraction failed; callers treat this the same as "no key found".
    return null;
  }
  return isAcceptableApiKey(candidate) ? candidate : null;
}
|
|
723
|
+
|
|
724
|
+
/**
 * Get the API key from the environment or by auto-discovery.
 *
 * Priority:
 *   1. Environment variable WINDSURF_API_KEY. If it looks truncated by `$`
 *      expansion it is not trusted and the full key is recovered from the
 *      local Windsurf SQLite store instead, so the setup self-heals without
 *      the user changing any configuration. A blank (empty or whitespace-only)
 *      value is treated as unset.
 *   2. Auto-discovery (read the full key from Windsurf's local SQLite store).
 *
 * @returns {Promise<string>} The API key to use.
 * @throws {Error} When no env key is set and auto-discovery finds nothing.
 */
async function getApiKey() {
  const envKey = process.env.WINDSURF_API_KEY;

  // Same acceptance rule as for discovered keys: a whitespace-only value can
  // never authenticate, so fall through to auto-discovery instead of using it.
  if (isAcceptableApiKey(envKey)) {
    if (!looksTruncated(envKey)) return envKey;

    // The env key looks cut off by shell/`$` expansion; try to rescue the
    // complete key from the local install.
    process.stderr.write(
      `[fast-context] WARNING: WINDSURF_API_KEY looks truncated (length ${envKey.length}, ` +
      `missing the JWT after '$'). This usually means the '$' was eaten by shell/config ` +
      `variable expansion. Falling back to auto-discovery from Windsurf's local database...\n`
    );
    const recovered = await autoDiscoverApiKey();
    if (recovered) {
      process.stderr.write(
        `[fast-context] Recovered full key from local Windsurf install (length ${recovered.length}).\n`
      );
      return recovered;
    }

    // Recovery failed too: still try the env key so the server reports the
    // real error, but the user has been warned.
    process.stderr.write(
      `[fast-context] Auto-discovery failed; proceeding with the (likely truncated) env key. ` +
      `Expect HTTP 401. Fix: remove WINDSURF_API_KEY and rely on auto-discovery, or single-quote / escape '$'.\n`
    );
    return envKey;
  }

  const discovered = await autoDiscoverApiKey();
  if (discovered) return discovered;
  throw new Error(
    "Windsurf API Key not found. Set WINDSURF_API_KEY env var or ensure Windsurf is logged in. " +
    "Run extract-key.mjs to see extraction methods."
  );
}
|
|
767
|
+
|
|
768
|
+
// ─── JWT Cache ──────────────────────────────────────────────
|
|
769
|
+
|
|
770
|
+
/** @type {Map<string, { token: string, expiresAt: number }>} */
|
|
771
|
+
// In-process JWT cache keyed by API key; getCachedJwt stores the token together
// with its expiry in Unix seconds and reuses it until shortly before that time.
const _jwtCache = new Map();
|
|
772
|
+
|
|
773
|
+
/**
 * Read the `exp` claim from a JWT payload. The signature is not verified.
 *
 * @param {string} jwt - Compact JWT (`header.payload.signature`).
 * @returns {number} Expiration as a Unix timestamp in seconds, or 0 when the
 *   token is malformed or carries no usable numeric `exp`.
 */
function _getJwtExp(jwt) {
  try {
    const [, payloadSegment] = jwt.split(".");
    if (payloadSegment === undefined) return 0;

    const payload = JSON.parse(Buffer.from(payloadSegment, "base64url").toString("utf-8"));
    const claim = payload?.exp;

    // Callers compare the result numerically, so never hand back a string or
    // object: numeric strings are converted, anything else counts as "no exp".
    const exp = typeof claim === "string" && claim.trim() !== "" ? Number(claim) : claim;
    return typeof exp === "number" && Number.isFinite(exp) ? exp : 0;
  } catch {
    // Not a string, bad base64url, or a payload that is not JSON.
    return 0;
  }
}
|
|
788
|
+
|
|
789
|
+
/**
 * Return a JWT for the API key, reusing the cached one while it is still
 * valid for more than 60 seconds and fetching a new one otherwise.
 *
 * A freshly fetched token without a readable `exp` is cached for one hour.
 *
 * @param {string} apiKey
 * @param {number} [timeoutMs=30000] - Passed through to fetchJwt.
 * @returns {Promise<string>}
 */
async function getCachedJwt(apiKey, timeoutMs = 30000) {
  const nowSec = Math.floor(Date.now() / 1000);

  const entry = _jwtCache.get(apiKey);
  const stillFresh = Boolean(entry) && entry.expiresAt > nowSec + 60;
  if (stillFresh) return entry.token;

  const freshToken = await fetchJwt(apiKey, timeoutMs);
  const expiresAt = _getJwtExp(freshToken) || nowSec + 3600;
  _jwtCache.set(apiKey, { token: freshToken, expiresAt });
  return freshToken;
}
|
|
804
|
+
|
|
805
|
+
// ─── TLS Fallback ──────────────────────────────────────────
|
|
806
|
+
// Match Python's SSL fallback: if NODE_TLS_REJECT_UNAUTHORIZED is not set
|
|
807
|
+
// and the first fetch fails with a TLS error, disable cert verification.
|
|
808
|
+
// Set once the fallback has switched certificate verification off.
let _tlsFallbackApplied = false;

/**
 * Disable TLS certificate verification for this process by setting
 * NODE_TLS_REJECT_UNAUTHORIZED to "0", and warn on stderr.
 *
 * Does nothing when the fallback already ran or when the variable already
 * holds a non-empty value, so an explicit user setting is never overridden.
 */
function _applyTlsFallback() {
  if (_tlsFallbackApplied) return;
  if (process.env.NODE_TLS_REJECT_UNAUTHORIZED) return;

  process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
  _tlsFallbackApplied = true;

  const warning =
    "[fast-context] WARNING: TLS certificate verification disabled due to connection failure. " +
    "Set NODE_TLS_REJECT_UNAUTHORIZED=0 explicitly to suppress this warning.\n";
  process.stderr.write(warning);
}
|
|
820
|
+
|
|
821
|
+
// ─── Network Layer ─────────────────────────────────────────
|
|
822
|
+
|
|
823
|
+
/**
 * Unary HTTP POST with a protobuf body (`application/proto`).
 *
 * A failed fetch is retried once. Certificate verification is only switched
 * off (through _applyTlsFallback) when the first failure actually looks like
 * a TLS/certificate error; timeouts and other network errors are retried with
 * verification left untouched, so a flaky connection does not silently
 * downgrade transport security for the rest of the process.
 *
 * @param {string} url
 * @param {Buffer} protoBytes - Serialized request message.
 * @param {boolean} [compress=true] - Gzip the body and send Content-Encoding: gzip.
 * @param {number} [timeoutMs=30000] - Per-attempt timeout; a non-finite value falls back to 30000.
 * @returns {Promise<Buffer>} Raw response body.
 * @throws {Error} The retry failure, or an `HTTP <status>` error carrying
 *   `status` for a non-2xx response, each passed through _classifyError.
 */
async function _unaryRequest(url, protoBytes, compress = true, timeoutMs = 30000) {
  const headers = {
    "Content-Type": "application/proto",
    "Connect-Protocol-Version": "1",
    "User-Agent": "connect-go/1.18.1 (go1.25.5)",
    "Accept-Encoding": "gzip",
  };

  let body = protoBytes;
  if (compress) {
    body = gzipSync(protoBytes);
    headers["Content-Encoding"] = "gzip";
  }

  const effectiveTimeoutMs = Number.isFinite(timeoutMs) ? timeoutMs : 30000;
  // A fresh timeout signal per attempt, so the retry gets its own full budget.
  const doFetch = () => fetch(url, {
    method: "POST",
    headers,
    body,
    signal: AbortSignal.timeout(effectiveTimeoutMs),
  });

  // fetch wraps the socket error in `cause`; walk the chain looking for an
  // X509/TLS error code or a certificate-related message.
  const isTlsFailure = (err) => {
    for (let cur = err, hops = 0; cur && hops < 5; cur = cur.cause, hops++) {
      const code = typeof cur.code === "string" ? cur.code : "";
      if (/CERT|INVALID_CA|HOSTNAME_MISMATCH|PATH_LENGTH_EXCEEDED|INVALID_PURPOSE|^UNABLE_TO_|^ERR_(TLS|SSL)_/.test(code)) {
        return true;
      }
      if (/certificate|self[- ]signed|\b(ssl|tls)\b/i.test(String(cur.message || ""))) return true;
    }
    return false;
  };

  let resp;
  try {
    resp = await doFetch();
  } catch (firstErr) {
    if (isTlsFailure(firstErr)) _applyTlsFallback();
    try {
      resp = await doFetch();
    } catch (retryErr) {
      throw _classifyError(retryErr);
    }
  }

  if (!resp.ok) {
    const err = new Error(`HTTP ${resp.status}`);
    err.status = resp.status;
    throw _classifyError(err);
  }

  return Buffer.from(await resp.arrayBuffer());
}
|
|
875
|
+
|
|
876
|
+
/**
 * Connect-RPC streaming POST to GetDevstralStream with retry.
 *
 * Retry policy:
 *   - A fetch failure on the first attempt is retried immediately. Certificate
 *     verification is only switched off (through _applyTlsFallback) when that
 *     failure looks like a TLS/certificate error, so timeouts and other
 *     network errors do not disable verification.
 *   - Any other failure, including 5xx and 429 responses, is retried up to
 *     `maxRetries` times with a linear backoff of 1s, 2s, ...
 *   - 4xx responses other than 429 are never retried.
 *
 * @param {Buffer} protoBytes - Serialized request message (framed here).
 * @param {number} [timeoutMs=30000] - Sent as Connect-Timeout-Ms; the local abort fires 5s later.
 * @param {number} [maxRetries=2]
 * @returns {Promise<Buffer>} Raw response body (Connect frames).
 * @throws {Error} The last failure, passed through _classifyError.
 */
async function _streamingRequest(protoBytes, timeoutMs = 30000, maxRetries = 2) {
  const frame = connectFrameEncode(protoBytes);
  const url = `${API_BASE}/GetDevstralStream`;
  const traceId = randomUUID().replace(/-/g, "");
  const spanId = randomUUID().replace(/-/g, "").slice(0, 16);
  const baseTimeoutMs = Number.isFinite(timeoutMs) ? timeoutMs : 30000;
  const abortMs = baseTimeoutMs + 5000;

  const headers = {
    "Content-Type": "application/connect+proto",
    "Connect-Protocol-Version": "1",
    "Connect-Accept-Encoding": "gzip",
    "Connect-Content-Encoding": "gzip",
    "Connect-Timeout-Ms": String(baseTimeoutMs),
    "User-Agent": "connect-go/1.18.1 (go1.25.5)",
    "Accept-Encoding": "identity",
    "Baggage": `sentry-release=language-server-windsurf@${WS_LS_VER},` +
      `sentry-environment=stable,sentry-sampled=false,` +
      `sentry-trace_id=${traceId},` +
      `sentry-public_key=b813f73488da69eedec534dba1029111`,
    "Sentry-Trace": `${traceId}-${spanId}-0`,
  };

  const doFetch = () => fetch(url, {
    method: "POST",
    headers,
    body: frame,
    signal: AbortSignal.timeout(abortMs),
  });

  // fetch wraps the socket error in `cause`; walk the chain looking for an
  // X509/TLS error code or a certificate-related message.
  const isTlsFailure = (err) => {
    for (let cur = err, hops = 0; cur && hops < 5; cur = cur.cause, hops++) {
      const code = typeof cur.code === "string" ? cur.code : "";
      if (/CERT|INVALID_CA|HOSTNAME_MISMATCH|PATH_LENGTH_EXCEEDED|INVALID_PURPOSE|^UNABLE_TO_|^ERR_(TLS|SSL)_/.test(code)) {
        return true;
      }
      if (/certificate|self[- ]signed|\b(ssl|tls)\b/i.test(String(cur.message || ""))) return true;
    }
    return false;
  };

  // 4xx other than 429 means the request itself is wrong; retrying cannot help.
  const isFatalClientError = (status) => status >= 400 && status < 500 && status !== 429;
  const backoff = (attempt) => new Promise((r) => setTimeout(r, 1000 * (attempt + 1)));

  let lastErr;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      let resp;
      try {
        resp = await doFetch();
      } catch (e) {
        if (attempt !== 0) throw e;
        if (isTlsFailure(e)) _applyTlsFallback();
        resp = await doFetch();
      }

      if (!resp.ok) {
        const err = new Error(`HTTP ${resp.status}`);
        err.status = resp.status;
        throw err; // retry / no-retry is decided in the catch below
      }

      return Buffer.from(await resp.arrayBuffer());
    } catch (e) {
      lastErr = e;
      if (e.status && isFatalClientError(e.status)) {
        throw _classifyError(e);
      }
      if (attempt < maxRetries) {
        await backoff(attempt);
      }
    }
  }
  throw _classifyError(lastErr);
}
|
|
959
|
+
|
|
960
|
+
/**
 * Exchange the API key for a JWT via the GetUserJwt endpoint.
 *
 * The response is not decoded against a schema: the first embedded string
 * that starts with `eyJ` and contains a `.` is taken as the token.
 *
 * @param {string} apiKey
 * @param {number} [timeoutMs=30000] - Passed through to _unaryRequest.
 * @returns {Promise<string>} The JWT.
 * @throws {Error} When the response contains no JWT-looking string, or when
 *   the request itself fails.
 */
async function fetchJwt(apiKey, timeoutMs = 30000) {
  // String fields of the metadata message, in the order they are written.
  const stringFields = [
    [1, WS_APP],
    [2, WS_APP_VER],
    [3, apiKey],
    [4, "zh-cn"],
    [7, WS_LS_VER],
    [12, WS_APP],
  ];

  const meta = new ProtobufEncoder();
  for (const [fieldNo, value] of stringFields) {
    meta.writeString(fieldNo, value);
  }
  meta.writeBytes(30, Buffer.from([0x00, 0x01]));

  const envelope = new ProtobufEncoder();
  envelope.writeMessage(1, meta);

  // The auth call is sent uncompressed (compress = false).
  const resp = await _unaryRequest(`${AUTH_BASE}/GetUserJwt`, envelope.toBuffer(), false, timeoutMs);

  for (const candidate of extractStrings(resp)) {
    if (!candidate.startsWith("eyJ")) continue;
    if (!candidate.includes(".")) continue;
    return candidate;
  }
  throw new Error("Failed to extract JWT from GetUserJwt response");
}
|
|
986
|
+
|
|
987
|
+
/**
 * Ask the CheckUserMessageRateLimit endpoint whether a request may be sent.
 *
 * Only an explicit rate-limit signal (HTTP 429 or error code "RATE_LIMITED")
 * yields false. Every other failure is treated as "allowed" so that network
 * problems on this pre-check do not block the actual request.
 *
 * @param {string} apiKey
 * @param {string} jwt
 * @param {number} [timeoutMs=30000] - Passed through to _unaryRequest.
 * @returns {Promise<boolean>} true if OK, false if rate-limited.
 */
async function checkRateLimit(apiKey, jwt, timeoutMs = 30000) {
  const payload = new ProtobufEncoder();
  payload.writeMessage(1, _buildMetadata(apiKey, jwt));
  payload.writeString(3, WS_MODEL);

  try {
    await _unaryRequest(`${API_BASE}/CheckUserMessageRateLimit`, payload.toBuffer(), true, timeoutMs);
  } catch (err) {
    const rateLimited = err.status === 429 || err.code === "RATE_LIMITED";
    return !rateLimited;
  }
  return true;
}
|
|
1006
|
+
|
|
1007
|
+
// ─── Request Building ──────────────────────────────────────
|
|
1008
|
+
|
|
1009
|
+
/**
 * Build the metadata message attached to API requests: app name/version,
 * API key, locale, JSON-encoded system and CPU descriptions taken from
 * `node:os`, the language-server version and the JWT.
 *
 * @param {string} apiKey
 * @param {string} jwt
 * @returns {ProtobufEncoder} Encoder holding the metadata fields.
 */
function _buildMetadata(apiKey, jwt) {
  const osName = platform();
  const machine = arch();

  let sysname = "Linux";
  if (osName === "darwin") sysname = "Darwin";
  else if (osName === "win32") sysname = "Windows_NT";

  const systemJson = JSON.stringify({
    Os: osName,
    Arch: machine,
    Release: release(),
    Version: osVersion(),
    Machine: machine,
    Nodename: hostname(),
    Sysname: sysname,
    ProductVersion: "",
  });

  const processors = cpus();
  // cpus() can come back empty; report 4 in that case.
  const logicalCount = processors.length || 4;
  const cpuJson = JSON.stringify({
    NumSockets: 1,
    NumCores: logicalCount,
    NumThreads: logicalCount,
    VendorID: "",
    Family: "0",
    Model: "0",
    ModelName: processors[0]?.model || "Unknown",
    Memory: totalmem(),
  });

  // Fields are written in ascending field-number order.
  const meta = new ProtobufEncoder();
  meta.writeString(1, WS_APP);
  meta.writeString(2, WS_APP_VER);
  meta.writeString(3, apiKey);
  meta.writeString(4, "zh-cn");
  meta.writeString(5, systemJson);
  meta.writeString(7, WS_LS_VER);
  meta.writeString(8, cpuJson);
  meta.writeString(12, WS_APP);
  meta.writeString(21, jwt);
  meta.writeBytes(30, Buffer.from([0x00, 0x01]));
  return meta;
}
|
|
1055
|
+
|
|
1056
|
+
/**
 * Build one chat message for the request.
 *
 * The tool-call sub-message (field 6) is written only when all three of
 * toolCallId, toolName and toolArgsJson are truthy; the reference to an
 * earlier call (field 7) only when refCallId is truthy.
 *
 * @param {number} role - 1=user, 2=assistant, 4=tool_result, 5=system
 * @param {string} content
 * @param {Object} [opts]
 * @param {string} [opts.toolCallId]
 * @param {string} [opts.toolName]
 * @param {string} [opts.toolArgsJson]
 * @param {string} [opts.refCallId]
 * @returns {ProtobufEncoder}
 */
function _buildChatMessage(role, content, opts = {}) {
  const message = new ProtobufEncoder();
  message.writeVarint(2, role);
  message.writeString(3, content);

  const { toolCallId, toolName, toolArgsJson, refCallId } = opts;

  const hasToolCall = Boolean(toolCallId && toolName && toolArgsJson);
  if (hasToolCall) {
    const toolCall = new ProtobufEncoder();
    toolCall.writeString(1, toolCallId);
    toolCall.writeString(2, toolName);
    toolCall.writeString(3, toolArgsJson);
    message.writeMessage(6, toolCall);
  }

  if (refCallId) message.writeString(7, refCallId);

  return message;
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Assemble the full request: metadata (field 1), one chat message per entry
 * of `messages` (repeated field 2, in order) and the tool definitions (field 3).
 *
 * @param {string} apiKey
 * @param {string} jwt
 * @param {Array} messages - Entries with `role` and `content`, plus optional
 *   `tool_call_id`, `tool_name`, `tool_args_json` and `ref_call_id`.
 * @param {string} toolDefs - Tool definitions as a JSON string.
 * @returns {Buffer}
 */
function _buildRequest(apiKey, jwt, messages, toolDefs) {
  const request = new ProtobufEncoder();
  request.writeMessage(1, _buildMetadata(apiKey, jwt));

  for (const entry of messages) {
    // Map the snake_case message keys onto _buildChatMessage's camelCase options.
    const options = {
      toolCallId: entry.tool_call_id,
      toolName: entry.tool_name,
      toolArgsJson: entry.tool_args_json,
      refCallId: entry.ref_call_id,
    };
    request.writeMessage(2, _buildChatMessage(entry.role, entry.content, options));
  }

  request.writeString(3, toolDefs);
  return request.toBuffer();
}
|
|
1112
|
+
|
|
1113
|
+
// ─── Response Parsing ──────────────────────────────────────
|
|
1114
|
+
|
|
1115
|
+
/**
 * Decode a Buffer as UTF-8 and drop every U+FFFD replacement character, which
 * is what the decoder emits for invalid byte sequences. Intended to mirror
 * Python's bytes.decode("utf-8", errors="ignore").
 *
 * @param {Buffer} buf
 * @returns {string}
 */
function stripInvalidUtf8(buf) {
  const decoded = buf.toString("utf-8");
  return decoded.split("\ufffd").join("");
}
|
|
1124
|
+
|
|
1125
|
+
/**
 * Locate the end of the JSON object that starts at raw[0].
 *
 * @param {string} raw - Text beginning with `{`.
 * @param {boolean} skipStrings - When true, braces inside double-quoted JSON
 *   strings (with backslash escapes) are ignored; when false every brace counts.
 * @returns {number} Index just past the matching `}`, or raw.length if unbalanced.
 */
function _findJsonObjectEnd(raw, skipStrings) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (skipStrings && ch === '"') inString = true;
    else if (ch === "{") depth++;
    else if (ch === "}" && --depth === 0) return i + 1;
  }
  return raw.length;
}

/**
 * Parse a tool call in `[TOOL_CALLS]name[ARGS]{json}` format.
 *
 * The argument object is extracted with a string-aware brace scan, so
 * patterns containing `{` or `}` (common in rg regexes) no longer cut the JSON
 * short or drag trailing text into it. If that candidate does not parse, the
 * plain brace count is tried as well, and both candidates are retried after
 * repairing malformed keys (`key:` and `key":` become `"key":`).
 *
 * @param {string} text - Model output; `</s>` markers are removed first.
 * @returns {[string, string, Object]|null} `[thinking, name, args]`, where
 *   `thinking` is the trimmed text before the marker, or null when there is no
 *   tool call or its arguments cannot be parsed.
 */
function _parseToolCall(text) {
  const cleaned = text.replace(/<\/s>/g, "");
  const m = cleaned.match(/\[TOOL_CALLS\](\w+)\[ARGS\](\{.+)/s);
  if (!m) return null;

  const name = m[1];
  const raw = m[2].trim();

  // String-aware extent first; the naive extent is kept as a fallback because
  // a stray quote in malformed output can derail the string-aware scan.
  const candidates = [...new Set([
    raw.slice(0, _findJsonObjectEnd(raw, true)),
    raw.slice(0, _findJsonObjectEnd(raw, false)),
  ])];

  const repairs = [
    (s) => s,
    // Unquoted keys: {exclude: ...} -> {"exclude": ...}
    (s) => s.replace(/([{,]\s*)(\w+)\s*:/g, '$1"$2":'),
    // Missing opening quote as well: {exclude": ...} -> {"exclude": ...}
    (s) => s.replace(/([{,]\s*)(\w+)"?\s*:/g, '$1"$2":'),
  ];

  for (const repair of repairs) {
    for (const candidate of candidates) {
      try {
        const args = JSON.parse(repair(candidate));
        return [cleaned.slice(0, m.index).trim(), name, args];
      } catch {
        // Try the next candidate / repair.
      }
    }
  }
  return null;
}
|
|
1170
|
+
|
|
1171
|
+
/**
 * Parse a streaming response: decode the Connect frames, collect their text
 * and look for a tool call in it.
 *
 * Per frame, in order:
 *   - a frame that is a JSON object with a truthy `error` member ends parsing
 *     and yields `"[Error] <code>: <message>"`;
 *   - a frame whose text contains `[TOOL_CALLS]` replaces everything
 *     collected so far and stops the scan;
 *   - otherwise every embedded string longer than 10 characters is appended.
 *
 * @param {Buffer} data
 * @returns {[string, [string, Object]|null]} `[text, toolInfo]`. With a tool
 *   call, `text` is the thinking text and `toolInfo` is `[name, args]`;
 *   otherwise `text` is the collected text and `toolInfo` is null.
 */
function _parseResponse(data) {
  let collected = "";

  for (const frame of connectFrameDecode(data)) {
    let errorInfo = null;
    try {
      const asText = frame.toString("utf-8");
      if (asText.startsWith("{")) {
        errorInfo = JSON.parse(asText).error || null;
      }
    } catch {
      // Not JSON; treat the frame as ordinary payload.
    }
    if (errorInfo) {
      return [`[Error] ${errorInfo.code || "unknown"}: ${errorInfo.message || ""}`, null];
    }

    // Invalid UTF-8 is stripped (matches Python errors="ignore").
    const decoded = stripInvalidUtf8(frame);
    if (decoded.includes("[TOOL_CALLS]")) {
      collected = decoded;
      break;
    }

    for (const piece of extractStrings(frame)) {
      if (piece.length > 10) collected += piece;
    }
  }

  const toolCall = _parseToolCall(collected);
  if (!toolCall) return [collected, null];

  const [thinking, name, args] = toolCall;
  return [thinking, [name, args]];
}
|
|
1217
|
+
|
|
1218
|
+
// ─── Core Search ───────────────────────────────────────────
|
|
1219
|
+
|
|
1220
|
+
// Max safe tree size in bytes (server payload limit ~346KB, fixed overhead ~26KB,
|
|
1221
|
+
// leave room for conversation accumulation across rounds)
|
|
1222
|
+
// 250 KiB; getRepoMap retries with a shallower depth when a rendered tree exceeds this.
const MAX_TREE_BYTES = 250 * 1024;
|
|
1223
|
+
|
|
1224
|
+
/**
 * Convert an exclude pattern (directory/file name or simple glob) to a RegExp
 * for tree-node-cli's exclude option.
 *
 * tree-node-cli tests full paths, while other callers test bare entry names,
 * so the pattern is anchored to path-segment boundaries (start of string or a
 * `/` or `\` before it, end of string or a separator after it) for both plain
 * names and globs. In a glob, `*` matches any run of characters and `?`
 * matches a single character; everything else is literal.
 *
 * @param {string} pattern - e.g. "node_modules", "dist", "*.min.*", "build*"
 * @returns {RegExp}
 */
function _excludePatternToRegex(pattern) {
  let body;
  if (!/[*?]/.test(pattern)) {
    // Plain name: escape everything.
    body = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  } else {
    // Glob: translate the wildcards, escape the rest.
    body = "";
    for (const c of pattern) {
      if (c === "*") body += ".*";
      else if (c === "?") body += ".";
      else if (".+^${}()|[]\\".includes(c)) body += "\\" + c;
      else body += c;
    }
  }
  // Anchoring a glob with ^...$ only ever matched bare names; against a full
  // path a prefix glob such as "build*" could never match.
  return new RegExp(`(^|[\\\\/])${body}($|[\\\\/])`);
}
|
|
1248
|
+
|
|
1249
|
+
/**
 * Count the immediate entries of a directory (non-recursive, fast estimate).
 * Every entry returned by readdirSync is counted, files and directories alike.
 *
 * @param {string} dir
 * @returns {number} Entry count, or 0 when the directory cannot be read.
 */
function _countFilesQuick(dir) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return 0;
  }
  return entries.length;
}
|
|
1261
|
+
|
|
1262
|
+
/**
 * Suggest a tree depth from the entry count reported by _countFilesQuick
 * for the project root:
 *   - fewer than 500 entries: depth 4
 *   - 500 to 5000 entries:    depth 3
 *   - more than 5000 entries: depth 2
 *
 * @param {string} projectRoot
 * @returns {number}
 */
function _suggestTreeDepth(projectRoot) {
  const entryCount = _countFilesQuick(projectRoot);
  if (entryCount > 5000) return 2;
  return entryCount >= 500 ? 3 : 4;
}
|
|
1276
|
+
|
|
1277
|
+
/**
 * Rewrite a rendered tree so its root reads as the virtual root.
 *
 * Every literal occurrence of the absolute root path is replaced, and if the
 * first line is just the root directory's name, that line is replaced too.
 * The directory name is derived for both `/` and `\` separated paths and
 * tolerates a trailing separator. `virtualRoot` is always inserted literally,
 * so `$` sequences in it are not treated as replacement patterns.
 *
 * @param {string} treeStr - Tree output; null/undefined/"" is treated as "".
 * @param {string} absRoot - Absolute path the tree was generated for.
 * @param {string} [virtualRoot="/codebase"] - Replacement root label.
 * @returns {string}
 */
function _normalizeTreeRoot(treeStr, absRoot, virtualRoot = "/codebase") {
  // Function replacer: the replacement text is used verbatim.
  const replaced = String(treeStr || "").replaceAll(absRoot, () => virtualRoot);

  const withoutTrailingSep = absRoot.replace(/[\\/]+$/, "");
  const rootNames = new Set(
    [
      absRoot.split("/").pop(),                  // POSIX basename
      withoutTrailingSep.split(/[\\/]/).pop(),   // basename for either separator
      absRoot,
    ].filter(Boolean),
  );

  const lines = replaced.split("\n");
  if (!rootNames.has(lines[0])) return replaced;

  lines[0] = virtualRoot;
  return lines.join("\n");
}
|
|
1288
|
+
|
|
1289
|
+
/**
 * Render a directory tree of the project, shrinking the depth until it fits.
 *
 * Starting at the requested depth (or, for 0, the depth suggested by
 * _suggestTreeDepth), the tree is rendered with tree-node-cli and accepted as
 * soon as it is at most MAX_TREE_BYTES. A render that is too large or throws
 * leads to a retry one level shallower. If no depth down to 1 works, a flat
 * listing of the root entries is returned, and if even that fails a
 * placeholder.
 *
 * @param {string} projectRoot
 * @param {number} [targetDepth=3] - Desired tree depth (0-6), 0 means auto
 * @param {string[]} [excludePaths=[]] - Patterns to exclude from tree
 * @returns {{ tree: string, depth: number, sizeBytes: number, fellBack: boolean, autoDepth: boolean }}
 *   `depth` is the depth actually used (0 for the flat listing / placeholder);
 *   `fellBack` is true whenever that is shallower than the starting depth.
 */
export function getRepoMap(projectRoot, targetDepth = 3, excludePaths = []) {
  const autoDepth = targetDepth === 0;
  const startDepth = autoDepth ? _suggestTreeDepth(projectRoot) : targetDepth;
  const excludeRegexes = excludePaths.length ? excludePaths.map(_excludePatternToRegex) : [];

  let depth = startDepth;
  while (depth >= 1) {
    try {
      const treeOptions = excludeRegexes.length
        ? { maxDepth: depth, exclude: excludeRegexes }
        : { maxDepth: depth };
      // Normalize root to /codebase consistently.
      const rendered = _normalizeTreeRoot(treeNodeCli(projectRoot, treeOptions), projectRoot, "/codebase");
      const sizeBytes = Buffer.byteLength(rendered, "utf-8");
      if (sizeBytes <= MAX_TREE_BYTES) {
        return { tree: rendered, depth, sizeBytes, fellBack: depth < startDepth, autoDepth };
      }
      // Too large: fall through and try one level shallower.
    } catch {
      // Rendering failed at this depth: try one level shallower.
    }
    depth -= 1;
  }

  // Ultimate fallback: flat listing of the root (also respects excludePaths).
  try {
    const visible = readdirSync(projectRoot)
      .sort()
      .filter((entry) => !excludeRegexes.some((rx) => rx.test(entry)));
    const listing = ["/codebase", ...visible.map((entry) => `├── ${entry}`)].join("\n");
    return { tree: listing, depth: 0, sizeBytes: Buffer.byteLength(listing, "utf-8"), fellBack: true, autoDepth };
  } catch {
    const placeholder = "/codebase\n(empty or inaccessible)";
    return { tree: placeholder, depth: 0, sizeBytes: placeholder.length, fellBack: true, autoDepth };
  }
}
|
|
1339
|
+
|
|
1340
|
+
/**
 * Split a query into unique lowercase tokens.
 *
 * Tokens are runs of `a-z`, `0-9`, `_` and `-`; anything else separates
 * tokens. Tokens shorter than 3 characters are dropped, and first-occurrence
 * order is kept.
 *
 * @param {string} [query=""]
 * @returns {string[]}
 */
function _tokenizeQuery(query = "") {
  const unique = new Set();
  for (const piece of String(query).toLowerCase().split(/[^a-z0-9_\-]+/)) {
    const token = piece.trim();
    if (token.length >= 3) unique.add(token);
  }
  return Array.from(unique);
}
|
|
1349
|
+
|
|
1350
|
+
/**
 * Score a top-level directory name against query tokens.
 *
 * A name that is one of the conventional source roots earns 2 points, and
 * each query token contained in the lowercased name earns 4 more.
 *
 * @param {string} dirName
 * @param {string[]} [queryTokens=[]] - Tokens to look for (compared as given).
 * @returns {number}
 */
function _scoreTopLevelDir(dirName, queryTokens = []) {
  const CONVENTIONAL_ROOTS = ["src", "app", "lib", "packages", "services", "server", "backend", "frontend", "api"];

  const lowered = String(dirName || "").toLowerCase();
  const rootBonus = CONVENTIONAL_ROOTS.includes(lowered) ? 2 : 0;
  const tokenHits = [...queryTokens].filter((token) => lowered.includes(token)).length;

  return rootBonus + tokenHits * 4;
}
|
|
1363
|
+
|
|
1364
|
+
/**
 * List the names of the directories directly under the project root, sorted,
 * skipping names that match any exclude pattern.
 *
 * @param {string} projectRoot
 * @param {string[]} [excludePaths=[]] - Patterns converted with _excludePatternToRegex.
 * @returns {string[]} Directory names; empty when the root cannot be read.
 *   Entries that cannot be stat'ed are skipped.
 */
function _listTopLevelDirs(projectRoot, excludePaths = []) {
  const excludeRegexes = excludePaths.length ? excludePaths.map(_excludePatternToRegex) : [];

  let names;
  try {
    names = readdirSync(projectRoot).sort();
  } catch {
    return [];
  }

  const isExcluded = (name) => excludeRegexes.some((rx) => rx.test(name));
  const isDirectory = (name) => {
    try {
      return statSync(join(projectRoot, name)).isDirectory();
    } catch {
      // Unreadable or vanished entry: leave it out.
      return false;
    }
  };

  return names.filter((name) => !isExcluded(name) && isDirectory(name));
}
|
|
1385
|
+
|
|
1386
|
+
/**
 * Render the tree of one directory under the project root, labelled with its
 * virtual path `/codebase/<dir>`.
 *
 * @param {string} projectRoot
 * @param {string} dir - Directory name relative to the project root.
 * @param {number} [levels=2] - Depth passed to tree-node-cli as maxDepth.
 * @param {string[]} [excludePaths=[]] - Patterns to exclude from the tree.
 * @returns {string} The normalized tree, or the virtual root followed by a
 *   failure note when rendering throws.
 */
function _buildSubtreeForDir(projectRoot, dir, levels = 2, excludePaths = []) {
  const absDir = join(projectRoot, dir);
  const virtualRoot = `/codebase/${dir}`;

  try {
    const excludeRegexes = excludePaths.length ? excludePaths.map(_excludePatternToRegex) : [];
    const treeOptions = excludeRegexes.length
      ? { maxDepth: levels, exclude: excludeRegexes }
      : { maxDepth: levels };
    return _normalizeTreeRoot(treeNodeCli(absDir, treeOptions), absDir, virtualRoot);
  } catch {
    return `${virtualRoot}\n (failed to generate subtree)`;
  }
}
|
|
1399
|
+
|
|
1400
|
+
export function buildOptimizedRepoMap({
|
|
1401
|
+
query,
|
|
1402
|
+
projectRoot,
|
|
1403
|
+
treeDepth,
|
|
1404
|
+
excludePaths,
|
|
1405
|
+
optimizer = {},
|
|
1406
|
+
bootstrapHints = null,
|
|
1407
|
+
onProgress = null,
|
|
1408
|
+
}) {
|
|
1409
|
+
const log = (msg) => onProgress?.(msg);
|
|
1410
|
+
const cfg = { ...REPO_MAP_OPTIMIZER_DEFAULTS, ...(optimizer || {}) };
|
|
1411
|
+
if (cfg.mode === "classic") {
|
|
1412
|
+
const base = getRepoMap(projectRoot, treeDepth, excludePaths);
|
|
1413
|
+
return {
|
|
1414
|
+
...base,
|
|
1415
|
+
strategy: "classic",
|
|
1416
|
+
hotDirs: [],
|
|
1417
|
+
};
|
|
1418
|
+
}
|
|
1419
|
+
|
|
1420
|
+
const bootstrapDepth = Math.max(1, Math.min(3, Number(cfg.bootstrapTreeDepth) || 1));
|
|
1421
|
+
const rawHotspotTopK = Number(cfg.hotspotTopK);
|
|
1422
|
+
const hotspotTopK = Math.max(0, Math.min(8, Number.isFinite(rawHotspotTopK) ? rawHotspotTopK : 4));
|
|
1423
|
+
const cfgHotspotDepth = Math.max(1, Math.min(4, Number(cfg.hotspotTreeDepth) || 2));
|
|
1424
|
+
// 用户的 treeDepth 提升 hotspot subtree 深度,避免参数被静默忽略
|
|
1425
|
+
const hotspotTreeDepth = treeDepth > cfgHotspotDepth ? Math.min(4, treeDepth) : cfgHotspotDepth;
|
|
1426
|
+
const maxBytes = Math.max(16 * 1024, Number(cfg.maxBytes) || REPO_MAP_OPTIMIZER_DEFAULTS.maxBytes);
|
|
1427
|
+
|
|
1428
|
+
const bootstrap = getRepoMap(projectRoot, bootstrapDepth, excludePaths);
|
|
1429
|
+
const topDirs = _listTopLevelDirs(projectRoot, excludePaths);
|
|
1430
|
+
|
|
1431
|
+
// Extract keywords from bootstrap hints (rgPatterns)
|
|
1432
|
+
const keywords = bootstrapHints?.rgPatterns || [];
|
|
1433
|
+
|
|
1434
|
+
// Use BM25F + Probe + RRF for directory scoring
|
|
1435
|
+
// This replaces the old token-based scoring + commonRoots approach
|
|
1436
|
+
let hotDirs = [];
|
|
1437
|
+
let pathSpines = [];
|
|
1438
|
+
if (hotspotTopK === 0) {
|
|
1439
|
+
log("Hotspot directory expansion disabled (hotspotTopK=0)");
|
|
1440
|
+
} else try {
|
|
1441
|
+
const results = scoreDirectories(query, projectRoot, topDirs, excludePaths, {
|
|
1442
|
+
topK: hotspotTopK,
|
|
1443
|
+
useProbe: true, // Enable probe grep signal
|
|
1444
|
+
keywords, // Bootstrap keywords
|
|
1445
|
+
minReturn: 2, // Always return at least 2 directories for coverage
|
|
1446
|
+
});
|
|
1447
|
+
hotDirs = results.hotDirs;
|
|
1448
|
+
pathSpines = results.pathSpines;
|
|
1449
|
+
log(`BM25F scoring: hotDirs=[${hotDirs.join(",")}] pathSpines=${pathSpines.length} signals=${JSON.stringify(results.signals)}`);
|
|
1450
|
+
} catch (e) {
|
|
1451
|
+
// Lightweight fallback: use quick scoring without commonRoots
|
|
1452
|
+
log(`BM25F failed, using quick token scoring: ${e.message}`);
|
|
1453
|
+
const queryTerms = tokenizeBM25(query);
|
|
1454
|
+
const scored = topDirs.map((d) => {
|
|
1455
|
+
const dirTerms = tokenizeBM25(d);
|
|
1456
|
+
let score = 0;
|
|
1457
|
+
for (const qt of queryTerms) {
|
|
1458
|
+
if (dirTerms.some(dt => dt.includes(qt) || qt.includes(dt))) score += 1;
|
|
1459
|
+
}
|
|
1460
|
+
return { dir: d, score };
|
|
1461
|
+
}).sort((a, b) => b.score - a.score);
|
|
1462
|
+
|
|
1463
|
+
// Always return at least topK directories (no score > 0 filter)
|
|
1464
|
+
hotDirs = scored.slice(0, hotspotTopK).map((x) => x.dir);
|
|
1465
|
+
if (hotDirs.length === 0) hotDirs = topDirs.slice(0, hotspotTopK);
|
|
1466
|
+
log(`Quick scoring fallback: ${hotDirs.join(",")}`);
|
|
1467
|
+
}
|
|
1468
|
+
|
|
1469
|
+
const hotspotSections = [];
|
|
1470
|
+
for (const d of hotDirs) {
|
|
1471
|
+
hotspotSections.push(_buildSubtreeForDir(projectRoot, d, hotspotTreeDepth, excludePaths));
|
|
1472
|
+
}
|
|
1473
|
+
|
|
1474
|
+
// Build path spines section for deep file visibility
|
|
1475
|
+
const pathSpineSection = pathSpines.length > 0
|
|
1476
|
+
? "# Relevant File Paths (from BM25F path spine extraction)\n" + pathSpines.map(p => `- /codebase/${p}`).join("\n")
|
|
1477
|
+
: "";
|
|
1478
|
+
|
|
1479
|
+
let tree = bootstrap.tree;
|
|
1480
|
+
const sections = [];
|
|
1481
|
+
if (hotspotSections.length) {
|
|
1482
|
+
sections.push("# Hotspot Subtrees\n" + hotspotSections.join("\n\n"));
|
|
1483
|
+
}
|
|
1484
|
+
if (pathSpineSection) {
|
|
1485
|
+
sections.push(pathSpineSection);
|
|
1486
|
+
}
|
|
1487
|
+
if (sections.length) {
|
|
1488
|
+
tree = `${bootstrap.tree}\n\n${sections.join("\n\n")}`;
|
|
1489
|
+
}
|
|
1490
|
+
|
|
1491
|
+
// Keep map under configurable budget.
|
|
1492
|
+
let sizeBytes = Buffer.byteLength(tree, "utf-8");
|
|
1493
|
+
if (sizeBytes > maxBytes && (hotspotSections.length || pathSpineSection)) {
|
|
1494
|
+
// First try removing path spines
|
|
1495
|
+
if (pathSpineSection) {
|
|
1496
|
+
const withoutSpines = sections.length > 1
|
|
1497
|
+
? `${bootstrap.tree}\n\n${sections[0]}`
|
|
1498
|
+
: bootstrap.tree;
|
|
1499
|
+
sizeBytes = Buffer.byteLength(withoutSpines, "utf-8");
|
|
1500
|
+
if (sizeBytes <= maxBytes) {
|
|
1501
|
+
tree = withoutSpines;
|
|
1502
|
+
}
|
|
1503
|
+
}
|
|
1504
|
+
|
|
1505
|
+
// If still too large, progressively remove hotspot sections
|
|
1506
|
+
if (sizeBytes > maxBytes && hotspotSections.length) {
|
|
1507
|
+
let kept = [...hotspotSections];
|
|
1508
|
+
while (kept.length > 0) {
|
|
1509
|
+
kept.pop();
|
|
1510
|
+
tree = kept.length
|
|
1511
|
+
? `${bootstrap.tree}\n\n# Hotspot Subtrees\n${kept.join("\n\n")}`
|
|
1512
|
+
: bootstrap.tree;
|
|
1513
|
+
sizeBytes = Buffer.byteLength(tree, "utf-8");
|
|
1514
|
+
if (sizeBytes <= maxBytes) break;
|
|
1515
|
+
}
|
|
1516
|
+
}
|
|
1517
|
+
}
|
|
1518
|
+
|
|
1519
|
+
return {
|
|
1520
|
+
tree,
|
|
1521
|
+
depth: bootstrap.depth,
|
|
1522
|
+
hotspotDepth: hotspotTreeDepth,
|
|
1523
|
+
sizeBytes: Buffer.byteLength(tree, "utf-8"),
|
|
1524
|
+
fellBack: bootstrap.fellBack,
|
|
1525
|
+
autoDepth: bootstrap.autoDepth,
|
|
1526
|
+
strategy: "bootstrap_hotspot",
|
|
1527
|
+
hotDirs,
|
|
1528
|
+
};
|
|
1529
|
+
}
|
|
1530
|
+
|
|
1531
|
+
/**
 * Parse the model's answer XML into structured file + line-range data.
 *
 * Expected shape:
 *   `<file path="/codebase/rel/path"><range>10-20</range>...</file>`
 *
 * Paths are given relative to the virtual `/codebase` root and are mapped onto
 * `projectRoot`. Entries that resolve to the project root itself, or that
 * escape it (e.g. via `../`), are dropped.
 *
 * @param {string} xmlText - Raw answer payload returned by the model.
 * @param {string} projectRoot - Project root directory.
 * @returns {{ files: Array<{ path: string, full_path: string, ranges: Array<[number, number]> }> }}
 */
function _parseAnswer(xmlText, projectRoot) {
  const files = [];
  const resolvedRoot = resolve(projectRoot);
  const fileRegex = /<file\s+path=(["'])([^"']+)\1>([\s\S]*?)<\/file>/g;
  const rangeRegex = /<range>(\d+)-(\d+)<\/range>/g;

  for (const [, , vpath, inner] of String(xmlText ?? "").matchAll(fileRegex)) {
    // Strip the virtual root only on a path-segment boundary, so that a
    // sibling such as "/codebase-tools/x.js" is not mangled into "-tools/x.js".
    const rel = vpath
      .replace(/^\/codebase(?=[\/\\]|$)/, "")
      .replace(/^[\/\\]+/, "");
    if (!rel) continue; // the answer named the root itself, not a file

    // Path safety: reject the root itself, traversal attempts (../) and
    // anything that lands outside the project root.
    const fullPath = resolve(resolvedRoot, rel);
    const relToRoot = relative(resolvedRoot, fullPath);
    const outsideRoot =
      relToRoot === "" ||
      relToRoot === ".." ||
      relToRoot.startsWith(`..${sep}`) ||
      isAbsolute(relToRoot);
    if (outsideRoot) continue;

    const ranges = [];
    for (const [, from, to] of inner.matchAll(rangeRegex)) {
      ranges.push([parseInt(from, 10), parseInt(to, 10)]);
    }

    files.push({ path: rel, full_path: fullPath, ranges });
  }

  return { files };
}
|
|
1565
|
+
|
|
1566
|
+
/**
 * Execute Fast Context search.
 *
 * Runs the multi-turn loop: send the query plus a repo map to the remote
 * model, execute the `restricted_exec` tool calls it returns locally, feed the
 * results back, and stop when the model issues an `answer` tool call or the
 * turn budget is exhausted.
 *
 * @param {Object} opts
 * @param {string} opts.query - Natural language search query
 * @param {string} opts.projectRoot - Project root directory
 * @param {string} [opts.apiKey] - Windsurf API key (auto-discovered if not set)
 * @param {string} [opts.jwt] - JWT token (auto-fetched if not set)
 * @param {number} [opts.maxTurns=3] - Search rounds
 * @param {number} [opts.maxCommands=8] - Max commands per round
 * @param {number} [opts.maxResults=10] - Max number of files requested from the model
 * @param {number} [opts.treeDepth=3] - Directory tree depth for repo map (1-6, auto fallback)
 * @param {number} [opts.timeoutMs=30000] - Connect-Timeout-Ms for streaming requests
 * @param {string[]} [opts.excludePaths=[]] - Patterns to exclude from tree
 * @param {string} [opts.repoMapMode="bootstrap_hotspot"] - Repo map strategy, forwarded to buildOptimizedRepoMap
 * @param {number} [opts.bootstrapTreeDepth=1] - Tree depth of the bootstrap map
 * @param {number} [opts.hotspotTopK=4] - Number of hotspot directories to expand
 * @param {number} [opts.hotspotTreeDepth=2] - Tree depth used for each hotspot subtree
 * @param {number} [opts.hotspotMaxBytes=122880] - Byte budget for the repo map
 * @param {boolean} [opts.bootstrapEnabled=true] - Run the bootstrap phase before the main search
 * @param {number} [opts.bootstrapMaxTurns=2] - Turn budget forwarded to the bootstrap phase
 * @param {number} [opts.bootstrapMaxCommands=6] - Command budget forwarded to the bootstrap phase
 * @param {function} [opts.onProgress] - Progress callback, receives one message string per event
 * @returns {Promise<Object>} `{ files, rg_patterns?, error?, raw_response?, _meta? }`
 */
export async function search({
  query,
  projectRoot,
  apiKey = null,
  jwt = null,
  maxTurns = 3,
  maxCommands = 8,
  maxResults = 10,
  treeDepth = 3,
  timeoutMs = 30000,
  excludePaths = [],
  repoMapMode = "bootstrap_hotspot",
  bootstrapTreeDepth = 1,
  hotspotTopK = 4,
  hotspotTreeDepth = 2,
  hotspotMaxBytes = 120 * 1024,
  bootstrapEnabled = true,
  bootstrapMaxTurns = 2,
  bootstrapMaxCommands = 6,
  onProgress = null,
}) {
  const log = (msg) => onProgress?.(msg);
  const root = resolve(projectRoot);
  const effectiveExcludePaths = _mergeExcludePaths(excludePaths);

  // ── Credentials ──────────────────────────────────────────
  const key = apiKey || (await getApiKey());
  let token = jwt;
  if (!token) {
    log("Fetching JWT...");
    token = await getCachedJwt(key, timeoutMs);
  }

  // ── Rate limit ───────────────────────────────────────────
  log("Checking rate limit...");
  if (!(await checkRateLimit(key, token, timeoutMs))) {
    return { files: [], error: "Rate limited, please try again later" };
  }

  const executor = new ToolExecutor(root);
  const toolDefs = getToolDefinitions(maxCommands);
  const systemPrompt = buildSystemPrompt(maxTurns, maxCommands, maxResults);

  // ── Optional bootstrap phase ─────────────────────────────
  let bootstrapHints = null;
  if (bootstrapEnabled) {
    bootstrapHints = await _runBootstrapPhase({
      query,
      projectRoot: root,
      apiKey: key,
      jwt: token,
      timeoutMs,
      excludePaths: effectiveExcludePaths,
      bootstrapTreeDepth,
      bootstrapMaxTurns,
      bootstrapMaxCommands,
      onProgress,
    });
    // The hints are optional downstream, so a missing field must not crash a log line.
    log(`Bootstrap hints: patterns=${bootstrapHints?.rgPatterns?.length ?? 0}, hot_dirs=${bootstrapHints?.hotDirs?.length ?? 0}`);
  }

  // ── Repo map ─────────────────────────────────────────────
  const {
    tree: repoMap,
    depth: actualDepth,
    hotspotDepth: actualHotspotDepth,
    sizeBytes: treeSizeBytes,
    fellBack,
    autoDepth,
    strategy: repoMapStrategy,
    hotDirs = [],
  } = buildOptimizedRepoMap({
    query,
    projectRoot: root,
    treeDepth,
    excludePaths: effectiveExcludePaths,
    optimizer: {
      mode: repoMapMode,
      bootstrapTreeDepth,
      hotspotTopK,
      hotspotTreeDepth,
      maxBytes: hotspotMaxBytes,
    },
    bootstrapHints,
    onProgress,
  });
  log(`Repo map: tree -L ${actualDepth} (${(treeSizeBytes / 1024).toFixed(1)}KB)${fellBack ? ` [fell back from L=${treeDepth}]` : ""}${autoDepth ? " [auto]" : ""} [strategy=${repoMapStrategy}]${hotDirs.length ? ` [hot=${hotDirs.join(",")}]` : ""}`);
  const userContent = `Problem Statement: ${query}\n\nRepo Map (tree -L ${actualDepth} /codebase):\n\`\`\`text\n${repoMap}\n\`\`\``;

  // Single source of truth for the diagnostic metadata attached to every
  // result produced after the repo map exists.
  const buildMeta = (extra = {}) => ({
    treeDepth: actualDepth,
    hotspotDepth: actualHotspotDepth,
    treeSizeKB: +(treeSizeBytes / 1024).toFixed(1),
    fellBack,
    projectRoot: root,
    repoMapStrategy,
    hotDirs,
    ...extra,
  });
  const collectedPatterns = () => [...new Set(executor.collectedRgPatterns)];

  // Last `n` distinct non-empty strings of `arr`, in their original order.
  const tailUnique = (arr, n) => {
    const out = [];
    const seen = new Set();
    for (let i = arr.length - 1; i >= 0 && out.length < n; i--) {
      const v = arr[i];
      if (typeof v !== "string" || !v || seen.has(v)) continue;
      seen.add(v);
      out.push(v);
    }
    return out.reverse();
  };

  const messages = [
    { role: 5, content: systemPrompt },
    { role: 1, content: userContent },
  ];

  // Trim state for smart context trimming
  const trimState = {
    query,
    turn: 0,
    recentFiles: [],
    recentPatterns: [],
    recentCommands: [],
  };

  const MAX_PROTO_BYTES = 320 * 1024;
  const MAX_COMPENSATIONS = 2;
  // Total API calls = maxTurns + 1 (last round for answer)
  const totalApiCalls = maxTurns + 1;
  let compensatedTurns = 0;
  let forceAnswerInjected = false;

  for (let turn = 0; turn < totalApiCalls + compensatedTurns; turn++) {
    log(`Turn ${turn + 1}/${totalApiCalls + compensatedTurns}`);
    trimState.turn = turn + 1;

    let proto = _buildRequest(key, token, messages, toolDefs);

    if (DEBUG_MODE) {
      console.error(`\n[DEBUG] ===== Turn ${turn + 1} Request =====`);
      console.error(`[DEBUG] Messages count: ${messages.length}`);
      console.error(`[DEBUG] Last message role: ${messages[messages.length - 1]?.role}`);
      console.error(`[DEBUG] Proto size: ${proto.length} bytes`);
    }

    // Preflight trim: proactively reduce payload if proto is already large.
    if (proto.length > MAX_PROTO_BYTES && messages.length > 1) {
      log(`Proto size ${proto.length} bytes > ${MAX_PROTO_BYTES}. Trimming context before request...`);
      if (_trimMessages(messages, trimState)) {
        proto = _buildRequest(key, token, messages, toolDefs);
        if (DEBUG_MODE) console.error(`[DEBUG] Proto size after trim: ${proto.length} bytes`);
      }
    }

    let respData;
    try {
      respData = await _streamingRequest(proto, timeoutMs);
    } catch (e) {
      const errCode = e.code || "UNKNOWN";
      const retryable =
        (errCode === "PAYLOAD_TOO_LARGE" || errCode === "TIMEOUT") && messages.length > 1;
      if (!retryable) {
        return {
          files: [],
          error: `${errCode}: ${e.message}`,
          _meta: buildMeta({ errorCode: errCode }),
        };
      }

      // Auto-retry once with trimmed context on payload/timeout errors
      log(`${errCode} on turn ${turn + 1}: trimming context and retrying...`);
      _trimMessages(messages, trimState);
      const retryProto = _buildRequest(key, token, messages, toolDefs);
      try {
        respData = await _streamingRequest(retryProto, timeoutMs);
      } catch (retryErr) {
        const retryCode = retryErr.code || errCode;
        return {
          files: [],
          error: `${retryCode}: ${retryErr.message} (retry after context trim also failed)`,
          _meta: buildMeta({ errorCode: retryCode, contextTrimmed: true }),
        };
      }
    }

    const [thinking, toolInfo] = _parseResponse(respData);

    if (DEBUG_MODE) {
      console.error(`\n[DEBUG] ===== Turn ${turn + 1} Response =====`);
      console.error(`[DEBUG] Response size: ${respData.length} bytes`);
      console.error(`[DEBUG] Thinking: ${thinking.slice(0, 500)}${thinking.length > 500 ? '...' : ''}`);
      console.error(`[DEBUG] Tool info: ${toolInfo ? `${toolInfo[0]}` : 'null'}`);
    }

    // No tool call at all: the model replied with plain text (or an error marker).
    if (toolInfo === null) {
      if (thinking.startsWith("[Error]")) {
        return { files: [], error: thinking, _meta: buildMeta() };
      }
      return { files: [], raw_response: thinking, _meta: buildMeta() };
    }

    const [toolName, toolArgs] = toolInfo;
    const args = toolArgs ?? {};

    if (toolName === "answer") {
      log("Received final answer");
      const result = _parseAnswer(args.answer || "", root);
      result.rg_patterns = collectedPatterns();
      result._meta = buildMeta();
      return result;
    }

    if (toolName !== "restricted_exec") {
      // Nothing is appended to the conversation, so the next turn re-sends
      // the same context; surface it instead of failing silently.
      log(`Ignoring unsupported tool call: ${toolName}`);
      continue;
    }

    const callId = randomUUID();
    const argsJson = JSON.stringify(args);
    const cmdKeys = Object.keys(args).filter((k) => k.startsWith("command"));
    log(`Executing ${cmdKeys.length} local commands`);

    if (DEBUG_MODE) {
      console.error(`\n[DEBUG] ===== Tool Calls =====`);
      for (const cmdKey of cmdKeys) {
        console.error(`[DEBUG] ${cmdKey}: ${JSON.stringify(args[cmdKey])}`);
      }
    }

    // A round without any well-formed command (object with a `type`) does not
    // count against the turn budget, up to MAX_COMPENSATIONS times.
    const hasValidCommand = cmdKeys.some((k) => {
      const cmd = args[k];
      return cmd && typeof cmd === "object" && cmd.type;
    });
    if (!hasValidCommand && compensatedTurns < MAX_COMPENSATIONS) {
      compensatedTurns++;
      log(`Turn compensation: no valid commands, extending search by 1 turn (${compensatedTurns}/${MAX_COMPENSATIONS})`);
    } else if (!hasValidCommand) {
      log(`Turn compensation skipped: max compensations (${MAX_COMPENSATIONS}) reached, forcing turn advance`);
    }

    const results = await executor.execToolCallAsync(args);

    // Update trim state with a compact summary of what we executed.
    // Best-effort: a malformed command must never abort the search.
    try {
      const newCommands = [];
      const newFiles = [];
      const newPatterns = [];

      for (const cmdKey of cmdKeys) {
        const cmd = args[cmdKey];
        if (!cmd || typeof cmd !== "object") continue;
        if (cmd.type === "rg" && cmd.pattern) {
          newPatterns.push(cmd.pattern);
          newCommands.push({ type: "rg", desc: `rg ${cmd.pattern}` });
        } else if (cmd.type === "readfile" && cmd.file) {
          const shortFile = String(cmd.file).replace(/^\/codebase\//, "");
          newFiles.push(shortFile);
          newCommands.push({ type: "readfile", desc: `read ${shortFile}` });
        } else if (cmd.type === "tree" && cmd.path) {
          newCommands.push({ type: "tree", desc: `tree ${cmd.path}` });
        }
      }

      trimState.recentCommands = [...trimState.recentCommands, ...newCommands].slice(-12);
      trimState.recentFiles = tailUnique([...trimState.recentFiles, ...newFiles], 20);
      trimState.recentPatterns = tailUnique([...trimState.recentPatterns, ...newPatterns], 30);
    } catch {
      // Ignore errors in trim state update
    }

    messages.push({
      role: 2,
      content: thinking,
      tool_call_id: callId,
      tool_name: "restricted_exec",
      tool_args_json: argsJson,
    });
    messages.push({ role: 4, content: results, ref_call_id: callId });

    // Inject force-answer after last effective search round
    const effectiveTurn = turn - compensatedTurns;
    if (effectiveTurn >= maxTurns - 1 && !forceAnswerInjected) {
      messages.push({ role: 1, content: FINAL_FORCE_ANSWER });
      forceAnswerInjected = true;
      log("Injected force-answer prompt");
    }
  }

  return {
    files: [],
    error: "Max turns reached without getting an answer",
    rg_patterns: collectedPatterns(),
    _meta: buildMeta(),
  };
}
|
|
1887
|
+
|
|
1888
|
+
// ─── Grep Keyword Expansion ────────────────────────────────
|
|
1889
|
+
|
|
1890
|
+
// File-level noise globs excluded from grep keyword expansion: bundler output,
// dependency manifests, generated files, binaries, AI tool config, etc.
// Based on the default exclusion list of ace-tool-rs (reverse-engineered
// Augment) plus feedback from real-world testing.
const GREP_NOISE_GLOBS = [
  // Bundler output & sourcemaps
  ["chunk-*", "*.chunk.*", "*.bundle.*"],
  ["*.min.js", "*.min.css", "*.map"],
  // Vue/Webpack hash-named bundles
  ["app.*.js", "app.*.css"],
  // Dependency & lock files
  ["*.lock", "package-lock.json", "yarn.lock", "pnpm-lock.yaml"],
  // Go dependencies
  ["go.sum", "go.mod"],
  // Rust dependencies
  ["Cargo.lock"],
  // Auto-generated type declarations
  ["*.d.ts"],
  // Binary files (from ace-tool-rs): platform binaries
  ["*.exe", "*.dll", "*.so", "*.dylib"],
  // Compiled intermediates
  ["*.pyc", "*.pyo", "*.class"],
  // Build artifacts
  ["*.wasm", "*.o", "*.a"],
  // Static assets & media files (from ace-tool-rs)
  ["*.svg", "*.png", "*.jpg", "*.jpeg", "*.gif", "*.ico", "*.webp"],
  ["*.woff*", "*.ttf", "*.eot", "*.otf"],
  // Audio / video
  ["*.mp4", "*.mp3", "*.wav", "*.avi", "*.mov"],
  // Documents
  ["*.pdf", "*.doc", "*.docx", "*.xls", "*.xlsx"],
  // Archives
  ["*.zip", "*.tar", "*.gz", "*.rar", "*.7z"],
  // AI tool config files (pure noise for grep expansion)
  ["CLAUDE.md", "AGENTS.md", ".cursorrules", ".cursorignore"],
].flat();
|
|
1916
|
+
|
|
1917
|
+
// Whole-directory exclusions for grep keyword expansion: build output, caches,
// VCS metadata and dependency directories.
// Based on the ace-tool-rs default exclusions plus feedback from real-world testing.
const GREP_NOISE_DIR_GLOBS = [
  // Build output
  "dist/**",
  "build/**",
  "out/**",
  "target/**",
  // Static asset directory
  "resource/page/**",
  // Framework build caches
  ".nuxt/**",
  ".next/**",
  ".output/**",
  // Cache directories (from ace-tool-rs)
  "__pycache__/**",
  ".cache/**",
  ".pytest_cache/**",
  // Version control (from ace-tool-rs)
  ".svn/**",
  ".hg/**",
  // Dependency directories (from ace-tool-rs)
  "vendor/**",
  ".venv/**",
  "venv/**",
];
|
|
1932
|
+
|
|
1933
|
+
/**
 * Run the collected grep keywords locally to surface files the remote model
 * missed. Purely local: spawns ripgrep (`rgPath`), makes no API calls.
 *
 * Only the first 8 patterns are tried. Files already present in
 * `existingPaths`, files outside `projectRoot`, and anything matching the
 * exclude / noise globs are skipped. ripgrep failures (including exit code 1,
 * "no match") are ignored per pattern.
 *
 * @param {string[]} patterns - rg patterns to try
 * @param {string} projectRoot - Project root directory
 * @param {string[]} excludePaths - Exclude patterns, expanded via _expandExcludeGlobsForRg
 * @param {string[]} existingPaths - Project-relative paths already in the result set
 * @param {number} [maxPerPattern=3] - Max number of new files contributed by a single pattern
 * @param {number} [maxTotal=10] - Overall cap on files added by grep expansion (keeps output small)
 * @returns {Array<{ path: string, full_path: string, ranges: Array, fromGrep: boolean }>}
 *   New files, ordered by the number of patterns that matched them (descending).
 */
function _autoGrepFiles(patterns, projectRoot, excludePaths, existingPaths, maxPerPattern = 3, maxTotal = 10) {
  const root = resolve(projectRoot);
  // hitCount: how many patterns matched each file; drives the final ordering.
  const hitCount = new Map(); // absolute path -> count
  const fileInfo = new Map(); // absolute path -> result entry
  const seen = new Set(existingPaths.map((p) => resolve(root, p)));

  // Noise globs (file-level + directory-level) are the same for every pattern.
  const noiseArgs = [];
  for (const noise of [...GREP_NOISE_GLOBS, ...GREP_NOISE_DIR_GLOBS]) {
    noiseArgs.push("--glob", `!${noise}`);
  }

  for (const pattern of patterns.slice(0, 8)) {
    // Cap already reached: stop early to save rg invocations.
    if (fileInfo.size >= maxTotal) break;

    try {
      const args = ["-l", "--max-count", "10", "-S"];
      for (const ex of excludePaths) {
        for (const expanded of _expandExcludeGlobsForRg(ex)) {
          args.push("--glob", `!${expanded}`);
        }
      }
      args.push(...noiseArgs);
      args.push("--", pattern, root);

      const stdout = execFileSync(rgPath, args, {
        timeout: 5000,
        maxBuffer: 1024 * 1024,
        encoding: "utf-8",
      });

      let added = 0;
      for (const f of stdout.split(/\r?\n/)) {
        if (!f) continue;
        const full = resolve(f);
        if (seen.has(full)) continue;

        // Same containment test as _parseAnswer: only a real ".." segment
        // escapes the root; a file merely named "..foo" does not.
        const rel = relative(root, full);
        if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) continue;

        // Count the hit even when the caps prevent adding the file right now,
        // so the ranking reflects every pattern that matched it.
        hitCount.set(full, (hitCount.get(full) || 0) + 1);

        if (fileInfo.has(full)) continue;
        if (added >= maxPerPattern || fileInfo.size >= maxTotal) continue;

        fileInfo.set(full, { path: rel, full_path: full, ranges: [], fromGrep: true });
        added++;
      }
    } catch {
      // rg exits with 1 when nothing matches; treat any failure as "no extra files".
    }
  }

  // Most-matched files first (sort is stable, so ties keep discovery order).
  return [...fileInfo.values()]
    .sort((a, b) => (hitCount.get(b.full_path) || 0) - (hitCount.get(a.full_path) || 0))
    .slice(0, maxTotal);
}
|
|
2002
|
+
|
|
2003
|
+
// ─── Code Snippet Reader ───────────────────────────────────
|
|
2004
|
+
|
|
2005
|
+
const EXT_LANG_MAP = {
|
|
2006
|
+
".js": "javascript", ".mjs": "javascript", ".cjs": "javascript",
|
|
2007
|
+
".ts": "typescript", ".tsx": "typescript", ".jsx": "javascript",
|
|
2008
|
+
".py": "python", ".go": "go", ".rs": "rust", ".java": "java",
|
|
2009
|
+
".rb": "ruby", ".vue": "vue", ".c": "c", ".h": "c",
|
|
2010
|
+
".cpp": "cpp", ".hpp": "cpp", ".cs": "csharp", ".php": "php",
|
|
2011
|
+
".swift": "swift", ".kt": "kotlin", ".sh": "bash",
|
|
2012
|
+
".yaml": "yaml", ".yml": "yaml", ".json": "json", ".toml": "toml",
|
|
2013
|
+
".sql": "sql", ".html": "html", ".css": "css", ".scss": "scss",
|
|
2014
|
+
};
|
|
2015
|
+
|
|
2016
|
+
/**
 * Read the requested line ranges of a file as fenced, line-numbered snippets.
 *
 * When `ranges` is empty (files added by grep expansion) the first 20 lines
 * are used. Ranges that select no lines are skipped. If a snippet does not fit
 * in the remaining budget it is truncated on a line boundary, terminated with
 * a properly closed fence, and no further ranges are read. Any read failure
 * yields whatever was collected so far (best-effort).
 *
 * @param {string} filePath - Absolute path of the file
 * @param {Array<[number, number]>} ranges - Line ranges (1-indexed, inclusive)
 * @param {number} budget - Remaining character budget
 * @returns {{ snippets: string[], used: number }} Snippet blocks and the characters they consumed
 */
function _readCodeSnippets(filePath, ranges, budget) {
  const FENCE = "```";
  const snippets = [];
  let used = 0;
  const lang = EXT_LANG_MAP[extname(filePath).toLowerCase()] || "";

  try {
    // Split on LF or CRLF so Windows files do not leak "\r" into the output.
    const lines = readFileSync(filePath, "utf-8").split(/\r?\n/);
    // A trailing newline produces an empty last element that is not a real line.
    if (lines.length > 1 && lines[lines.length - 1] === "") lines.pop();

    const effectiveRanges = ranges.length > 0 ? ranges : [[1, Math.min(20, lines.length)]];

    for (const [start, end] of effectiveRanges) {
      const first = Math.max(1, start) - 1; // 0-based, inclusive
      const last = Math.min(lines.length, end); // 0-based, exclusive
      const slice = lines.slice(first, last);
      if (slice.length === 0) continue; // out-of-range or inverted range: nothing to show

      const numbered = slice
        .map((text, i) => `${String(first + i + 1).padStart(4)} | ${text}`)
        .join("\n");
      const block = `${FENCE}${lang}\n${numbered}\n${FENCE}`;

      if (used + block.length > budget) {
        // Not enough budget: emit a truncated snippet if a useful amount fits.
        const remaining = budget - used - 100; // reserve 100 chars for the truncation notice
        if (remaining > 200) {
          let head = block.slice(0, remaining);
          // Prefer cutting on a line boundary, but never cut back to the bare fence header.
          const lastNewline = head.lastIndexOf("\n");
          if (lastNewline > FENCE.length + lang.length) head = head.slice(0, lastNewline);
          // The closing fence must sit on its own line to terminate the block.
          const truncated = `${head}\n... [truncated]\n${FENCE}`;
          snippets.push(truncated);
          used += truncated.length;
        }
        return { snippets, used };
      }

      snippets.push(block);
      used += block.length;
    }
  } catch {
    // File could not be read (or a range was malformed): return what we have.
  }
  return { snippets, used };
}
|
|
2061
|
+
|
|
2062
|
+
/**
 * Search and return formatted result suitable for MCP tool response.
 *
 * Wraps `search`, then (on success) tops the result up to `maxResults` files
 * by running the collected grep keywords locally, optionally attaches code
 * snippets, and renders everything as plain text with a trailing `[config]`
 * diagnostic line. On failure it renders the error with diagnostics, hints and
 * any grep keywords collected before the failure.
 *
 * @param {Object} opts
 * @param {string} opts.query - Natural language search query
 * @param {string} opts.projectRoot - Project root directory
 * @param {string} [opts.apiKey] - Windsurf API key, forwarded to `search`
 * @param {string} [opts.jwt] - JWT token, forwarded to `search`
 * @param {number} [opts.maxTurns=3]
 * @param {number} [opts.maxCommands=8]
 * @param {number} [opts.maxResults=10] - Cap on the number of files in the output
 * @param {number} [opts.treeDepth=3]
 * @param {number} [opts.timeoutMs=30000]
 * @param {string[]} [opts.excludePaths=[]]
 * @param {string} [opts.repoMapMode="bootstrap_hotspot"]
 * @param {number} [opts.bootstrapTreeDepth=1]
 * @param {number} [opts.hotspotTopK=4]
 * @param {number} [opts.hotspotTreeDepth=2]
 * @param {number} [opts.hotspotMaxBytes=122880]
 * @param {boolean} [opts.bootstrapEnabled=true]
 * @param {number} [opts.bootstrapMaxTurns=2]
 * @param {number} [opts.bootstrapMaxCommands=6]
 * @param {boolean} [opts.includeSnippets=false] - Attach code snippets for each file
 * @param {function} [opts.onProgress]
 * @returns {Promise<string>}
 */
export async function searchWithContent({
  query,
  projectRoot,
  apiKey = null,
  maxTurns = 3,
  maxCommands = 8,
  maxResults = 10,
  treeDepth = 3,
  timeoutMs = 30000,
  excludePaths = [],
  repoMapMode = "bootstrap_hotspot",
  bootstrapTreeDepth = 1,
  hotspotTopK = 4,
  hotspotTreeDepth = 2,
  hotspotMaxBytes = 120 * 1024,
  bootstrapEnabled = true,
  bootstrapMaxTurns = 2,
  bootstrapMaxCommands = 6,
  includeSnippets = false,
  onProgress = null,
  jwt = null,
}) {
  const result = await search({
    query,
    projectRoot,
    apiKey,
    jwt,
    maxTurns,
    maxCommands,
    maxResults,
    treeDepth,
    timeoutMs,
    excludePaths,
    repoMapMode,
    bootstrapTreeDepth,
    hotspotTopK,
    hotspotTreeDepth,
    hotspotMaxBytes,
    bootstrapEnabled,
    bootstrapMaxTurns,
    bootstrapMaxCommands,
    onProgress,
  });

  // Deduplicate + filter short patterns
  const uniquePatterns = [...new Set(result.rg_patterns || [])]
    .filter((p) => typeof p === "string" && p.length >= 3);

  if (result.error) {
    const meta = result._meta;
    let errMsg = `Error: ${result.error}`;
    if (meta) {
      errMsg += `\n\n[diagnostic] error_type=${meta.errorCode || "unknown"}, tree_depth_used=${meta.treeDepth}, tree_size=${meta.treeSizeKB}KB`;
      if (meta.fellBack) errMsg += ` (auto fell back from requested depth)`;
      if (meta.contextTrimmed) errMsg += `, context_trimmed=true`;
      if (meta.projectRoot) errMsg += `\n[diagnostic] project_path=${meta.projectRoot}`;
      errMsg += `\n[config] max_turns=${maxTurns}, max_results=${maxResults}, max_commands=${maxCommands}, timeout_ms=${timeoutMs}`;
      if (excludePaths.length) errMsg += `, exclude_paths=[${excludePaths.join(", ")}]`;
      // Targeted hints based on error type
      if (meta.errorCode === "PAYLOAD_TOO_LARGE" || meta.errorCode === "TIMEOUT") {
        errMsg += `\n[hint] Payload/timeout error. Try: reduce tree_depth, reduce max_turns, add exclude_paths, or narrow project_path to a subdirectory.`;
      } else if (meta.errorCode === "AUTH_ERROR") {
        errMsg += `\n[hint] Authentication error. The API key may be expired or revoked. Try re-extracting with extract_windsurf_key, or set a fresh WINDSURF_API_KEY.`;
      } else if (meta.errorCode === "RATE_LIMITED") {
        errMsg += `\n[hint] Rate limited. Wait a moment and retry.`;
      } else {
        errMsg += `\n[hint] If the error is payload-related, try a lower tree_depth value or add exclude_paths.`;
      }
    }
    // Keywords gathered before the failure are still useful to the caller.
    if (uniquePatterns.length) {
      errMsg += `\n\ngrep keywords: ${uniquePatterns.join(", ")}`;
    }
    return errMsg;
  }

  let files = (result.files || []).slice(0, maxResults);

  // Grep expansion: run the keywords locally to fill any remaining result
  // slots with files the model missed (purely local, 0 API calls).
  let grepExpanded = 0;
  const grepBudget = Math.max(0, maxResults - files.length);
  if (uniquePatterns.length > 0 && grepBudget > 0) {
    const extra = _autoGrepFiles(
      uniquePatterns,
      projectRoot,
      _mergeExcludePaths(excludePaths),
      files.map((f) => f.path),
      3,
      grepBudget,
    );
    if (extra.length > 0) {
      files = [...files, ...extra];
      grepExpanded = extra.length;
    }
  }

  if (!files.length && !uniquePatterns.length) {
    const raw = result.raw_response || "";
    if (!raw) return "No relevant files found.";
    const MAX_RAW = 500;
    const truncated = raw.length > MAX_RAW ? raw.slice(0, MAX_RAW) + "\n...[raw_response truncated]..." : raw;
    return `No relevant files found.\n\nRaw response:\n${truncated}`;
  }

  const parts = [];
  const n = files.length;

  if (n > 0) {
    parts.push(
      grepExpanded > 0
        ? `Found ${n} relevant files (${n - grepExpanded} from AI search, ${grepExpanded} from grep keyword expansion).`
        : `Found ${n} relevant files.`,
    );
  } else {
    parts.push("No files found.");
  }

  // Optional code snippets so a single call can provide full context.
  // 45KB of code + ~5KB of metadata keeps the total output at or below ~50KB.
  // File order: AI-found files first, then grep-expanded files (ranked by
  // the number of patterns that matched them).
  const CODE_BUDGET = 45000;
  let codeBudgetLeft = CODE_BUDGET;

  files.forEach((entry, i) => {
    const rangesStr = entry.ranges.length > 0
      ? entry.ranges.map(([s, e]) => `L${s}-${e}`).join(", ")
      : (entry.fromGrep ? "grep match" : "");
    const label = entry.fromGrep ? " [grep expanded]" : "";

    parts.push("");
    parts.push(`--- [${i + 1}/${n}] ${entry.full_path} (${rangesStr})${label} ---`);

    // Snippets are read and attached only when includeSnippets=true.
    if (!includeSnippets) return;
    if (codeBudgetLeft > 200) {
      const { snippets, used } = _readCodeSnippets(entry.full_path, entry.ranges, codeBudgetLeft);
      parts.push(...snippets);
      codeBudgetLeft -= used;
    } else {
      parts.push("(code snippet omitted — output budget reached)");
    }
  });

  if (uniquePatterns.length) {
    parts.push("");
    parts.push(`grep keywords: ${uniquePatterns.join(", ")}`);
  }

  // Append diagnostic metadata so the calling AI knows what happened
  const meta = result._meta;
  if (meta) {
    const fbNote = meta.fellBack ? ` (fell back from requested depth)` : "";
    const hotspotNote = meta.hotspotDepth ? `, hotspot_depth=${meta.hotspotDepth}` : "";
    let configLine = `[config] project_path=${projectRoot}, tree_depth=${meta.treeDepth}${fbNote}${hotspotNote}, tree_size=${meta.treeSizeKB}KB, max_turns=${maxTurns}, max_results=${maxResults}, timeout_ms=${timeoutMs}`;
    if (excludePaths.length) configLine += `, exclude_paths=[${excludePaths.join(", ")}]`;
    if (grepExpanded > 0) configLine += `, grep_expanded=${grepExpanded}`;
    parts.push("");
    parts.push(configLine);
  }

  return parts.join("\n");
}
|
|
2238
|
+
|
|
2239
|
+
/**
 * Extract Windsurf API Key info (for MCP tool use).
 *
 * Thin async wrapper that delegates to `extractKey`.
 *
 * @param {string} [dbPath] - Passed through to `extractKey` unchanged.
 * @returns {Promise<Object>} Whatever `extractKey` produces.
 */
export async function extractKeyInfo(dbPath) {
  const keyInfo = await extractKey(dbPath);
  return keyInfo;
}
|