@probelabs/probe 0.6.0-rc281 → 0.6.0-rc283
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc283-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc283-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc283-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc283-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc283-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/index.js +132 -18
- package/build/tools/vercel.js +178 -18
- package/cjs/agent/ProbeAgent.cjs +134 -20
- package/cjs/index.cjs +134 -20
- package/package.json +2 -2
- package/src/tools/vercel.js +178 -18
- package/bin/binaries/probe-v0.6.0-rc281-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc281-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc281-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc281-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc281-x86_64-unknown-linux-musl.tar.gz +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/build/agent/index.js
CHANGED
|
@@ -9085,6 +9085,48 @@ var init_hashline = __esm({
|
|
|
9085
9085
|
// src/tools/vercel.js
|
|
9086
9086
|
import { tool } from "ai";
|
|
9087
9087
|
import { existsSync } from "fs";
|
|
9088
|
+
/**
 * Wrap mixed-case or underscore-containing search terms in double quotes so
 * they are matched literally instead of being split into sub-words.
 *
 * Query syntax attached to a term is kept OUTSIDE the quotes:
 *   - grouping parentheses:  (limitDRL OR limitRedis) -> ("limitDRL" OR "limitRedis")
 *   - a leading + / - prefix: +fooBar -foo_bar         -> +"fooBar" -"foo_bar"
 *   - search hints (ext:, file:, dir:, type:, lang:) are passed through untouched
 * NOTE(review): the hint names and the +/-/() operators follow probe's
 * documented query syntax — confirm against the probe version in use.
 *
 * Already-quoted segments (including an unclosed trailing quote) are kept
 * verbatim. AND / OR / NOT need no special case: they are all-uppercase with
 * no underscore, so the quoting heuristic never touches them.
 *
 * @param {string} query2 - Raw search query.
 * @returns {string} The query with qualifying terms quoted and whitespace
 *   between tokens collapsed to single spaces. Falsy or non-string input is
 *   returned unchanged.
 */
function autoQuoteSearchTerms(query2) {
  if (!query2 || typeof query2 !== "string") return query2;

  // A token is either a quoted segment (closing quote optional, so an unclosed
  // quote swallows the rest of the query) or a run of non-space, non-quote chars.
  const tokens = query2.match(/"[^"]*"?|[^\s"]+/g) || [];

  // Mixed case (camelCase / PascalCase) or snake_case marks a symbol name.
  const needsQuoting = (term) => term.includes("_") || /[A-Z]/.test(term) && /[a-z]/.test(term);

  const searchHint = /^(?:ext|file|dir|type|lang):/;
  // Splits a token into: leading "(" / one "+" or "-", the core term, trailing ")".
  const wrapped = /^(\(*[+-]?\(*)(.*?)(\)*)$/;

  return tokens.map((token) => {
    if (token.startsWith('"')) return token;
    if (searchHint.test(token)) return token;

    const [, lead, core, trail] = wrapped.exec(token);
    // Only treat the affixes as query syntax when the core is a plain term.
    // Tokens such as "getUser()" or "--my_flag" keep the whole-token handling.
    const hasSyntaxAffix = (lead !== "" || trail !== "") && core !== "" && !/[()]/.test(core) && !/^[+-]/.test(core);
    if (hasSyntaxAffix) {
      return needsQuoting(core) ? `${lead}"${core}"${trail}` : token;
    }
    return needsQuoting(token) ? `"${token}"` : token;
  }).join(" ");
}
|
|
9088
9130
|
function normalizeTargets(targets) {
|
|
9089
9131
|
if (!Array.isArray(targets)) return [];
|
|
9090
9132
|
const seen = /* @__PURE__ */ new Set();
|
|
@@ -9185,40 +9227,75 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
|
|
|
9185
9227
|
"- listFiles: Understand directory structure to find where relevant code might live.",
|
|
9186
9228
|
"",
|
|
9187
9229
|
"CRITICAL - How probe search works (do NOT ignore):",
|
|
9188
|
-
"- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting.",
|
|
9230
|
+
"- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically.",
|
|
9189
9231
|
'- Searching "allowed_ips" ALREADY matches "AllowedIPs", "allowedIps", "allowed_ips", etc. Do NOT manually try case/style variations.',
|
|
9190
9232
|
'- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
|
|
9191
|
-
"- NEVER repeat the same search query \u2014 you will get the same results.",
|
|
9233
|
+
"- NEVER repeat the same search query \u2014 you will get the same results. Changing the path does NOT change this.",
|
|
9192
9234
|
"- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful \u2014 probe handles it.",
|
|
9193
|
-
"- If a search returns no results, the term likely does not exist
|
|
9194
|
-
"- If 2-3
|
|
9235
|
+
"- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.",
|
|
9236
|
+
"- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.",
|
|
9237
|
+
"",
|
|
9238
|
+
"When to use exact=true:",
|
|
9239
|
+
"- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).",
|
|
9240
|
+
"- exact=true matches the literal string only \u2014 no stemming, no splitting.",
|
|
9241
|
+
'- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
|
|
9242
|
+
"- Do NOT use exact=true for exploratory/conceptual queries \u2014 use the default for those.",
|
|
9243
|
+
"",
|
|
9244
|
+
"Combining searches with OR:",
|
|
9245
|
+
'- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
|
|
9246
|
+
`- For known symbol names, quote each term to prevent splitting: '"limitDRL" "limitRedis"' matches either exact symbol.`,
|
|
9247
|
+
'- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" \u2014 not what you want for symbol lookup.',
|
|
9248
|
+
"- Use OR to search for multiple related symbols in ONE search instead of separate searches.",
|
|
9249
|
+
"- This is much faster than running separate searches sequentially.",
|
|
9250
|
+
`- Example: search '"ForwardMessage" "SessionLimiter"' finds files with either exact symbol in one call.`,
|
|
9251
|
+
`- Example: search '"limitDRL" "doRollingWindowWrite"' finds both rate limiting functions at once.`,
|
|
9252
|
+
'- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
|
|
9253
|
+
"",
|
|
9254
|
+
"Parallel tool calls:",
|
|
9255
|
+
"- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).",
|
|
9256
|
+
"- Do NOT wait for one search to finish before starting the next if they are independent.",
|
|
9257
|
+
'- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
|
|
9258
|
+
"- Similarly, call multiple extract tools in parallel when verifying different files.",
|
|
9195
9259
|
"",
|
|
9196
9260
|
"GOOD search strategy (do this):",
|
|
9197
9261
|
' Query: "How does authentication work and how are sessions managed?"',
|
|
9198
|
-
' \u2192 search "authentication"
|
|
9262
|
+
' \u2192 search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
|
|
9199
9263
|
' Query: "Find the IP allowlist middleware"',
|
|
9200
9264
|
' \u2192 search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
|
|
9265
|
+
' Query: "Find ForwardMessage and SessionLimiter"',
|
|
9266
|
+
` \u2192 search '"ForwardMessage" "SessionLimiter"' (one OR search finds both exact symbols)`,
|
|
9267
|
+
' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
|
|
9268
|
+
' Query: "Find limitDRL and limitRedis functions"',
|
|
9269
|
+
` \u2192 search '"limitDRL" "limitRedis"' (one OR search, quoted to prevent camelCase splitting)`,
|
|
9270
|
+
' Query: "Find ThrottleRetryLimit usage"',
|
|
9271
|
+
' \u2192 search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist \u2014 stop)',
|
|
9201
9272
|
' Query: "How does BM25 scoring work with SIMD optimization?"',
|
|
9202
|
-
' \u2192 search "BM25 scoring"
|
|
9273
|
+
' \u2192 search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
|
|
9203
9274
|
"",
|
|
9204
9275
|
"BAD search strategy (never do this):",
|
|
9205
|
-
' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG:
|
|
9206
|
-
|
|
9207
|
-
' \u2192 search "
|
|
9276
|
+
' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG: case/style variations, probe handles them)',
|
|
9277
|
+
` \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation \u2014 combine with OR: '"limitDRL" "limitRedis"')`,
|
|
9278
|
+
' \u2192 search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
|
|
9279
|
+
' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths \u2014 probe searches recursively)',
|
|
9280
|
+
' \u2192 search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
|
|
9281
|
+
' \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" (WRONG: repeating the exact same query)',
|
|
9282
|
+
' \u2192 search "authentication" \u2192 wait \u2192 search "session management" \u2192 wait (WRONG: these are independent, run them in parallel)',
|
|
9208
9283
|
"",
|
|
9209
9284
|
"Keyword tips:",
|
|
9210
9285
|
"- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.",
|
|
9211
9286
|
'- Avoid searching for these alone \u2014 combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
|
|
9212
9287
|
'- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
|
|
9213
|
-
"- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.",
|
|
9214
9288
|
'- camelCase terms are split: getUserData becomes "get", "user", "data" \u2014 so one search covers all naming styles.',
|
|
9289
|
+
'- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
|
|
9215
9290
|
"",
|
|
9216
9291
|
"Strategy:",
|
|
9217
|
-
"1. Analyze the query - identify key concepts
|
|
9218
|
-
|
|
9219
|
-
"3.
|
|
9220
|
-
"4.
|
|
9221
|
-
"5.
|
|
9292
|
+
"1. Analyze the query - identify key concepts and group related symbols",
|
|
9293
|
+
`2. Combine related symbols into OR searches: '"symbolA" "symbolB"' finds files with either (quote to prevent splitting)`,
|
|
9294
|
+
"3. Run INDEPENDENT searches in PARALLEL \u2014 do not wait for one to finish before starting another",
|
|
9295
|
+
"4. For known symbol names use exact=true. For concepts use default (exact=false).",
|
|
9296
|
+
"5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.",
|
|
9297
|
+
"6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.",
|
|
9298
|
+
"7. Combine all relevant targets in your final response",
|
|
9222
9299
|
"",
|
|
9223
9300
|
`Query: ${searchQuery}`,
|
|
9224
9301
|
`Search path(s): ${searchPath}`,
|
|
@@ -9277,6 +9354,13 @@ var init_vercel = __esm({
|
|
|
9277
9354
|
description: searchDelegate ? searchDelegateDescription : searchDescription,
|
|
9278
9355
|
inputSchema: searchSchema,
|
|
9279
9356
|
execute: async ({ query: searchQuery, path: path9, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
|
|
9357
|
+
if (!exact && searchQuery) {
|
|
9358
|
+
const originalQuery = searchQuery;
|
|
9359
|
+
searchQuery = autoQuoteSearchTerms(searchQuery);
|
|
9360
|
+
if (debug && searchQuery !== originalQuery) {
|
|
9361
|
+
console.error(`[search] Auto-quoted query: "${originalQuery}" \u2192 "${searchQuery}"`);
|
|
9362
|
+
}
|
|
9363
|
+
}
|
|
9280
9364
|
const effectiveMaxTokens = paramMaxTokens || maxTokens;
|
|
9281
9365
|
let searchPaths;
|
|
9282
9366
|
if (path9) {
|
|
@@ -9312,13 +9396,13 @@ var init_vercel = __esm({
|
|
|
9312
9396
|
return await search(searchOptions);
|
|
9313
9397
|
};
|
|
9314
9398
|
if (!searchDelegate) {
|
|
9315
|
-
const searchKey = `${searchQuery}::${
|
|
9399
|
+
const searchKey = `${searchQuery}::${exact || false}`;
|
|
9316
9400
|
if (!nextPage) {
|
|
9317
9401
|
if (previousSearches.has(searchKey)) {
|
|
9318
9402
|
if (debug) {
|
|
9319
|
-
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}"
|
|
9403
|
+
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
|
|
9320
9404
|
}
|
|
9321
|
-
return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query
|
|
9405
|
+
return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results \u2014 probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
|
|
9322
9406
|
}
|
|
9323
9407
|
previousSearches.add(searchKey);
|
|
9324
9408
|
paginationCounts.set(searchKey, 0);
|
|
@@ -9538,6 +9622,36 @@ var init_vercel = __esm({
|
|
|
9538
9622
|
} else if (targets) {
|
|
9539
9623
|
const parsedTargets = parseTargets(targets);
|
|
9540
9624
|
extractFiles = parsedTargets.map((target) => resolveTargetPath(target, effectiveCwd));
|
|
9625
|
+
if (options.allowedFolders && options.allowedFolders.length > 0) {
|
|
9626
|
+
extractFiles = extractFiles.map((target) => {
|
|
9627
|
+
const { filePart, suffix } = splitTargetSuffix(target);
|
|
9628
|
+
if (existsSync(filePart)) return target;
|
|
9629
|
+
const cwdPrefix = effectiveCwd.endsWith("/") ? effectiveCwd : effectiveCwd + "/";
|
|
9630
|
+
const relativePart = filePart.startsWith(cwdPrefix) ? filePart.slice(cwdPrefix.length) : null;
|
|
9631
|
+
if (relativePart) {
|
|
9632
|
+
for (const folder of options.allowedFolders) {
|
|
9633
|
+
const candidate = folder + "/" + relativePart;
|
|
9634
|
+
if (existsSync(candidate)) {
|
|
9635
|
+
if (debug) console.error(`[extract] Auto-fixed path: ${filePart} \u2192 ${candidate}`);
|
|
9636
|
+
return candidate + suffix;
|
|
9637
|
+
}
|
|
9638
|
+
}
|
|
9639
|
+
}
|
|
9640
|
+
for (const folder of options.allowedFolders) {
|
|
9641
|
+
const folderPrefix = folder.endsWith("/") ? folder : folder + "/";
|
|
9642
|
+
const wsParent = folderPrefix.replace(/[^/]+\/$/, "");
|
|
9643
|
+
if (filePart.startsWith(wsParent)) {
|
|
9644
|
+
const tail = filePart.slice(wsParent.length);
|
|
9645
|
+
const candidate = folderPrefix + tail;
|
|
9646
|
+
if (candidate !== filePart && existsSync(candidate)) {
|
|
9647
|
+
if (debug) console.error(`[extract] Auto-fixed path via workspace: ${filePart} \u2192 ${candidate}`);
|
|
9648
|
+
return candidate + suffix;
|
|
9649
|
+
}
|
|
9650
|
+
}
|
|
9651
|
+
}
|
|
9652
|
+
return target;
|
|
9653
|
+
});
|
|
9654
|
+
}
|
|
9541
9655
|
let effectiveFormat = format;
|
|
9542
9656
|
if (outline && format === "outline-xml") {
|
|
9543
9657
|
effectiveFormat = "xml";
|
package/build/tools/vercel.js
CHANGED
|
@@ -14,6 +14,75 @@ import { existsSync } from 'fs';
|
|
|
14
14
|
import { formatErrorForAI } from '../utils/error-types.js';
|
|
15
15
|
import { annotateOutputWithHashes } from './hashline.js';
|
|
16
16
|
|
|
17
|
+
/**
 * Auto-quote search query terms that contain mixed case or underscores.
 * Unquoted camelCase like "limitDRL" gets split into "limit" + "DRL";
 * wrapping such terms in quotes makes them match as literal strings.
 *
 * Query syntax attached to a term is kept OUTSIDE the quotes so it keeps its
 * meaning: grouping parentheses, a leading + / - prefix, and search hints
 * (ext:, file:, dir:, type:, lang:), which are passed through untouched.
 * NOTE(review): the hint names and the +/-/() operators follow probe's
 * documented query syntax — confirm against the probe version in use.
 *
 * Examples:
 *   "limitDRL limitRedis"        → '"limitDRL" "limitRedis"'
 *   "ThrottleRetryLimit"         → '"ThrottleRetryLimit"'
 *   "allowed_ips"                → '"allowed_ips"'
 *   "rate limit"                 → 'rate limit' (no change, all lowercase)
 *   '"already quoted"'           → '"already quoted"' (no change)
 *   'foo AND bar'                → 'foo AND bar' (operators preserved)
 *   '(limitDRL OR limitRedis)'   → '("limitDRL" OR "limitRedis")'
 *   '+fooBar -foo_bar'           → '+"fooBar" -"foo_bar"'
 *   'handleError file:src/a_b.js' → '"handleError" file:src/a_b.js'
 *
 * @param {string} query - Raw search query.
 * @returns {string} The query with qualifying terms quoted and whitespace
 *   between tokens collapsed to single spaces. Falsy or non-string input is
 *   returned unchanged.
 */
function autoQuoteSearchTerms(query) {
  if (!query || typeof query !== 'string') return query;

  // A token is either a quoted segment (closing quote optional, so an unclosed
  // quote swallows the rest of the query) or a run of non-space, non-quote chars.
  const tokens = query.match(/"[^"]*"?|[^\s"]+/g) || [];

  // Mixed case (camelCase / PascalCase) or snake_case marks a symbol name.
  // AND / OR / NOT are all-uppercase, so they never qualify and stay unquoted.
  const needsQuoting = (term) =>
    term.includes('_') || (/[A-Z]/.test(term) && /[a-z]/.test(term));

  const searchHint = /^(?:ext|file|dir|type|lang):/;
  // Splits a token into: leading "(" / one "+" or "-", the core term, trailing ")".
  const wrapped = /^(\(*[+-]?\(*)(.*?)(\)*)$/;

  return tokens
    .map((token) => {
      // Already quoted — keep as-is
      if (token.startsWith('"')) return token;
      // Search hint — quoting it would turn the filter into a literal term
      if (searchHint.test(token)) return token;

      const [, lead, core, trail] = wrapped.exec(token);
      // Only treat the affixes as query syntax when the core is a plain term.
      // Tokens such as "getUser()" or "--my_flag" keep the whole-token handling.
      const hasSyntaxAffix =
        (lead !== '' || trail !== '') &&
        core !== '' &&
        !/[()]/.test(core) &&
        !/^[+-]/.test(core);
      if (hasSyntaxAffix) {
        return needsQuoting(core) ? `${lead}"${core}"${trail}` : token;
      }
      return needsQuoting(token) ? `"${token}"` : token;
    })
    .join(' ');
}
|
|
85
|
+
|
|
17
86
|
const CODE_SEARCH_SCHEMA = {
|
|
18
87
|
type: 'object',
|
|
19
88
|
properties: {
|
|
@@ -144,40 +213,75 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
|
|
|
144
213
|
'- listFiles: Understand directory structure to find where relevant code might live.',
|
|
145
214
|
'',
|
|
146
215
|
'CRITICAL - How probe search works (do NOT ignore):',
|
|
147
|
-
'- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting.',
|
|
216
|
+
'- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically.',
|
|
148
217
|
'- Searching "allowed_ips" ALREADY matches "AllowedIPs", "allowedIps", "allowed_ips", etc. Do NOT manually try case/style variations.',
|
|
149
218
|
'- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
|
|
150
|
-
'- NEVER repeat the same search query — you will get the same results.',
|
|
219
|
+
'- NEVER repeat the same search query — you will get the same results. Changing the path does NOT change this.',
|
|
151
220
|
'- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful — probe handles it.',
|
|
152
|
-
'- If a search returns no results, the term likely does not exist
|
|
153
|
-
'- If 2-3
|
|
221
|
+
'- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.',
|
|
222
|
+
'- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.',
|
|
223
|
+
'',
|
|
224
|
+
'When to use exact=true:',
|
|
225
|
+
'- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).',
|
|
226
|
+
'- exact=true matches the literal string only — no stemming, no splitting.',
|
|
227
|
+
'- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
|
|
228
|
+
'- Do NOT use exact=true for exploratory/conceptual queries — use the default for those.',
|
|
229
|
+
'',
|
|
230
|
+
'Combining searches with OR:',
|
|
231
|
+
'- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
|
|
232
|
+
'- For known symbol names, quote each term to prevent splitting: \'"limitDRL" "limitRedis"\' matches either exact symbol.',
|
|
233
|
+
'- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" — not what you want for symbol lookup.',
|
|
234
|
+
'- Use OR to search for multiple related symbols in ONE search instead of separate searches.',
|
|
235
|
+
'- This is much faster than running separate searches sequentially.',
|
|
236
|
+
'- Example: search \'"ForwardMessage" "SessionLimiter"\' finds files with either exact symbol in one call.',
|
|
237
|
+
'- Example: search \'"limitDRL" "doRollingWindowWrite"\' finds both rate limiting functions at once.',
|
|
238
|
+
'- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
|
|
239
|
+
'',
|
|
240
|
+
'Parallel tool calls:',
|
|
241
|
+
'- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).',
|
|
242
|
+
'- Do NOT wait for one search to finish before starting the next if they are independent.',
|
|
243
|
+
'- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
|
|
244
|
+
'- Similarly, call multiple extract tools in parallel when verifying different files.',
|
|
154
245
|
'',
|
|
155
246
|
'GOOD search strategy (do this):',
|
|
156
247
|
' Query: "How does authentication work and how are sessions managed?"',
|
|
157
|
-
' → search "authentication"
|
|
248
|
+
' → search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
|
|
158
249
|
' Query: "Find the IP allowlist middleware"',
|
|
159
250
|
' → search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
|
|
251
|
+
' Query: "Find ForwardMessage and SessionLimiter"',
|
|
252
|
+
' → search \'"ForwardMessage" "SessionLimiter"\' (one OR search finds both exact symbols)',
|
|
253
|
+
' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
|
|
254
|
+
' Query: "Find limitDRL and limitRedis functions"',
|
|
255
|
+
' → search \'"limitDRL" "limitRedis"\' (one OR search, quoted to prevent camelCase splitting)',
|
|
256
|
+
' Query: "Find ThrottleRetryLimit usage"',
|
|
257
|
+
' → search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist — stop)',
|
|
160
258
|
' Query: "How does BM25 scoring work with SIMD optimization?"',
|
|
161
|
-
' → search "BM25 scoring"
|
|
259
|
+
' → search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
|
|
162
260
|
'',
|
|
163
261
|
'BAD search strategy (never do this):',
|
|
164
|
-
' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG:
|
|
165
|
-
' → search "
|
|
166
|
-
' → search "
|
|
262
|
+
' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG: case/style variations, probe handles them)',
|
|
263
|
+
' → search "limitDRL" → search "LimitDRL" (WRONG: case variation — combine with OR: \'"limitDRL" "limitRedis"\')',
|
|
264
|
+
' → search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
|
|
265
|
+
' → search "ThrottleRetryLimit" path=tyk → search "ThrottleRetryLimit" path=gateway → search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths — probe searches recursively)',
|
|
266
|
+
' → search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
|
|
267
|
+
' → search "ForwardMessage" → search "ForwardMessage" → search "ForwardMessage" (WRONG: repeating the exact same query)',
|
|
268
|
+
' → search "authentication" → wait → search "session management" → wait (WRONG: these are independent, run them in parallel)',
|
|
167
269
|
'',
|
|
168
270
|
'Keyword tips:',
|
|
169
271
|
'- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
|
|
170
272
|
'- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
|
|
171
273
|
'- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
|
|
172
|
-
'- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.',
|
|
173
274
|
'- camelCase terms are split: getUserData becomes "get", "user", "data" — so one search covers all naming styles.',
|
|
275
|
+
'- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
|
|
174
276
|
'',
|
|
175
277
|
'Strategy:',
|
|
176
|
-
'1. Analyze the query - identify key concepts
|
|
177
|
-
'2.
|
|
178
|
-
'3.
|
|
179
|
-
'4.
|
|
180
|
-
'5.
|
|
278
|
+
'1. Analyze the query - identify key concepts and group related symbols',
|
|
279
|
+
'2. Combine related symbols into OR searches: \'"symbolA" "symbolB"\' finds files with either (quote to prevent splitting)',
|
|
280
|
+
'3. Run INDEPENDENT searches in PARALLEL — do not wait for one to finish before starting another',
|
|
281
|
+
'4. For known symbol names use exact=true. For concepts use default (exact=false).',
|
|
282
|
+
'5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.',
|
|
283
|
+
'6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.',
|
|
284
|
+
'7. Combine all relevant targets in your final response',
|
|
181
285
|
'',
|
|
182
286
|
`Query: ${searchQuery}`,
|
|
183
287
|
`Search path(s): ${searchPath}`,
|
|
@@ -229,6 +333,16 @@ export const searchTool = (options = {}) => {
|
|
|
229
333
|
: searchDescription,
|
|
230
334
|
inputSchema: searchSchema,
|
|
231
335
|
execute: async ({ query: searchQuery, path, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
|
|
336
|
+
// Auto-quote mixed-case and underscore terms to prevent unwanted stemming/splitting
|
|
337
|
+
// Skip when exact=true since that already preserves the literal string
|
|
338
|
+
if (!exact && searchQuery) {
|
|
339
|
+
const originalQuery = searchQuery;
|
|
340
|
+
searchQuery = autoQuoteSearchTerms(searchQuery);
|
|
341
|
+
if (debug && searchQuery !== originalQuery) {
|
|
342
|
+
console.error(`[search] Auto-quoted query: "${originalQuery}" → "${searchQuery}"`);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
232
346
|
// Use parameter maxTokens if provided, otherwise use the default
|
|
233
347
|
const effectiveMaxTokens = paramMaxTokens || maxTokens;
|
|
234
348
|
|
|
@@ -274,13 +388,15 @@ export const searchTool = (options = {}) => {
|
|
|
274
388
|
if (!searchDelegate) {
|
|
275
389
|
// Block duplicate non-paginated searches (models sometimes repeat the exact same call)
|
|
276
390
|
// Allow pagination: only nextPage=true is a legitimate repeat of the same query
|
|
277
|
-
|
|
391
|
+
// Use query+exact as the key (ignore path) to prevent path-hopping evasion
|
|
392
|
+
// where model searches same term on different subpaths hoping for different results
|
|
393
|
+
const searchKey = `${searchQuery}::${exact || false}`;
|
|
278
394
|
if (!nextPage) {
|
|
279
395
|
if (previousSearches.has(searchKey)) {
|
|
280
396
|
if (debug) {
|
|
281
|
-
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}"
|
|
397
|
+
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
|
|
282
398
|
}
|
|
283
|
-
return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query
|
|
399
|
+
return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results — probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
|
|
284
400
|
}
|
|
285
401
|
previousSearches.add(searchKey);
|
|
286
402
|
paginationCounts.set(searchKey, 0);
|
|
@@ -570,6 +686,50 @@ export const extractTool = (options = {}) => {
|
|
|
570
686
|
// Resolve relative paths in targets against cwd
|
|
571
687
|
extractFiles = parsedTargets.map(target => resolveTargetPath(target, effectiveCwd));
|
|
572
688
|
|
|
689
|
+
// Auto-fix: if resolved paths don't exist, try allowedFolders subdirs
|
|
690
|
+
// Handles when search returns relative paths (e.g., "gateway/file.go") and
|
|
691
|
+
// model constructs wrong absolute paths (e.g., /workspace/gateway/file.go
|
|
692
|
+
// instead of /workspace/tyk/gateway/file.go)
|
|
693
|
+
if (options.allowedFolders && options.allowedFolders.length > 0) {
|
|
694
|
+
extractFiles = extractFiles.map(target => {
|
|
695
|
+
const { filePart, suffix } = splitTargetSuffix(target);
|
|
696
|
+
if (existsSync(filePart)) return target;
|
|
697
|
+
|
|
698
|
+
// Try resolving the relative tail against each allowedFolder
|
|
699
|
+
const cwdPrefix = (effectiveCwd.endsWith('/') ? effectiveCwd : effectiveCwd + '/');
|
|
700
|
+
const relativePart = filePart.startsWith(cwdPrefix)
|
|
701
|
+
? filePart.slice(cwdPrefix.length)
|
|
702
|
+
: null;
|
|
703
|
+
|
|
704
|
+
if (relativePart) {
|
|
705
|
+
for (const folder of options.allowedFolders) {
|
|
706
|
+
const candidate = folder + '/' + relativePart;
|
|
707
|
+
if (existsSync(candidate)) {
|
|
708
|
+
if (debug) console.error(`[extract] Auto-fixed path: ${filePart} → ${candidate}`);
|
|
709
|
+
return candidate + suffix;
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
// Try stripping workspace prefix and resolving against allowedFolders
|
|
715
|
+
// e.g., /tmp/visor-workspaces/abc/gateway/file.go → try each folder + gateway/file.go
|
|
716
|
+
for (const folder of options.allowedFolders) {
|
|
717
|
+
const folderPrefix = folder.endsWith('/') ? folder : folder + '/';
|
|
718
|
+
const wsParent = folderPrefix.replace(/[^/]+\/$/, '');
|
|
719
|
+
if (filePart.startsWith(wsParent)) {
|
|
720
|
+
const tail = filePart.slice(wsParent.length);
|
|
721
|
+
const candidate = folderPrefix + tail;
|
|
722
|
+
if (candidate !== filePart && existsSync(candidate)) {
|
|
723
|
+
if (debug) console.error(`[extract] Auto-fixed path via workspace: ${filePart} → ${candidate}`);
|
|
724
|
+
return candidate + suffix;
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
return target;
|
|
730
|
+
});
|
|
731
|
+
}
|
|
732
|
+
|
|
573
733
|
// Apply format mapping for outline-xml to xml
|
|
574
734
|
let effectiveFormat = format;
|
|
575
735
|
if (outline && format === 'outline-xml') {
|
package/cjs/agent/ProbeAgent.cjs
CHANGED
|
@@ -4205,7 +4205,7 @@ var init_NormalizedSchema = __esm({
|
|
|
4205
4205
|
if (this.isDocumentSchema()) {
|
|
4206
4206
|
return member([15, 0], memberName);
|
|
4207
4207
|
}
|
|
4208
|
-
throw new Error(`@smithy/core/schema - ${this.getName(true)} has no
|
|
4208
|
+
throw new Error(`@smithy/core/schema - ${this.getName(true)} has no member=${memberName}.`);
|
|
4209
4209
|
}
|
|
4210
4210
|
getMemberSchemas() {
|
|
4211
4211
|
const buffer = {};
|
|
@@ -5996,7 +5996,7 @@ var init_EventStreamSerde = __esm({
|
|
|
5996
5996
|
throw new Error("@smithy/core/event-streams - non-struct member not supported in event stream union.");
|
|
5997
5997
|
}
|
|
5998
5998
|
}
|
|
5999
|
-
const messageSerialization = serializer.flush();
|
|
5999
|
+
const messageSerialization = serializer.flush() ?? new Uint8Array();
|
|
6000
6000
|
const body = typeof messageSerialization === "string" ? (this.serdeContext?.utf8Decoder ?? import_util_utf8.fromUtf8)(messageSerialization) : messageSerialization;
|
|
6001
6001
|
return {
|
|
6002
6002
|
body,
|
|
@@ -36463,6 +36463,48 @@ var init_hashline = __esm({
|
|
|
36463
36463
|
});
|
|
36464
36464
|
|
|
36465
36465
|
// src/tools/vercel.js
|
|
36466
|
+
function autoQuoteSearchTerms(query2) {
|
|
36467
|
+
if (!query2 || typeof query2 !== "string") return query2;
|
|
36468
|
+
const tokens = [];
|
|
36469
|
+
let i5 = 0;
|
|
36470
|
+
while (i5 < query2.length) {
|
|
36471
|
+
if (/\s/.test(query2[i5])) {
|
|
36472
|
+
i5++;
|
|
36473
|
+
continue;
|
|
36474
|
+
}
|
|
36475
|
+
if (query2[i5] === '"') {
|
|
36476
|
+
const end = query2.indexOf('"', i5 + 1);
|
|
36477
|
+
if (end !== -1) {
|
|
36478
|
+
tokens.push(query2.substring(i5, end + 1));
|
|
36479
|
+
i5 = end + 1;
|
|
36480
|
+
} else {
|
|
36481
|
+
tokens.push(query2.substring(i5));
|
|
36482
|
+
break;
|
|
36483
|
+
}
|
|
36484
|
+
continue;
|
|
36485
|
+
}
|
|
36486
|
+
let j5 = i5;
|
|
36487
|
+
while (j5 < query2.length && !/\s/.test(query2[j5]) && query2[j5] !== '"') {
|
|
36488
|
+
j5++;
|
|
36489
|
+
}
|
|
36490
|
+
tokens.push(query2.substring(i5, j5));
|
|
36491
|
+
i5 = j5;
|
|
36492
|
+
}
|
|
36493
|
+
const operators = /* @__PURE__ */ new Set(["AND", "OR", "NOT"]);
|
|
36494
|
+
const result = tokens.map((token) => {
|
|
36495
|
+
if (token.startsWith('"')) return token;
|
|
36496
|
+
if (operators.has(token)) return token;
|
|
36497
|
+
const hasUpper = /[A-Z]/.test(token);
|
|
36498
|
+
const hasLower = /[a-z]/.test(token);
|
|
36499
|
+
const hasUnderscore = token.includes("_");
|
|
36500
|
+
const hasMixedCase = hasUpper && hasLower;
|
|
36501
|
+
if (hasMixedCase || hasUnderscore) {
|
|
36502
|
+
return `"${token}"`;
|
|
36503
|
+
}
|
|
36504
|
+
return token;
|
|
36505
|
+
});
|
|
36506
|
+
return result.join(" ");
|
|
36507
|
+
}
|
|
36466
36508
|
function normalizeTargets(targets) {
|
|
36467
36509
|
if (!Array.isArray(targets)) return [];
|
|
36468
36510
|
const seen = /* @__PURE__ */ new Set();
|
|
@@ -36563,40 +36605,75 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
|
|
|
36563
36605
|
"- listFiles: Understand directory structure to find where relevant code might live.",
|
|
36564
36606
|
"",
|
|
36565
36607
|
"CRITICAL - How probe search works (do NOT ignore):",
|
|
36566
|
-
"- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting.",
|
|
36608
|
+
"- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically.",
|
|
36567
36609
|
'- Searching "allowed_ips" ALREADY matches "AllowedIPs", "allowedIps", "allowed_ips", etc. Do NOT manually try case/style variations.',
|
|
36568
36610
|
'- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
|
|
36569
|
-
"- NEVER repeat the same search query \u2014 you will get the same results.",
|
|
36611
|
+
"- NEVER repeat the same search query \u2014 you will get the same results. Changing the path does NOT change this.",
|
|
36570
36612
|
"- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful \u2014 probe handles it.",
|
|
36571
|
-
"- If a search returns no results, the term likely does not exist
|
|
36572
|
-
"- If 2-3
|
|
36613
|
+
"- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.",
|
|
36614
|
+
"- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.",
|
|
36615
|
+
"",
|
|
36616
|
+
"When to use exact=true:",
|
|
36617
|
+
"- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).",
|
|
36618
|
+
"- exact=true matches the literal string only \u2014 no stemming, no splitting.",
|
|
36619
|
+
'- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
|
|
36620
|
+
"- Do NOT use exact=true for exploratory/conceptual queries \u2014 use the default for those.",
|
|
36621
|
+
"",
|
|
36622
|
+
"Combining searches with OR:",
|
|
36623
|
+
'- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
|
|
36624
|
+
`- For known symbol names, quote each term to prevent splitting: '"limitDRL" "limitRedis"' matches either exact symbol.`,
|
|
36625
|
+
'- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" \u2014 not what you want for symbol lookup.',
|
|
36626
|
+
"- Use OR to search for multiple related symbols in ONE search instead of separate searches.",
|
|
36627
|
+
"- This is much faster than running separate searches sequentially.",
|
|
36628
|
+
`- Example: search '"ForwardMessage" "SessionLimiter"' finds files with either exact symbol in one call.`,
|
|
36629
|
+
`- Example: search '"limitDRL" "doRollingWindowWrite"' finds both rate limiting functions at once.`,
|
|
36630
|
+
'- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
|
|
36631
|
+
"",
|
|
36632
|
+
"Parallel tool calls:",
|
|
36633
|
+
"- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).",
|
|
36634
|
+
"- Do NOT wait for one search to finish before starting the next if they are independent.",
|
|
36635
|
+
'- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
|
|
36636
|
+
"- Similarly, call multiple extract tools in parallel when verifying different files.",
|
|
36573
36637
|
"",
|
|
36574
36638
|
"GOOD search strategy (do this):",
|
|
36575
36639
|
' Query: "How does authentication work and how are sessions managed?"',
|
|
36576
|
-
' \u2192 search "authentication"
|
|
36640
|
+
' \u2192 search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
|
|
36577
36641
|
' Query: "Find the IP allowlist middleware"',
|
|
36578
36642
|
' \u2192 search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
|
|
36643
|
+
' Query: "Find ForwardMessage and SessionLimiter"',
|
|
36644
|
+
` \u2192 search '"ForwardMessage" "SessionLimiter"' (one OR search finds both exact symbols)`,
|
|
36645
|
+
' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
|
|
36646
|
+
' Query: "Find limitDRL and limitRedis functions"',
|
|
36647
|
+
` \u2192 search '"limitDRL" "limitRedis"' (one OR search, quoted to prevent camelCase splitting)`,
|
|
36648
|
+
' Query: "Find ThrottleRetryLimit usage"',
|
|
36649
|
+
' \u2192 search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist \u2014 stop)',
|
|
36579
36650
|
' Query: "How does BM25 scoring work with SIMD optimization?"',
|
|
36580
|
-
' \u2192 search "BM25 scoring"
|
|
36651
|
+
' \u2192 search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
|
|
36581
36652
|
"",
|
|
36582
36653
|
"BAD search strategy (never do this):",
|
|
36583
|
-
' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG:
|
|
36584
|
-
|
|
36585
|
-
' \u2192 search "
|
|
36654
|
+
' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG: case/style variations, probe handles them)',
|
|
36655
|
+
` \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation \u2014 combine with OR: '"limitDRL" "limitRedis"')`,
|
|
36656
|
+
' \u2192 search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
|
|
36657
|
+
' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths \u2014 probe searches recursively)',
|
|
36658
|
+
' \u2192 search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
|
|
36659
|
+
' \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" (WRONG: repeating the exact same query)',
|
|
36660
|
+
' \u2192 search "authentication" \u2192 wait \u2192 search "session management" \u2192 wait (WRONG: these are independent, run them in parallel)',
|
|
36586
36661
|
"",
|
|
36587
36662
|
"Keyword tips:",
|
|
36588
36663
|
"- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.",
|
|
36589
36664
|
'- Avoid searching for these alone \u2014 combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
|
|
36590
36665
|
'- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
|
|
36591
|
-
"- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.",
|
|
36592
36666
|
'- camelCase terms are split: getUserData becomes "get", "user", "data" \u2014 so one search covers all naming styles.',
|
|
36667
|
+
'- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
|
|
36593
36668
|
"",
|
|
36594
36669
|
"Strategy:",
|
|
36595
|
-
"1. Analyze the query - identify key concepts
|
|
36596
|
-
|
|
36597
|
-
"3.
|
|
36598
|
-
"4.
|
|
36599
|
-
"5.
|
|
36670
|
+
"1. Analyze the query - identify key concepts and group related symbols",
|
|
36671
|
+
`2. Combine related symbols into OR searches: '"symbolA" "symbolB"' finds files with either (quote to prevent splitting)`,
|
|
36672
|
+
"3. Run INDEPENDENT searches in PARALLEL \u2014 do not wait for one to finish before starting another",
|
|
36673
|
+
"4. For known symbol names use exact=true. For concepts use default (exact=false).",
|
|
36674
|
+
"5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.",
|
|
36675
|
+
"6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.",
|
|
36676
|
+
"7. Combine all relevant targets in your final response",
|
|
36600
36677
|
"",
|
|
36601
36678
|
`Query: ${searchQuery}`,
|
|
36602
36679
|
`Search path(s): ${searchPath}`,
|
|
@@ -36657,6 +36734,13 @@ var init_vercel = __esm({
|
|
|
36657
36734
|
description: searchDelegate ? searchDelegateDescription : searchDescription,
|
|
36658
36735
|
inputSchema: searchSchema,
|
|
36659
36736
|
execute: async ({ query: searchQuery, path: path9, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
|
|
36737
|
+
if (!exact && searchQuery) {
|
|
36738
|
+
const originalQuery = searchQuery;
|
|
36739
|
+
searchQuery = autoQuoteSearchTerms(searchQuery);
|
|
36740
|
+
if (debug && searchQuery !== originalQuery) {
|
|
36741
|
+
console.error(`[search] Auto-quoted query: "${originalQuery}" \u2192 "${searchQuery}"`);
|
|
36742
|
+
}
|
|
36743
|
+
}
|
|
36660
36744
|
const effectiveMaxTokens = paramMaxTokens || maxTokens;
|
|
36661
36745
|
let searchPaths;
|
|
36662
36746
|
if (path9) {
|
|
@@ -36692,13 +36776,13 @@ var init_vercel = __esm({
|
|
|
36692
36776
|
return await search(searchOptions);
|
|
36693
36777
|
};
|
|
36694
36778
|
if (!searchDelegate) {
|
|
36695
|
-
const searchKey = `${searchQuery}::${
|
|
36779
|
+
const searchKey = `${searchQuery}::${exact || false}`;
|
|
36696
36780
|
if (!nextPage) {
|
|
36697
36781
|
if (previousSearches.has(searchKey)) {
|
|
36698
36782
|
if (debug) {
|
|
36699
|
-
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}"
|
|
36783
|
+
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
|
|
36700
36784
|
}
|
|
36701
|
-
return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query
|
|
36785
|
+
return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results \u2014 probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
|
|
36702
36786
|
}
|
|
36703
36787
|
previousSearches.add(searchKey);
|
|
36704
36788
|
paginationCounts.set(searchKey, 0);
|
|
@@ -36918,6 +37002,36 @@ var init_vercel = __esm({
|
|
|
36918
37002
|
} else if (targets) {
|
|
36919
37003
|
const parsedTargets = parseTargets(targets);
|
|
36920
37004
|
extractFiles = parsedTargets.map((target) => resolveTargetPath(target, effectiveCwd));
|
|
37005
|
+
if (options.allowedFolders && options.allowedFolders.length > 0) {
|
|
37006
|
+
extractFiles = extractFiles.map((target) => {
|
|
37007
|
+
const { filePart, suffix } = splitTargetSuffix(target);
|
|
37008
|
+
if ((0, import_fs4.existsSync)(filePart)) return target;
|
|
37009
|
+
const cwdPrefix = effectiveCwd.endsWith("/") ? effectiveCwd : effectiveCwd + "/";
|
|
37010
|
+
const relativePart = filePart.startsWith(cwdPrefix) ? filePart.slice(cwdPrefix.length) : null;
|
|
37011
|
+
if (relativePart) {
|
|
37012
|
+
for (const folder of options.allowedFolders) {
|
|
37013
|
+
const candidate = folder + "/" + relativePart;
|
|
37014
|
+
if ((0, import_fs4.existsSync)(candidate)) {
|
|
37015
|
+
if (debug) console.error(`[extract] Auto-fixed path: ${filePart} \u2192 ${candidate}`);
|
|
37016
|
+
return candidate + suffix;
|
|
37017
|
+
}
|
|
37018
|
+
}
|
|
37019
|
+
}
|
|
37020
|
+
for (const folder of options.allowedFolders) {
|
|
37021
|
+
const folderPrefix = folder.endsWith("/") ? folder : folder + "/";
|
|
37022
|
+
const wsParent = folderPrefix.replace(/[^/]+\/$/, "");
|
|
37023
|
+
if (filePart.startsWith(wsParent)) {
|
|
37024
|
+
const tail = filePart.slice(wsParent.length);
|
|
37025
|
+
const candidate = folderPrefix + tail;
|
|
37026
|
+
if (candidate !== filePart && (0, import_fs4.existsSync)(candidate)) {
|
|
37027
|
+
if (debug) console.error(`[extract] Auto-fixed path via workspace: ${filePart} \u2192 ${candidate}`);
|
|
37028
|
+
return candidate + suffix;
|
|
37029
|
+
}
|
|
37030
|
+
}
|
|
37031
|
+
}
|
|
37032
|
+
return target;
|
|
37033
|
+
});
|
|
37034
|
+
}
|
|
36921
37035
|
let effectiveFormat = format2;
|
|
36922
37036
|
if (outline && format2 === "outline-xml") {
|
|
36923
37037
|
effectiveFormat = "xml";
|
package/cjs/index.cjs
CHANGED
|
@@ -6063,7 +6063,7 @@ var init_NormalizedSchema = __esm({
|
|
|
6063
6063
|
if (this.isDocumentSchema()) {
|
|
6064
6064
|
return member([15, 0], memberName);
|
|
6065
6065
|
}
|
|
6066
|
-
throw new Error(`@smithy/core/schema - ${this.getName(true)} has no
|
|
6066
|
+
throw new Error(`@smithy/core/schema - ${this.getName(true)} has no member=${memberName}.`);
|
|
6067
6067
|
}
|
|
6068
6068
|
getMemberSchemas() {
|
|
6069
6069
|
const buffer = {};
|
|
@@ -7854,7 +7854,7 @@ var init_EventStreamSerde = __esm({
|
|
|
7854
7854
|
throw new Error("@smithy/core/event-streams - non-struct member not supported in event stream union.");
|
|
7855
7855
|
}
|
|
7856
7856
|
}
|
|
7857
|
-
const messageSerialization = serializer.flush();
|
|
7857
|
+
const messageSerialization = serializer.flush() ?? new Uint8Array();
|
|
7858
7858
|
const body = typeof messageSerialization === "string" ? (this.serdeContext?.utf8Decoder ?? import_util_utf8.fromUtf8)(messageSerialization) : messageSerialization;
|
|
7859
7859
|
return {
|
|
7860
7860
|
body,
|
|
@@ -110625,6 +110625,48 @@ var init_hashline = __esm({
|
|
|
110625
110625
|
});
|
|
110626
110626
|
|
|
110627
110627
|
// src/tools/vercel.js
|
|
110628
|
+
function autoQuoteSearchTerms(query2) {
|
|
110629
|
+
if (!query2 || typeof query2 !== "string") return query2;
|
|
110630
|
+
const tokens = [];
|
|
110631
|
+
let i5 = 0;
|
|
110632
|
+
while (i5 < query2.length) {
|
|
110633
|
+
if (/\s/.test(query2[i5])) {
|
|
110634
|
+
i5++;
|
|
110635
|
+
continue;
|
|
110636
|
+
}
|
|
110637
|
+
if (query2[i5] === '"') {
|
|
110638
|
+
const end = query2.indexOf('"', i5 + 1);
|
|
110639
|
+
if (end !== -1) {
|
|
110640
|
+
tokens.push(query2.substring(i5, end + 1));
|
|
110641
|
+
i5 = end + 1;
|
|
110642
|
+
} else {
|
|
110643
|
+
tokens.push(query2.substring(i5));
|
|
110644
|
+
break;
|
|
110645
|
+
}
|
|
110646
|
+
continue;
|
|
110647
|
+
}
|
|
110648
|
+
let j5 = i5;
|
|
110649
|
+
while (j5 < query2.length && !/\s/.test(query2[j5]) && query2[j5] !== '"') {
|
|
110650
|
+
j5++;
|
|
110651
|
+
}
|
|
110652
|
+
tokens.push(query2.substring(i5, j5));
|
|
110653
|
+
i5 = j5;
|
|
110654
|
+
}
|
|
110655
|
+
const operators = /* @__PURE__ */ new Set(["AND", "OR", "NOT"]);
|
|
110656
|
+
const result = tokens.map((token) => {
|
|
110657
|
+
if (token.startsWith('"')) return token;
|
|
110658
|
+
if (operators.has(token)) return token;
|
|
110659
|
+
const hasUpper = /[A-Z]/.test(token);
|
|
110660
|
+
const hasLower = /[a-z]/.test(token);
|
|
110661
|
+
const hasUnderscore = token.includes("_");
|
|
110662
|
+
const hasMixedCase = hasUpper && hasLower;
|
|
110663
|
+
if (hasMixedCase || hasUnderscore) {
|
|
110664
|
+
return `"${token}"`;
|
|
110665
|
+
}
|
|
110666
|
+
return token;
|
|
110667
|
+
});
|
|
110668
|
+
return result.join(" ");
|
|
110669
|
+
}
|
|
110628
110670
|
function normalizeTargets(targets) {
|
|
110629
110671
|
if (!Array.isArray(targets)) return [];
|
|
110630
110672
|
const seen = /* @__PURE__ */ new Set();
|
|
@@ -110725,40 +110767,75 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
|
|
|
110725
110767
|
"- listFiles: Understand directory structure to find where relevant code might live.",
|
|
110726
110768
|
"",
|
|
110727
110769
|
"CRITICAL - How probe search works (do NOT ignore):",
|
|
110728
|
-
"- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting.",
|
|
110770
|
+
"- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically.",
|
|
110729
110771
|
'- Searching "allowed_ips" ALREADY matches "AllowedIPs", "allowedIps", "allowed_ips", etc. Do NOT manually try case/style variations.',
|
|
110730
110772
|
'- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
|
|
110731
|
-
"- NEVER repeat the same search query \u2014 you will get the same results.",
|
|
110773
|
+
"- NEVER repeat the same search query \u2014 you will get the same results. Changing the path does NOT change this.",
|
|
110732
110774
|
"- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful \u2014 probe handles it.",
|
|
110733
|
-
"- If a search returns no results, the term likely does not exist
|
|
110734
|
-
"- If 2-3
|
|
110775
|
+
"- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.",
|
|
110776
|
+
"- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.",
|
|
110777
|
+
"",
|
|
110778
|
+
"When to use exact=true:",
|
|
110779
|
+
"- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).",
|
|
110780
|
+
"- exact=true matches the literal string only \u2014 no stemming, no splitting.",
|
|
110781
|
+
'- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
|
|
110782
|
+
"- Do NOT use exact=true for exploratory/conceptual queries \u2014 use the default for those.",
|
|
110783
|
+
"",
|
|
110784
|
+
"Combining searches with OR:",
|
|
110785
|
+
'- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
|
|
110786
|
+
`- For known symbol names, quote each term to prevent splitting: '"limitDRL" "limitRedis"' matches either exact symbol.`,
|
|
110787
|
+
'- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" \u2014 not what you want for symbol lookup.',
|
|
110788
|
+
"- Use OR to search for multiple related symbols in ONE search instead of separate searches.",
|
|
110789
|
+
"- This is much faster than running separate searches sequentially.",
|
|
110790
|
+
`- Example: search '"ForwardMessage" "SessionLimiter"' finds files with either exact symbol in one call.`,
|
|
110791
|
+
`- Example: search '"limitDRL" "doRollingWindowWrite"' finds both rate limiting functions at once.`,
|
|
110792
|
+
'- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
|
|
110793
|
+
"",
|
|
110794
|
+
"Parallel tool calls:",
|
|
110795
|
+
"- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).",
|
|
110796
|
+
"- Do NOT wait for one search to finish before starting the next if they are independent.",
|
|
110797
|
+
'- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
|
|
110798
|
+
"- Similarly, call multiple extract tools in parallel when verifying different files.",
|
|
110735
110799
|
"",
|
|
110736
110800
|
"GOOD search strategy (do this):",
|
|
110737
110801
|
' Query: "How does authentication work and how are sessions managed?"',
|
|
110738
|
-
' \u2192 search "authentication"
|
|
110802
|
+
' \u2192 search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
|
|
110739
110803
|
' Query: "Find the IP allowlist middleware"',
|
|
110740
110804
|
' \u2192 search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
|
|
110805
|
+
' Query: "Find ForwardMessage and SessionLimiter"',
|
|
110806
|
+
` \u2192 search '"ForwardMessage" "SessionLimiter"' (one OR search finds both exact symbols)`,
|
|
110807
|
+
' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
|
|
110808
|
+
' Query: "Find limitDRL and limitRedis functions"',
|
|
110809
|
+
` \u2192 search '"limitDRL" "limitRedis"' (one OR search, quoted to prevent camelCase splitting)`,
|
|
110810
|
+
' Query: "Find ThrottleRetryLimit usage"',
|
|
110811
|
+
' \u2192 search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist \u2014 stop)',
|
|
110741
110812
|
' Query: "How does BM25 scoring work with SIMD optimization?"',
|
|
110742
|
-
' \u2192 search "BM25 scoring"
|
|
110813
|
+
' \u2192 search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
|
|
110743
110814
|
"",
|
|
110744
110815
|
"BAD search strategy (never do this):",
|
|
110745
|
-
' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG:
|
|
110746
|
-
|
|
110747
|
-
' \u2192 search "
|
|
110816
|
+
' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG: case/style variations, probe handles them)',
|
|
110817
|
+
` \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation \u2014 combine with OR: '"limitDRL" "limitRedis"')`,
|
|
110818
|
+
' \u2192 search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
|
|
110819
|
+
' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths \u2014 probe searches recursively)',
|
|
110820
|
+
' \u2192 search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
|
|
110821
|
+
' \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" (WRONG: repeating the exact same query)',
|
|
110822
|
+
' \u2192 search "authentication" \u2192 wait \u2192 search "session management" \u2192 wait (WRONG: these are independent, run them in parallel)',
|
|
110748
110823
|
"",
|
|
110749
110824
|
"Keyword tips:",
|
|
110750
110825
|
"- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.",
|
|
110751
110826
|
'- Avoid searching for these alone \u2014 combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
|
|
110752
110827
|
'- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
|
|
110753
|
-
"- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.",
|
|
110754
110828
|
'- camelCase terms are split: getUserData becomes "get", "user", "data" \u2014 so one search covers all naming styles.',
|
|
110829
|
+
'- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
|
|
110755
110830
|
"",
|
|
110756
110831
|
"Strategy:",
|
|
110757
|
-
"1. Analyze the query - identify key concepts
|
|
110758
|
-
|
|
110759
|
-
"3.
|
|
110760
|
-
"4.
|
|
110761
|
-
"5.
|
|
110832
|
+
"1. Analyze the query - identify key concepts and group related symbols",
|
|
110833
|
+
`2. Combine related symbols into OR searches: '"symbolA" "symbolB"' finds files with either (quote to prevent splitting)`,
|
|
110834
|
+
"3. Run INDEPENDENT searches in PARALLEL \u2014 do not wait for one to finish before starting another",
|
|
110835
|
+
"4. For known symbol names use exact=true. For concepts use default (exact=false).",
|
|
110836
|
+
"5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.",
|
|
110837
|
+
"6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.",
|
|
110838
|
+
"7. Combine all relevant targets in your final response",
|
|
110762
110839
|
"",
|
|
110763
110840
|
`Query: ${searchQuery}`,
|
|
110764
110841
|
`Search path(s): ${searchPath}`,
|
|
@@ -110819,6 +110896,13 @@ var init_vercel = __esm({
|
|
|
110819
110896
|
description: searchDelegate ? searchDelegateDescription : searchDescription,
|
|
110820
110897
|
inputSchema: searchSchema,
|
|
110821
110898
|
execute: async ({ query: searchQuery, path: path9, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
|
|
110899
|
+
if (!exact && searchQuery) {
|
|
110900
|
+
const originalQuery = searchQuery;
|
|
110901
|
+
searchQuery = autoQuoteSearchTerms(searchQuery);
|
|
110902
|
+
if (debug && searchQuery !== originalQuery) {
|
|
110903
|
+
console.error(`[search] Auto-quoted query: "${originalQuery}" \u2192 "${searchQuery}"`);
|
|
110904
|
+
}
|
|
110905
|
+
}
|
|
110822
110906
|
const effectiveMaxTokens = paramMaxTokens || maxTokens;
|
|
110823
110907
|
let searchPaths;
|
|
110824
110908
|
if (path9) {
|
|
@@ -110854,13 +110938,13 @@ var init_vercel = __esm({
|
|
|
110854
110938
|
return await search(searchOptions);
|
|
110855
110939
|
};
|
|
110856
110940
|
if (!searchDelegate) {
|
|
110857
|
-
const searchKey = `${searchQuery}::${
|
|
110941
|
+
const searchKey = `${searchQuery}::${exact || false}`;
|
|
110858
110942
|
if (!nextPage) {
|
|
110859
110943
|
if (previousSearches.has(searchKey)) {
|
|
110860
110944
|
if (debug) {
|
|
110861
|
-
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}"
|
|
110945
|
+
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
|
|
110862
110946
|
}
|
|
110863
|
-
return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query
|
|
110947
|
+
return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results \u2014 probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
|
|
110864
110948
|
}
|
|
110865
110949
|
previousSearches.add(searchKey);
|
|
110866
110950
|
paginationCounts.set(searchKey, 0);
|
|
@@ -111080,6 +111164,36 @@ var init_vercel = __esm({
|
|
|
111080
111164
|
} else if (targets) {
|
|
111081
111165
|
const parsedTargets = parseTargets(targets);
|
|
111082
111166
|
extractFiles = parsedTargets.map((target) => resolveTargetPath(target, effectiveCwd));
|
|
111167
|
+
if (options.allowedFolders && options.allowedFolders.length > 0) {
|
|
111168
|
+
extractFiles = extractFiles.map((target) => {
|
|
111169
|
+
const { filePart, suffix } = splitTargetSuffix(target);
|
|
111170
|
+
if ((0, import_fs11.existsSync)(filePart)) return target;
|
|
111171
|
+
const cwdPrefix = effectiveCwd.endsWith("/") ? effectiveCwd : effectiveCwd + "/";
|
|
111172
|
+
const relativePart = filePart.startsWith(cwdPrefix) ? filePart.slice(cwdPrefix.length) : null;
|
|
111173
|
+
if (relativePart) {
|
|
111174
|
+
for (const folder of options.allowedFolders) {
|
|
111175
|
+
const candidate = folder + "/" + relativePart;
|
|
111176
|
+
if ((0, import_fs11.existsSync)(candidate)) {
|
|
111177
|
+
if (debug) console.error(`[extract] Auto-fixed path: ${filePart} \u2192 ${candidate}`);
|
|
111178
|
+
return candidate + suffix;
|
|
111179
|
+
}
|
|
111180
|
+
}
|
|
111181
|
+
}
|
|
111182
|
+
for (const folder of options.allowedFolders) {
|
|
111183
|
+
const folderPrefix = folder.endsWith("/") ? folder : folder + "/";
|
|
111184
|
+
const wsParent = folderPrefix.replace(/[^/]+\/$/, "");
|
|
111185
|
+
if (filePart.startsWith(wsParent)) {
|
|
111186
|
+
const tail = filePart.slice(wsParent.length);
|
|
111187
|
+
const candidate = folderPrefix + tail;
|
|
111188
|
+
if (candidate !== filePart && (0, import_fs11.existsSync)(candidate)) {
|
|
111189
|
+
if (debug) console.error(`[extract] Auto-fixed path via workspace: ${filePart} \u2192 ${candidate}`);
|
|
111190
|
+
return candidate + suffix;
|
|
111191
|
+
}
|
|
111192
|
+
}
|
|
111193
|
+
}
|
|
111194
|
+
return target;
|
|
111195
|
+
});
|
|
111196
|
+
}
|
|
111083
111197
|
let effectiveFormat = format2;
|
|
111084
111198
|
if (outline && format2 === "outline-xml") {
|
|
111085
111199
|
effectiveFormat = "xml";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@probelabs/probe",
|
|
3
|
-
"version": "0.6.0-
|
|
3
|
+
"version": "0.6.0-rc283",
|
|
4
4
|
"description": "Node.js wrapper for the probe code search tool",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"module": "src/index.js",
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
"dependencies": {
|
|
76
76
|
"@ai-sdk/amazon-bedrock": "^1.0.8",
|
|
77
77
|
"@ai-sdk/anthropic": "^2.0.8",
|
|
78
|
-
"@ai-sdk/google": "^
|
|
78
|
+
"@ai-sdk/google": "^3.0.37",
|
|
79
79
|
"@ai-sdk/openai": "^2.0.10",
|
|
80
80
|
"@anthropic-ai/claude-agent-sdk": "^0.1.46",
|
|
81
81
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
package/src/tools/vercel.js
CHANGED
|
@@ -14,6 +14,75 @@ import { existsSync } from 'fs';
|
|
|
14
14
|
import { formatErrorForAI } from '../utils/error-types.js';
|
|
15
15
|
import { annotateOutputWithHashes } from './hashline.js';
|
|
16
16
|
|
|
17
|
+
/**
|
|
18
|
+
* Auto-quote search query terms that contain mixed case or underscores.
|
|
19
|
+
* Unquoted camelCase like "limitDRL" gets split by stemming into "limit" + "DRL".
|
|
20
|
+
* This wraps such terms in quotes so they match as literal strings.
|
|
21
|
+
*
|
|
22
|
+
* Examples:
|
|
23
|
+
* "limitDRL limitRedis" → '"limitDRL" "limitRedis"'
|
|
24
|
+
* "ThrottleRetryLimit" → '"ThrottleRetryLimit"'
|
|
25
|
+
* "allowed_ips" → '"allowed_ips"'
|
|
26
|
+
* "rate limit" → 'rate limit' (no change, all lowercase)
|
|
27
|
+
* '"already quoted"' → '"already quoted"' (no change)
|
|
28
|
+
* 'foo AND bar' → 'foo AND bar' (operators preserved)
|
|
29
|
+
*/
|
|
30
|
+
function autoQuoteSearchTerms(query) {
|
|
31
|
+
if (!query || typeof query !== 'string') return query;
|
|
32
|
+
|
|
33
|
+
// Split on whitespace, preserving quoted strings and operators
|
|
34
|
+
const tokens = [];
|
|
35
|
+
let i = 0;
|
|
36
|
+
while (i < query.length) {
|
|
37
|
+
// Skip whitespace
|
|
38
|
+
if (/\s/.test(query[i])) {
|
|
39
|
+
i++;
|
|
40
|
+
continue;
|
|
41
|
+
}
|
|
42
|
+
// Quoted string — keep as-is
|
|
43
|
+
if (query[i] === '"') {
|
|
44
|
+
const end = query.indexOf('"', i + 1);
|
|
45
|
+
if (end !== -1) {
|
|
46
|
+
tokens.push(query.substring(i, end + 1));
|
|
47
|
+
i = end + 1;
|
|
48
|
+
} else {
|
|
49
|
+
// Unclosed quote — take rest
|
|
50
|
+
tokens.push(query.substring(i));
|
|
51
|
+
break;
|
|
52
|
+
}
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
// Unquoted token
|
|
56
|
+
let j = i;
|
|
57
|
+
while (j < query.length && !/\s/.test(query[j]) && query[j] !== '"') {
|
|
58
|
+
j++;
|
|
59
|
+
}
|
|
60
|
+
tokens.push(query.substring(i, j));
|
|
61
|
+
i = j;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// Boolean operators that should not be quoted
|
|
65
|
+
const operators = new Set(['AND', 'OR', 'NOT']);
|
|
66
|
+
|
|
67
|
+
const result = tokens.map(token => {
|
|
68
|
+
// Already quoted
|
|
69
|
+
if (token.startsWith('"')) return token;
|
|
70
|
+
// Boolean operator
|
|
71
|
+
if (operators.has(token)) return token;
|
|
72
|
+
// Check if token needs quoting: has mixed case (upper+lower) or underscores
|
|
73
|
+
const hasUpper = /[A-Z]/.test(token);
|
|
74
|
+
const hasLower = /[a-z]/.test(token);
|
|
75
|
+
const hasUnderscore = token.includes('_');
|
|
76
|
+
const hasMixedCase = hasUpper && hasLower;
|
|
77
|
+
if (hasMixedCase || hasUnderscore) {
|
|
78
|
+
return `"${token}"`;
|
|
79
|
+
}
|
|
80
|
+
return token;
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
return result.join(' ');
|
|
84
|
+
}
|
|
85
|
+
|
|
17
86
|
const CODE_SEARCH_SCHEMA = {
|
|
18
87
|
type: 'object',
|
|
19
88
|
properties: {
|
|
@@ -144,40 +213,75 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
|
|
|
144
213
|
'- listFiles: Understand directory structure to find where relevant code might live.',
|
|
145
214
|
'',
|
|
146
215
|
'CRITICAL - How probe search works (do NOT ignore):',
|
|
147
|
-
'- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting.',
|
|
216
|
+
'- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically.',
|
|
148
217
|
'- Searching "allowed_ips" ALREADY matches "AllowedIPs", "allowedIps", "allowed_ips", etc. Do NOT manually try case/style variations.',
|
|
149
218
|
'- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
|
|
150
|
-
'- NEVER repeat the same search query — you will get the same results.',
|
|
219
|
+
'- NEVER repeat the same search query — you will get the same results. Changing the path does NOT change this.',
|
|
151
220
|
'- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful — probe handles it.',
|
|
152
|
-
'- If a search returns no results, the term likely does not exist
|
|
153
|
-
'- If 2-3
|
|
221
|
+
'- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.',
|
|
222
|
+
'- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.',
|
|
223
|
+
'',
|
|
224
|
+
'When to use exact=true:',
|
|
225
|
+
'- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).',
|
|
226
|
+
'- exact=true matches the literal string only — no stemming, no splitting.',
|
|
227
|
+
'- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
|
|
228
|
+
'- Do NOT use exact=true for exploratory/conceptual queries — use the default for those.',
|
|
229
|
+
'',
|
|
230
|
+
'Combining searches with OR:',
|
|
231
|
+
'- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
|
|
232
|
+
'- For known symbol names, quote each term to prevent splitting: \'"limitDRL" "limitRedis"\' matches either exact symbol.',
|
|
233
|
+
'- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" — not what you want for symbol lookup.',
|
|
234
|
+
'- Use OR to search for multiple related symbols in ONE search instead of separate searches.',
|
|
235
|
+
'- This is much faster than running separate searches sequentially.',
|
|
236
|
+
'- Example: search \'"ForwardMessage" "SessionLimiter"\' finds files with either exact symbol in one call.',
|
|
237
|
+
'- Example: search \'"limitDRL" "doRollingWindowWrite"\' finds both rate limiting functions at once.',
|
|
238
|
+
'- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
|
|
239
|
+
'',
|
|
240
|
+
'Parallel tool calls:',
|
|
241
|
+
'- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).',
|
|
242
|
+
'- Do NOT wait for one search to finish before starting the next if they are independent.',
|
|
243
|
+
'- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
|
|
244
|
+
'- Similarly, call multiple extract tools in parallel when verifying different files.',
|
|
154
245
|
'',
|
|
155
246
|
'GOOD search strategy (do this):',
|
|
156
247
|
' Query: "How does authentication work and how are sessions managed?"',
|
|
157
|
-
' → search "authentication"
|
|
248
|
+
' → search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
|
|
158
249
|
' Query: "Find the IP allowlist middleware"',
|
|
159
250
|
' → search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
|
|
251
|
+
' Query: "Find ForwardMessage and SessionLimiter"',
|
|
252
|
+
' → search \'"ForwardMessage" "SessionLimiter"\' (one OR search finds both exact symbols)',
|
|
253
|
+
' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
|
|
254
|
+
' Query: "Find limitDRL and limitRedis functions"',
|
|
255
|
+
' → search \'"limitDRL" "limitRedis"\' (one OR search, quoted to prevent camelCase splitting)',
|
|
256
|
+
' Query: "Find ThrottleRetryLimit usage"',
|
|
257
|
+
' → search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist — stop)',
|
|
160
258
|
' Query: "How does BM25 scoring work with SIMD optimization?"',
|
|
161
|
-
' → search "BM25 scoring"
|
|
259
|
+
' → search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
|
|
162
260
|
'',
|
|
163
261
|
'BAD search strategy (never do this):',
|
|
164
|
-
' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG:
|
|
165
|
-
' → search "
|
|
166
|
-
' → search "
|
|
262
|
+
' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG: case/style variations, probe handles them)',
|
|
263
|
+
' → search "limitDRL" → search "LimitDRL" (WRONG: case variation — combine with OR: \'"limitDRL" "limitRedis"\')',
|
|
264
|
+
' → search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
|
|
265
|
+
' → search "ThrottleRetryLimit" path=tyk → search "ThrottleRetryLimit" path=gateway → search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths — probe searches recursively)',
|
|
266
|
+
' → search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
|
|
267
|
+
' → search "ForwardMessage" → search "ForwardMessage" → search "ForwardMessage" (WRONG: repeating the exact same query)',
|
|
268
|
+
' → search "authentication" → wait → search "session management" → wait (WRONG: these are independent, run them in parallel)',
|
|
167
269
|
'',
|
|
168
270
|
'Keyword tips:',
|
|
169
271
|
'- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
|
|
170
272
|
'- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
|
|
171
273
|
'- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
|
|
172
|
-
'- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.',
|
|
173
274
|
'- camelCase terms are split: getUserData becomes "get", "user", "data" — so one search covers all naming styles.',
|
|
275
|
+
'- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
|
|
174
276
|
'',
|
|
175
277
|
'Strategy:',
|
|
176
|
-
'1. Analyze the query - identify key concepts
|
|
177
|
-
'2.
|
|
178
|
-
'3.
|
|
179
|
-
'4.
|
|
180
|
-
'5.
|
|
278
|
+
'1. Analyze the query - identify key concepts and group related symbols',
|
|
279
|
+
'2. Combine related symbols into OR searches: \'"symbolA" "symbolB"\' finds files with either (quote to prevent splitting)',
|
|
280
|
+
'3. Run INDEPENDENT searches in PARALLEL — do not wait for one to finish before starting another',
|
|
281
|
+
'4. For known symbol names use exact=true. For concepts use default (exact=false).',
|
|
282
|
+
'5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.',
|
|
283
|
+
'6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.',
|
|
284
|
+
'7. Combine all relevant targets in your final response',
|
|
181
285
|
'',
|
|
182
286
|
`Query: ${searchQuery}`,
|
|
183
287
|
`Search path(s): ${searchPath}`,
|
|
@@ -229,6 +333,16 @@ export const searchTool = (options = {}) => {
|
|
|
229
333
|
: searchDescription,
|
|
230
334
|
inputSchema: searchSchema,
|
|
231
335
|
execute: async ({ query: searchQuery, path, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
|
|
336
|
+
// Auto-quote mixed-case and underscore terms to prevent unwanted stemming/splitting
|
|
337
|
+
// Skip when exact=true since that already preserves the literal string
|
|
338
|
+
if (!exact && searchQuery) {
|
|
339
|
+
const originalQuery = searchQuery;
|
|
340
|
+
searchQuery = autoQuoteSearchTerms(searchQuery);
|
|
341
|
+
if (debug && searchQuery !== originalQuery) {
|
|
342
|
+
console.error(`[search] Auto-quoted query: "${originalQuery}" → "${searchQuery}"`);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
232
346
|
// Use parameter maxTokens if provided, otherwise use the default
|
|
233
347
|
const effectiveMaxTokens = paramMaxTokens || maxTokens;
|
|
234
348
|
|
|
@@ -274,13 +388,15 @@ export const searchTool = (options = {}) => {
|
|
|
274
388
|
if (!searchDelegate) {
|
|
275
389
|
// Block duplicate non-paginated searches (models sometimes repeat the exact same call)
|
|
276
390
|
// Allow pagination: only nextPage=true is a legitimate repeat of the same query
|
|
277
|
-
|
|
391
|
+
// Use query+exact as the key (ignore path) to prevent path-hopping evasion
|
|
392
|
+
// where model searches same term on different subpaths hoping for different results
|
|
393
|
+
const searchKey = `${searchQuery}::${exact || false}`;
|
|
278
394
|
if (!nextPage) {
|
|
279
395
|
if (previousSearches.has(searchKey)) {
|
|
280
396
|
if (debug) {
|
|
281
|
-
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}"
|
|
397
|
+
console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
|
|
282
398
|
}
|
|
283
|
-
return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query
|
|
399
|
+
return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results — probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
|
|
284
400
|
}
|
|
285
401
|
previousSearches.add(searchKey);
|
|
286
402
|
paginationCounts.set(searchKey, 0);
|
|
@@ -570,6 +686,50 @@ export const extractTool = (options = {}) => {
|
|
|
570
686
|
// Resolve relative paths in targets against cwd
|
|
571
687
|
extractFiles = parsedTargets.map(target => resolveTargetPath(target, effectiveCwd));
|
|
572
688
|
|
|
689
|
+
// Auto-fix: if resolved paths don't exist, try allowedFolders subdirs
|
|
690
|
+
// Handles when search returns relative paths (e.g., "gateway/file.go") and
|
|
691
|
+
// model constructs wrong absolute paths (e.g., /workspace/gateway/file.go
|
|
692
|
+
// instead of /workspace/tyk/gateway/file.go)
|
|
693
|
+
if (options.allowedFolders && options.allowedFolders.length > 0) {
|
|
694
|
+
extractFiles = extractFiles.map(target => {
|
|
695
|
+
const { filePart, suffix } = splitTargetSuffix(target);
|
|
696
|
+
if (existsSync(filePart)) return target;
|
|
697
|
+
|
|
698
|
+
// Try resolving the relative tail against each allowedFolder
|
|
699
|
+
const cwdPrefix = (effectiveCwd.endsWith('/') ? effectiveCwd : effectiveCwd + '/');
|
|
700
|
+
const relativePart = filePart.startsWith(cwdPrefix)
|
|
701
|
+
? filePart.slice(cwdPrefix.length)
|
|
702
|
+
: null;
|
|
703
|
+
|
|
704
|
+
if (relativePart) {
|
|
705
|
+
for (const folder of options.allowedFolders) {
|
|
706
|
+
const candidate = folder + '/' + relativePart;
|
|
707
|
+
if (existsSync(candidate)) {
|
|
708
|
+
if (debug) console.error(`[extract] Auto-fixed path: ${filePart} → ${candidate}`);
|
|
709
|
+
return candidate + suffix;
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
// Try stripping workspace prefix and resolving against allowedFolders
|
|
715
|
+
// e.g., /tmp/visor-workspaces/abc/gateway/file.go → try each folder + gateway/file.go
|
|
716
|
+
for (const folder of options.allowedFolders) {
|
|
717
|
+
const folderPrefix = folder.endsWith('/') ? folder : folder + '/';
|
|
718
|
+
const wsParent = folderPrefix.replace(/[^/]+\/$/, '');
|
|
719
|
+
if (filePart.startsWith(wsParent)) {
|
|
720
|
+
const tail = filePart.slice(wsParent.length);
|
|
721
|
+
const candidate = folderPrefix + tail;
|
|
722
|
+
if (candidate !== filePart && existsSync(candidate)) {
|
|
723
|
+
if (debug) console.error(`[extract] Auto-fixed path via workspace: ${filePart} → ${candidate}`);
|
|
724
|
+
return candidate + suffix;
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
return target;
|
|
730
|
+
});
|
|
731
|
+
}
|
|
732
|
+
|
|
573
733
|
// Apply format mapping for outline-xml to xml
|
|
574
734
|
let effectiveFormat = format;
|
|
575
735
|
if (outline && format === 'outline-xml') {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|