@probelabs/probe 0.6.0-rc282 → 0.6.0-rc283

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9085,6 +9085,48 @@ var init_hashline = __esm({
9085
9085
  // src/tools/vercel.js
9086
9086
  import { tool } from "ai";
9087
9087
  import { existsSync } from "fs";
9088
/**
 * Wrap mixed-case and underscore search terms in double quotes.
 *
 * The query is split on whitespace; text between double quotes is kept as one
 * piece and returned untouched (an unclosed quote swallows the rest of the
 * query). The bare words AND, OR and NOT are never quoted. Any other term that
 * contains both an upper- and a lower-case ASCII letter, or an underscore, is
 * wrapped in double quotes.
 *
 * Query punctuation stays outside the quotes: a leading "+" or "-" prefix,
 * leading "(" and unbalanced trailing ")" are peeled off before quoting, so
 * `(fooBar OR baz_qux)` becomes `("fooBar" OR "baz_qux")` rather than
 * `"(fooBar" OR "baz_qux)"`.
 * NOTE(review): assumes the search backend accepts a prefix directly in front
 * of a quoted term (`+"fooBar"`) - confirm against the query parser.
 *
 * Whitespace runs collapse to a single space and leading/trailing whitespace
 * is dropped, but no space is inserted between pieces that were adjacent in
 * the input (`+"fooBar"` stays glued).
 *
 * @param {string} query2 - Raw search query.
 * @returns {string} The rewritten query; falsy or non-string input is returned unchanged.
 */
function autoQuoteSearchTerms(query2) {
  if (!query2 || typeof query2 !== "string") return query2;
  const operators = /* @__PURE__ */ new Set(["AND", "OR", "NOT"]);
  const isSpace = (ch) => /\s/.test(ch);
  const countOf = (text, ch) => text.split(ch).length - 1;
  // Quote a single whitespace-delimited piece when it looks like a symbol name.
  const quoteTerm = (term) => {
    if (term.startsWith('"') || operators.has(term)) return term;
    // Leading "(" plus at most one "+"/"-" that is directly followed by a word
    // character or "("; "--flag" therefore keeps its dashes as part of the term.
    const lead = /^\(*(?:[+-](?=[\w(]))?\(*/.exec(term)[0];
    let core = term.slice(lead.length);
    let tail = "";
    // Only peel ")" that has no matching "(" inside the term, so "getUser()"
    // stays intact while "fooBar)" gives its parenthesis back to the query.
    while (core.endsWith(")") && countOf(core, ")") > countOf(core, "(")) {
      core = core.slice(0, -1);
      tail = `)${tail}`;
    }
    if (core === "" || operators.has(core)) return term;
    const hasMixedCase = /[A-Z]/.test(core) && /[a-z]/.test(core);
    if (hasMixedCase || core.includes("_")) {
      return `${lead}"${core}"${tail}`;
    }
    return term;
  };
  let output = "";
  let pendingSpace = false;
  let pos = 0;
  while (pos < query2.length) {
    if (isSpace(query2[pos])) {
      // Remember the gap, but never emit leading whitespace.
      pendingSpace = output !== "";
      pos++;
      continue;
    }
    let end;
    if (query2[pos] === '"') {
      const close = query2.indexOf('"', pos + 1);
      end = close === -1 ? query2.length : close + 1;
    } else {
      end = pos + 1;
      while (end < query2.length && query2[end] !== '"' && !isSpace(query2[end])) {
        end++;
      }
    }
    if (pendingSpace) output += " ";
    output += quoteTerm(query2.slice(pos, end));
    pendingSpace = false;
    pos = end;
  }
  return output;
}
9088
9130
  function normalizeTargets(targets) {
9089
9131
  if (!Array.isArray(targets)) return [];
9090
9132
  const seen = /* @__PURE__ */ new Set();
@@ -9199,41 +9241,61 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
9199
9241
  '- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
9200
9242
  "- Do NOT use exact=true for exploratory/conceptual queries \u2014 use the default for those.",
9201
9243
  "",
9244
+ "Combining searches with OR:",
9245
+ '- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
9246
+ `- For known symbol names, quote each term to prevent splitting: '"limitDRL" "limitRedis"' matches either exact symbol.`,
9247
+ '- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" \u2014 not what you want for symbol lookup.',
9248
+ "- Use OR to search for multiple related symbols in ONE search instead of separate searches.",
9249
+ "- This is much faster than running separate searches sequentially.",
9250
+ `- Example: search '"ForwardMessage" "SessionLimiter"' finds files with either exact symbol in one call.`,
9251
+ `- Example: search '"limitDRL" "doRollingWindowWrite"' finds both rate limiting functions at once.`,
9252
+ '- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
9253
+ "",
9254
+ "Parallel tool calls:",
9255
+ "- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).",
9256
+ "- Do NOT wait for one search to finish before starting the next if they are independent.",
9257
+ '- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
9258
+ "- Similarly, call multiple extract tools in parallel when verifying different files.",
9259
+ "",
9202
9260
  "GOOD search strategy (do this):",
9203
9261
  ' Query: "How does authentication work and how are sessions managed?"',
9204
- ' \u2192 search "authentication" \u2192 search "session management" (two different concepts)',
9262
+ ' \u2192 search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
9205
9263
  ' Query: "Find the IP allowlist middleware"',
9206
9264
  ' \u2192 search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
9207
- ' Query: "How does BM25 scoring work with SIMD optimization?"',
9208
- ' \u2192 search "BM25 scoring" \u2192 search "SIMD optimization" (two different concepts)',
9209
- ' Query: "Find ForwardMessage and SessionLimiter functions"',
9210
- ' \u2192 search exact=true "ForwardMessage" \u2192 search exact=true "SessionLimiter" (known symbols, use exact)',
9265
+ ' Query: "Find ForwardMessage and SessionLimiter"',
9266
+ ` \u2192 search '"ForwardMessage" "SessionLimiter"' (one OR search finds both exact symbols)`,
9267
+ ' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
9268
+ ' Query: "Find limitDRL and limitRedis functions"',
9269
+ ` \u2192 search '"limitDRL" "limitRedis"' (one OR search, quoted to prevent camelCase splitting)`,
9211
9270
  ' Query: "Find ThrottleRetryLimit usage"',
9212
9271
  ' \u2192 search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist \u2014 stop)',
9272
+ ' Query: "How does BM25 scoring work with SIMD optimization?"',
9273
+ ' \u2192 search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
9213
9274
  "",
9214
9275
  "BAD search strategy (never do this):",
9215
9276
  ' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG: case/style variations, probe handles them)',
9216
- ' \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation of same term)',
9277
+ ` \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation \u2014 combine with OR: '"limitDRL" "limitRedis"')`,
9217
9278
  ' \u2192 search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
9218
- ' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths hoping for different results)',
9279
+ ' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths \u2014 probe searches recursively)',
9219
9280
  ' \u2192 search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
9220
9281
  ' \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" (WRONG: repeating the exact same query)',
9221
- ' \u2192 search "error handling" \u2192 search "error handling" \u2192 search "error handling" (WRONG: repeating exact same query)',
9282
+ ' \u2192 search "authentication" \u2192 wait \u2192 search "session management" \u2192 wait (WRONG: these are independent, run them in parallel)',
9222
9283
  "",
9223
9284
  "Keyword tips:",
9224
9285
  "- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.",
9225
9286
  '- Avoid searching for these alone \u2014 combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
9226
9287
  '- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
9227
- "- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.",
9228
9288
  '- camelCase terms are split: getUserData becomes "get", "user", "data" \u2014 so one search covers all naming styles.',
9229
9289
  '- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
9230
9290
  "",
9231
9291
  "Strategy:",
9232
- "1. Analyze the query - identify key concepts, entities, and relationships",
9233
- "2. Run ONE focused search per concept. For known symbol names use exact=true. For concepts use default (exact=false).",
9234
- "3. If a search returns results, use extract to verify relevance",
9235
- "4. If a search returns NO results, the term does not exist in the codebase. Do NOT retry with variations, different paths, or longer strings. Move on.",
9236
- "5. Combine all relevant targets in your final response",
9292
+ "1. Analyze the query - identify key concepts and group related symbols",
9293
+ `2. Combine related symbols into OR searches: '"symbolA" "symbolB"' finds files with either (quote to prevent splitting)`,
9294
+ "3. Run INDEPENDENT searches in PARALLEL \u2014 do not wait for one to finish before starting another",
9295
+ "4. For known symbol names use exact=true. For concepts use default (exact=false).",
9296
+ "5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.",
9297
+ "6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.",
9298
+ "7. Combine all relevant targets in your final response",
9237
9299
  "",
9238
9300
  `Query: ${searchQuery}`,
9239
9301
  `Search path(s): ${searchPath}`,
@@ -9292,6 +9354,13 @@ var init_vercel = __esm({
9292
9354
  description: searchDelegate ? searchDelegateDescription : searchDescription,
9293
9355
  inputSchema: searchSchema,
9294
9356
  execute: async ({ query: searchQuery, path: path9, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
9357
+ if (!exact && searchQuery) {
9358
+ const originalQuery = searchQuery;
9359
+ searchQuery = autoQuoteSearchTerms(searchQuery);
9360
+ if (debug && searchQuery !== originalQuery) {
9361
+ console.error(`[search] Auto-quoted query: "${originalQuery}" \u2192 "${searchQuery}"`);
9362
+ }
9363
+ }
9295
9364
  const effectiveMaxTokens = paramMaxTokens || maxTokens;
9296
9365
  let searchPaths;
9297
9366
  if (path9) {
@@ -9327,13 +9396,13 @@ var init_vercel = __esm({
9327
9396
  return await search(searchOptions);
9328
9397
  };
9329
9398
  if (!searchDelegate) {
9330
- const searchKey = `${searchQuery}::${searchPath}::${exact || false}`;
9399
+ const searchKey = `${searchQuery}::${exact || false}`;
9331
9400
  if (!nextPage) {
9332
9401
  if (previousSearches.has(searchKey)) {
9333
9402
  if (debug) {
9334
- console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" in "${searchPath}"`);
9403
+ console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
9335
9404
  }
9336
- return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query in this path. Do NOT repeat the same search. If you need more results, set nextPage=true with the session ID from the previous search. Otherwise, try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
9405
+ return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results \u2014 probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
9337
9406
  }
9338
9407
  previousSearches.add(searchKey);
9339
9408
  paginationCounts.set(searchKey, 0);
@@ -14,6 +14,75 @@ import { existsSync } from 'fs';
14
14
  import { formatErrorForAI } from '../utils/error-types.js';
15
15
  import { annotateOutputWithHashes } from './hashline.js';
16
16
 
17
/**
 * Auto-quote search query terms that contain mixed case or underscores.
 * Unquoted camelCase like "limitDRL" gets split by stemming into "limit" + "DRL".
 * This wraps such terms in quotes so they match as literal strings.
 *
 * Tokenization: the query is split on whitespace; text between double quotes
 * is one piece and is returned untouched (an unclosed quote swallows the rest
 * of the query). The bare words AND, OR and NOT are never quoted.
 *
 * Query punctuation stays outside the quotes: a leading "+" or "-" prefix,
 * leading "(" and unbalanced trailing ")" are peeled off before quoting.
 * NOTE(review): assumes the search backend accepts a prefix directly in front
 * of a quoted term (`+"fooBar"`) - confirm against the query parser.
 *
 * Whitespace runs collapse to a single space and leading/trailing whitespace
 * is dropped, but no space is inserted between pieces that were adjacent in
 * the input (`+"fooBar"` stays glued).
 *
 * Examples:
 *   "limitDRL limitRedis"    → '"limitDRL" "limitRedis"'
 *   "ThrottleRetryLimit"     → '"ThrottleRetryLimit"'
 *   "allowed_ips"            → '"allowed_ips"'
 *   "rate limit"             → 'rate limit' (no change, all lowercase)
 *   '"already quoted"'       → '"already quoted"' (no change)
 *   'foo AND bar'            → 'foo AND bar' (operators preserved)
 *   '(fooBar OR baz_qux)'    → '("fooBar" OR "baz_qux")' (grouping preserved)
 *   '+fooBar -bazQux'        → '+"fooBar" -"bazQux"' (prefixes preserved)
 *
 * @param {string} query - Raw search query.
 * @returns {string} The rewritten query; falsy or non-string input is returned unchanged.
 */
function autoQuoteSearchTerms(query) {
  if (!query || typeof query !== 'string') return query;

  // Boolean operators that should not be quoted
  const operators = new Set(['AND', 'OR', 'NOT']);
  const isSpace = (ch) => /\s/.test(ch);
  const countOf = (text, ch) => text.split(ch).length - 1;

  // Quote a single whitespace-delimited piece when it looks like a symbol name.
  const quoteTerm = (term) => {
    // Already quoted, or a bare boolean operator
    if (term.startsWith('"') || operators.has(term)) return term;

    // Leading "(" plus at most one "+"/"-" that is directly followed by a word
    // character or "("; "--flag" therefore keeps its dashes as part of the term.
    const lead = /^\(*(?:[+-](?=[\w(]))?\(*/.exec(term)[0];
    let core = term.slice(lead.length);
    let tail = '';
    // Only peel ")" that has no matching "(" inside the term, so "getUser()"
    // stays intact while "fooBar)" gives its parenthesis back to the query.
    while (core.endsWith(')') && countOf(core, ')') > countOf(core, '(')) {
      core = core.slice(0, -1);
      tail = `)${tail}`;
    }
    if (core === '' || operators.has(core)) return term;

    // Needs quoting: has mixed case (upper+lower) or underscores
    const hasMixedCase = /[A-Z]/.test(core) && /[a-z]/.test(core);
    if (hasMixedCase || core.includes('_')) {
      return `${lead}"${core}"${tail}`;
    }
    return term;
  };

  let output = '';
  let pendingSpace = false;
  let pos = 0;
  while (pos < query.length) {
    if (isSpace(query[pos])) {
      // Remember the gap, but never emit leading whitespace.
      pendingSpace = output !== '';
      pos++;
      continue;
    }

    let end;
    if (query[pos] === '"') {
      // Quoted string — keep as-is; an unclosed quote takes the rest
      const close = query.indexOf('"', pos + 1);
      end = close === -1 ? query.length : close + 1;
    } else {
      // Unquoted piece runs to the next whitespace or quote
      end = pos + 1;
      while (end < query.length && query[end] !== '"' && !isSpace(query[end])) {
        end++;
      }
    }

    if (pendingSpace) output += ' ';
    output += quoteTerm(query.slice(pos, end));
    pendingSpace = false;
    pos = end;
  }

  return output;
}
85
+
17
86
  const CODE_SEARCH_SCHEMA = {
18
87
  type: 'object',
19
88
  properties: {
@@ -158,41 +227,61 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
158
227
  '- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
159
228
  '- Do NOT use exact=true for exploratory/conceptual queries — use the default for those.',
160
229
  '',
230
+ 'Combining searches with OR:',
231
+ '- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
232
+ '- For known symbol names, quote each term to prevent splitting: \'"limitDRL" "limitRedis"\' matches either exact symbol.',
233
+ '- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" — not what you want for symbol lookup.',
234
+ '- Use OR to search for multiple related symbols in ONE search instead of separate searches.',
235
+ '- This is much faster than running separate searches sequentially.',
236
+ '- Example: search \'"ForwardMessage" "SessionLimiter"\' finds files with either exact symbol in one call.',
237
+ '- Example: search \'"limitDRL" "doRollingWindowWrite"\' finds both rate limiting functions at once.',
238
+ '- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
239
+ '',
240
+ 'Parallel tool calls:',
241
+ '- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).',
242
+ '- Do NOT wait for one search to finish before starting the next if they are independent.',
243
+ '- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
244
+ '- Similarly, call multiple extract tools in parallel when verifying different files.',
245
+ '',
161
246
  'GOOD search strategy (do this):',
162
247
  ' Query: "How does authentication work and how are sessions managed?"',
163
- ' → search "authentication" → search "session management" (two different concepts)',
248
+ ' → search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
164
249
  ' Query: "Find the IP allowlist middleware"',
165
250
  ' → search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
166
- ' Query: "How does BM25 scoring work with SIMD optimization?"',
167
- ' → search "BM25 scoring" → search "SIMD optimization" (two different concepts)',
168
- ' Query: "Find ForwardMessage and SessionLimiter functions"',
169
- ' → search exact=true "ForwardMessage" → search exact=true "SessionLimiter" (known symbols, use exact)',
251
+ ' Query: "Find ForwardMessage and SessionLimiter"',
252
+ ' → search \'"ForwardMessage" "SessionLimiter"\' (one OR search finds both exact symbols)',
253
+ ' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
254
+ ' Query: "Find limitDRL and limitRedis functions"',
255
+ ' → search \'"limitDRL" "limitRedis"\' (one OR search, quoted to prevent camelCase splitting)',
170
256
  ' Query: "Find ThrottleRetryLimit usage"',
171
257
  ' → search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist — stop)',
258
+ ' Query: "How does BM25 scoring work with SIMD optimization?"',
259
+ ' → search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
172
260
  '',
173
261
  'BAD search strategy (never do this):',
174
262
  ' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG: case/style variations, probe handles them)',
175
- ' → search "limitDRL" → search "LimitDRL" (WRONG: case variation of same term)',
263
+ ' → search "limitDRL" → search "LimitDRL" (WRONG: case variation — combine with OR: \'"limitDRL" "limitRedis"\')',
176
264
  ' → search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
177
- ' → search "ThrottleRetryLimit" path=tyk → search "ThrottleRetryLimit" path=gateway → search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths hoping for different results)',
265
+ ' → search "ThrottleRetryLimit" path=tyk → search "ThrottleRetryLimit" path=gateway → search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths — probe searches recursively)',
178
266
  ' → search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
179
267
  ' → search "ForwardMessage" → search "ForwardMessage" → search "ForwardMessage" (WRONG: repeating the exact same query)',
180
- ' → search "error handling" → search "error handling" → search "error handling" (WRONG: repeating exact same query)',
268
+ ' → search "authentication" → wait → search "session management" → wait (WRONG: these are independent, run them in parallel)',
181
269
  '',
182
270
  'Keyword tips:',
183
271
  '- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
184
272
  '- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
185
273
  '- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
186
- '- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.',
187
274
  '- camelCase terms are split: getUserData becomes "get", "user", "data" — so one search covers all naming styles.',
188
275
  '- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
189
276
  '',
190
277
  'Strategy:',
191
- '1. Analyze the query - identify key concepts, entities, and relationships',
192
- '2. Run ONE focused search per concept. For known symbol names use exact=true. For concepts use default (exact=false).',
193
- '3. If a search returns results, use extract to verify relevance',
194
- '4. If a search returns NO results, the term does not exist in the codebase. Do NOT retry with variations, different paths, or longer strings. Move on.',
195
- '5. Combine all relevant targets in your final response',
278
+ '1. Analyze the query - identify key concepts and group related symbols',
279
+ '2. Combine related symbols into OR searches: \'"symbolA" "symbolB"\' finds files with either (quote to prevent splitting)',
280
+ '3. Run INDEPENDENT searches in PARALLEL — do not wait for one to finish before starting another',
281
+ '4. For known symbol names use exact=true. For concepts use default (exact=false).',
282
+ '5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.',
283
+ '6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.',
284
+ '7. Combine all relevant targets in your final response',
196
285
  '',
197
286
  `Query: ${searchQuery}`,
198
287
  `Search path(s): ${searchPath}`,
@@ -244,6 +333,16 @@ export const searchTool = (options = {}) => {
244
333
  : searchDescription,
245
334
  inputSchema: searchSchema,
246
335
  execute: async ({ query: searchQuery, path, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
336
+ // Auto-quote mixed-case and underscore terms to prevent unwanted stemming/splitting
337
+ // Skip when exact=true since that already preserves the literal string
338
+ if (!exact && searchQuery) {
339
+ const originalQuery = searchQuery;
340
+ searchQuery = autoQuoteSearchTerms(searchQuery);
341
+ if (debug && searchQuery !== originalQuery) {
342
+ console.error(`[search] Auto-quoted query: "${originalQuery}" → "${searchQuery}"`);
343
+ }
344
+ }
345
+
247
346
  // Use parameter maxTokens if provided, otherwise use the default
248
347
  const effectiveMaxTokens = paramMaxTokens || maxTokens;
249
348
 
@@ -289,13 +388,15 @@ export const searchTool = (options = {}) => {
289
388
  if (!searchDelegate) {
290
389
  // Block duplicate non-paginated searches (models sometimes repeat the exact same call)
291
390
  // Allow pagination: only nextPage=true is a legitimate repeat of the same query
292
- const searchKey = `${searchQuery}::${searchPath}::${exact || false}`;
391
+ // Use query+exact as the key (ignore path) to prevent path-hopping evasion
392
+ // where model searches same term on different subpaths hoping for different results
393
+ const searchKey = `${searchQuery}::${exact || false}`;
293
394
  if (!nextPage) {
294
395
  if (previousSearches.has(searchKey)) {
295
396
  if (debug) {
296
- console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" in "${searchPath}"`);
397
+ console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
297
398
  }
298
- return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query in this path. Do NOT repeat the same search. If you need more results, set nextPage=true with the session ID from the previous search. Otherwise, try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
399
+ return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results — probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
299
400
  }
300
401
  previousSearches.add(searchKey);
301
402
  paginationCounts.set(searchKey, 0);
@@ -36463,6 +36463,48 @@ var init_hashline = __esm({
36463
36463
  });
36464
36464
 
36465
36465
  // src/tools/vercel.js
36466
/**
 * Wrap mixed-case and underscore search terms in double quotes.
 *
 * The query is split on whitespace; text between double quotes is kept as one
 * piece and returned untouched (an unclosed quote swallows the rest of the
 * query). The bare words AND, OR and NOT are never quoted. Any other term that
 * contains both an upper- and a lower-case ASCII letter, or an underscore, is
 * wrapped in double quotes.
 *
 * Query punctuation stays outside the quotes: a leading "+" or "-" prefix,
 * leading "(" and unbalanced trailing ")" are peeled off before quoting, so
 * `(fooBar OR baz_qux)` becomes `("fooBar" OR "baz_qux")` rather than
 * `"(fooBar" OR "baz_qux)"`.
 * NOTE(review): assumes the search backend accepts a prefix directly in front
 * of a quoted term (`+"fooBar"`) - confirm against the query parser.
 *
 * Whitespace runs collapse to a single space and leading/trailing whitespace
 * is dropped, but no space is inserted between pieces that were adjacent in
 * the input (`+"fooBar"` stays glued).
 *
 * @param {string} query2 - Raw search query.
 * @returns {string} The rewritten query; falsy or non-string input is returned unchanged.
 */
function autoQuoteSearchTerms(query2) {
  if (!query2 || typeof query2 !== "string") return query2;
  const operators = /* @__PURE__ */ new Set(["AND", "OR", "NOT"]);
  const isSpace = (ch) => /\s/.test(ch);
  const countOf = (text, ch) => text.split(ch).length - 1;
  // Quote a single whitespace-delimited piece when it looks like a symbol name.
  const quoteTerm = (term) => {
    if (term.startsWith('"') || operators.has(term)) return term;
    // Leading "(" plus at most one "+"/"-" that is directly followed by a word
    // character or "("; "--flag" therefore keeps its dashes as part of the term.
    const lead = /^\(*(?:[+-](?=[\w(]))?\(*/.exec(term)[0];
    let core = term.slice(lead.length);
    let tail = "";
    // Only peel ")" that has no matching "(" inside the term, so "getUser()"
    // stays intact while "fooBar)" gives its parenthesis back to the query.
    while (core.endsWith(")") && countOf(core, ")") > countOf(core, "(")) {
      core = core.slice(0, -1);
      tail = `)${tail}`;
    }
    if (core === "" || operators.has(core)) return term;
    const hasMixedCase = /[A-Z]/.test(core) && /[a-z]/.test(core);
    if (hasMixedCase || core.includes("_")) {
      return `${lead}"${core}"${tail}`;
    }
    return term;
  };
  let output = "";
  let pendingSpace = false;
  let pos = 0;
  while (pos < query2.length) {
    if (isSpace(query2[pos])) {
      // Remember the gap, but never emit leading whitespace.
      pendingSpace = output !== "";
      pos++;
      continue;
    }
    let end;
    if (query2[pos] === '"') {
      const close = query2.indexOf('"', pos + 1);
      end = close === -1 ? query2.length : close + 1;
    } else {
      end = pos + 1;
      while (end < query2.length && query2[end] !== '"' && !isSpace(query2[end])) {
        end++;
      }
    }
    if (pendingSpace) output += " ";
    output += quoteTerm(query2.slice(pos, end));
    pendingSpace = false;
    pos = end;
  }
  return output;
}
36466
36508
  function normalizeTargets(targets) {
36467
36509
  if (!Array.isArray(targets)) return [];
36468
36510
  const seen = /* @__PURE__ */ new Set();
@@ -36577,41 +36619,61 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
36577
36619
  '- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
36578
36620
  "- Do NOT use exact=true for exploratory/conceptual queries \u2014 use the default for those.",
36579
36621
  "",
36622
+ "Combining searches with OR:",
36623
+ '- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
36624
+ `- For known symbol names, quote each term to prevent splitting: '"limitDRL" "limitRedis"' matches either exact symbol.`,
36625
+ '- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" \u2014 not what you want for symbol lookup.',
36626
+ "- Use OR to search for multiple related symbols in ONE search instead of separate searches.",
36627
+ "- This is much faster than running separate searches sequentially.",
36628
+ `- Example: search '"ForwardMessage" "SessionLimiter"' finds files with either exact symbol in one call.`,
36629
+ `- Example: search '"limitDRL" "doRollingWindowWrite"' finds both rate limiting functions at once.`,
36630
+ '- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
36631
+ "",
36632
+ "Parallel tool calls:",
36633
+ "- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).",
36634
+ "- Do NOT wait for one search to finish before starting the next if they are independent.",
36635
+ '- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
36636
+ "- Similarly, call multiple extract tools in parallel when verifying different files.",
36637
+ "",
36580
36638
  "GOOD search strategy (do this):",
36581
36639
  ' Query: "How does authentication work and how are sessions managed?"',
36582
- ' \u2192 search "authentication" \u2192 search "session management" (two different concepts)',
36640
+ ' \u2192 search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
36583
36641
  ' Query: "Find the IP allowlist middleware"',
36584
36642
  ' \u2192 search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
36585
- ' Query: "How does BM25 scoring work with SIMD optimization?"',
36586
- ' \u2192 search "BM25 scoring" \u2192 search "SIMD optimization" (two different concepts)',
36587
- ' Query: "Find ForwardMessage and SessionLimiter functions"',
36588
- ' \u2192 search exact=true "ForwardMessage" \u2192 search exact=true "SessionLimiter" (known symbols, use exact)',
36643
+ ' Query: "Find ForwardMessage and SessionLimiter"',
36644
+ ` \u2192 search '"ForwardMessage" "SessionLimiter"' (one OR search finds both exact symbols)`,
36645
+ ' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
36646
+ ' Query: "Find limitDRL and limitRedis functions"',
36647
+ ` \u2192 search '"limitDRL" "limitRedis"' (one OR search, quoted to prevent camelCase splitting)`,
36589
36648
  ' Query: "Find ThrottleRetryLimit usage"',
36590
36649
  ' \u2192 search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist \u2014 stop)',
36650
+ ' Query: "How does BM25 scoring work with SIMD optimization?"',
36651
+ ' \u2192 search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
36591
36652
  "",
36592
36653
  "BAD search strategy (never do this):",
36593
36654
  ' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG: case/style variations, probe handles them)',
36594
- ' \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation of same term)',
36655
+ ` \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation \u2014 combine with OR: '"limitDRL" "limitRedis"')`,
36595
36656
  ' \u2192 search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
36596
- ' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths hoping for different results)',
36657
+ ' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths \u2014 probe searches recursively)',
36597
36658
  ' \u2192 search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
36598
36659
  ' \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" (WRONG: repeating the exact same query)',
36599
- ' \u2192 search "error handling" \u2192 search "error handling" \u2192 search "error handling" (WRONG: repeating exact same query)',
36660
+ ' \u2192 search "authentication" \u2192 wait \u2192 search "session management" \u2192 wait (WRONG: these are independent, run them in parallel)',
36600
36661
  "",
36601
36662
  "Keyword tips:",
36602
36663
  "- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.",
36603
36664
  '- Avoid searching for these alone \u2014 combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
36604
36665
  '- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
36605
- "- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.",
36606
36666
  '- camelCase terms are split: getUserData becomes "get", "user", "data" \u2014 so one search covers all naming styles.',
36607
36667
  '- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
36608
36668
  "",
36609
36669
  "Strategy:",
36610
- "1. Analyze the query - identify key concepts, entities, and relationships",
36611
- "2. Run ONE focused search per concept. For known symbol names use exact=true. For concepts use default (exact=false).",
36612
- "3. If a search returns results, use extract to verify relevance",
36613
- "4. If a search returns NO results, the term does not exist in the codebase. Do NOT retry with variations, different paths, or longer strings. Move on.",
36614
- "5. Combine all relevant targets in your final response",
36670
+ "1. Analyze the query - identify key concepts and group related symbols",
36671
+ `2. Combine related symbols into OR searches: '"symbolA" "symbolB"' finds files with either (quote to prevent splitting)`,
36672
+ "3. Run INDEPENDENT searches in PARALLEL \u2014 do not wait for one to finish before starting another",
36673
+ "4. For known symbol names use exact=true. For concepts use default (exact=false).",
36674
+ "5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.",
36675
+ "6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.",
36676
+ "7. Combine all relevant targets in your final response",
36615
36677
  "",
36616
36678
  `Query: ${searchQuery}`,
36617
36679
  `Search path(s): ${searchPath}`,
@@ -36672,6 +36734,13 @@ var init_vercel = __esm({
36672
36734
  description: searchDelegate ? searchDelegateDescription : searchDescription,
36673
36735
  inputSchema: searchSchema,
36674
36736
  execute: async ({ query: searchQuery, path: path9, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
36737
+ if (!exact && searchQuery) {
36738
+ const originalQuery = searchQuery;
36739
+ searchQuery = autoQuoteSearchTerms(searchQuery);
36740
+ if (debug && searchQuery !== originalQuery) {
36741
+ console.error(`[search] Auto-quoted query: "${originalQuery}" \u2192 "${searchQuery}"`);
36742
+ }
36743
+ }
36675
36744
  const effectiveMaxTokens = paramMaxTokens || maxTokens;
36676
36745
  let searchPaths;
36677
36746
  if (path9) {
@@ -36707,13 +36776,13 @@ var init_vercel = __esm({
36707
36776
  return await search(searchOptions);
36708
36777
  };
36709
36778
  if (!searchDelegate) {
36710
- const searchKey = `${searchQuery}::${searchPath}::${exact || false}`;
36779
+ const searchKey = `${searchQuery}::${exact || false}`;
36711
36780
  if (!nextPage) {
36712
36781
  if (previousSearches.has(searchKey)) {
36713
36782
  if (debug) {
36714
- console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" in "${searchPath}"`);
36783
+ console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
36715
36784
  }
36716
- return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query in this path. Do NOT repeat the same search. If you need more results, set nextPage=true with the session ID from the previous search. Otherwise, try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
36785
+ return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results \u2014 probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
36717
36786
  }
36718
36787
  previousSearches.add(searchKey);
36719
36788
  paginationCounts.set(searchKey, 0);
package/cjs/index.cjs CHANGED
@@ -110625,6 +110625,48 @@ var init_hashline = __esm({
110625
110625
  });
110626
110626
 
110627
110627
  // src/tools/vercel.js
110628
/**
 * Auto-quote search query terms that contain mixed case or underscores, so
 * they are passed to the search backend as literal strings.
 *
 * Tokens that are already quoted and the boolean operators AND / OR / NOT are
 * left untouched. Tokens are re-joined with single spaces.
 *
 * Query-syntax characters attached to a term — one leading "+" or "-",
 * leading "(" and trailing ")" — are kept OUTSIDE the quotes, so that
 * `(limitDRL OR limitRedis)` becomes `("limitDRL" OR "limitRedis")` instead of
 * `"(limitDRL" OR "limitRedis)"`.
 * NOTE(review): assumes the search backend treats +/- prefixes and
 * parentheses as query syntax — confirm against the probe query grammar.
 *
 * @param {string} query2 - Raw search query.
 * @returns {string} The query with qualifying terms wrapped in double quotes;
 *   non-string or empty input is returned unchanged.
 */
function autoQuoteSearchTerms(query2) {
  if (!query2 || typeof query2 !== "string") return query2;

  // Non-global regex: .test() is stateless, so one instance can be reused.
  const whitespace = /\s/;

  // Tokenize on whitespace, keeping "quoted strings" as single tokens.
  const tokens = [];
  let pos = 0;
  while (pos < query2.length) {
    const ch = query2[pos];
    if (whitespace.test(ch)) {
      pos++;
      continue;
    }
    if (ch === '"') {
      const closing = query2.indexOf('"', pos + 1);
      if (closing === -1) {
        // Unclosed quote: the rest of the query is one token, kept as-is.
        tokens.push(query2.substring(pos));
        break;
      }
      tokens.push(query2.substring(pos, closing + 1));
      pos = closing + 1;
      continue;
    }
    let stop = pos;
    while (stop < query2.length && query2[stop] !== '"' && !whitespace.test(query2[stop])) {
      stop++;
    }
    tokens.push(query2.substring(pos, stop));
    pos = stop;
  }

  const operators = new Set(["AND", "OR", "NOT"]);

  // A term needs quoting when it has an underscore or both upper- and lowercase ASCII letters.
  const needsQuoting = (term) => term.includes("_") || (/[A-Z]/.test(term) && /[a-z]/.test(term));

  const result = tokens.map((token) => {
    if (token.startsWith('"')) return token;
    if (operators.has(token)) return token;

    // Peel off attached query syntax: one +/- prefix, opening and closing parens.
    let start = 0;
    if (token[start] === "+" || token[start] === "-") start++;
    while (token[start] === "(") start++;
    let end = token.length;
    while (end > start && token[end - 1] === ")") end--;
    const core = token.slice(start, end);

    // Parens inside the term (e.g. "Method(args)") or nothing left to quote:
    // fall back to treating the whole token as one term.
    if (core === "" || /[()]/.test(core)) {
      return needsQuoting(token) ? `"${token}"` : token;
    }
    if (operators.has(core) || !needsQuoting(core)) return token;
    return `${token.slice(0, start)}"${core}"${token.slice(end)}`;
  });

  return result.join(" ");
}
110628
110670
  function normalizeTargets(targets) {
110629
110671
  if (!Array.isArray(targets)) return [];
110630
110672
  const seen = /* @__PURE__ */ new Set();
@@ -110739,41 +110781,61 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
110739
110781
  '- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
110740
110782
  "- Do NOT use exact=true for exploratory/conceptual queries \u2014 use the default for those.",
110741
110783
  "",
110784
+ "Combining searches with OR:",
110785
+ '- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
110786
+ `- For known symbol names, quote each term to prevent splitting: '"limitDRL" "limitRedis"' matches either exact symbol.`,
110787
+ '- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" \u2014 not what you want for symbol lookup.',
110788
+ "- Use OR to search for multiple related symbols in ONE search instead of separate searches.",
110789
+ "- This is much faster than running separate searches sequentially.",
110790
+ `- Example: search '"ForwardMessage" "SessionLimiter"' finds files with either exact symbol in one call.`,
110791
+ `- Example: search '"limitDRL" "doRollingWindowWrite"' finds both rate limiting functions at once.`,
110792
+ '- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
110793
+ "",
110794
+ "Parallel tool calls:",
110795
+ "- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).",
110796
+ "- Do NOT wait for one search to finish before starting the next if they are independent.",
110797
+ '- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
110798
+ "- Similarly, call multiple extract tools in parallel when verifying different files.",
110799
+ "",
110742
110800
  "GOOD search strategy (do this):",
110743
110801
  ' Query: "How does authentication work and how are sessions managed?"',
110744
- ' \u2192 search "authentication" \u2192 search "session management" (two different concepts)',
110802
+ ' \u2192 search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
110745
110803
  ' Query: "Find the IP allowlist middleware"',
110746
110804
  ' \u2192 search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
110747
- ' Query: "How does BM25 scoring work with SIMD optimization?"',
110748
- ' \u2192 search "BM25 scoring" \u2192 search "SIMD optimization" (two different concepts)',
110749
- ' Query: "Find ForwardMessage and SessionLimiter functions"',
110750
- ' \u2192 search exact=true "ForwardMessage" \u2192 search exact=true "SessionLimiter" (known symbols, use exact)',
110805
+ ' Query: "Find ForwardMessage and SessionLimiter"',
110806
+ ` \u2192 search '"ForwardMessage" "SessionLimiter"' (one OR search finds both exact symbols)`,
110807
+ ' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
110808
+ ' Query: "Find limitDRL and limitRedis functions"',
110809
+ ` \u2192 search '"limitDRL" "limitRedis"' (one OR search, quoted to prevent camelCase splitting)`,
110751
110810
  ' Query: "Find ThrottleRetryLimit usage"',
110752
110811
  ' \u2192 search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist \u2014 stop)',
110812
+ ' Query: "How does BM25 scoring work with SIMD optimization?"',
110813
+ ' \u2192 search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
110753
110814
  "",
110754
110815
  "BAD search strategy (never do this):",
110755
110816
  ' \u2192 search "AllowedIPs" \u2192 search "allowedIps" \u2192 search "allowed_ips" (WRONG: case/style variations, probe handles them)',
110756
- ' \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation of same term)',
110817
+ ` \u2192 search "limitDRL" \u2192 search "LimitDRL" (WRONG: case variation \u2014 combine with OR: '"limitDRL" "limitRedis"')`,
110757
110818
  ' \u2192 search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
110758
- ' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths hoping for different results)',
110819
+ ' \u2192 search "ThrottleRetryLimit" path=tyk \u2192 search "ThrottleRetryLimit" path=gateway \u2192 search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths \u2014 probe searches recursively)',
110759
110820
  ' \u2192 search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
110760
110821
  ' \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" \u2192 search "ForwardMessage" (WRONG: repeating the exact same query)',
110761
- ' \u2192 search "error handling" \u2192 search "error handling" \u2192 search "error handling" (WRONG: repeating exact same query)',
110822
+ ' \u2192 search "authentication" \u2192 wait \u2192 search "session management" \u2192 wait (WRONG: these are independent, run them in parallel)',
110762
110823
  "",
110763
110824
  "Keyword tips:",
110764
110825
  "- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.",
110765
110826
  '- Avoid searching for these alone \u2014 combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
110766
110827
  '- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
110767
- "- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.",
110768
110828
  '- camelCase terms are split: getUserData becomes "get", "user", "data" \u2014 so one search covers all naming styles.',
110769
110829
  '- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
110770
110830
  "",
110771
110831
  "Strategy:",
110772
- "1. Analyze the query - identify key concepts, entities, and relationships",
110773
- "2. Run ONE focused search per concept. For known symbol names use exact=true. For concepts use default (exact=false).",
110774
- "3. If a search returns results, use extract to verify relevance",
110775
- "4. If a search returns NO results, the term does not exist in the codebase. Do NOT retry with variations, different paths, or longer strings. Move on.",
110776
- "5. Combine all relevant targets in your final response",
110832
+ "1. Analyze the query - identify key concepts and group related symbols",
110833
+ `2. Combine related symbols into OR searches: '"symbolA" "symbolB"' finds files with either (quote to prevent splitting)`,
110834
+ "3. Run INDEPENDENT searches in PARALLEL \u2014 do not wait for one to finish before starting another",
110835
+ "4. For known symbol names use exact=true. For concepts use default (exact=false).",
110836
+ "5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.",
110837
+ "6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.",
110838
+ "7. Combine all relevant targets in your final response",
110777
110839
  "",
110778
110840
  `Query: ${searchQuery}`,
110779
110841
  `Search path(s): ${searchPath}`,
@@ -110834,6 +110896,13 @@ var init_vercel = __esm({
110834
110896
  description: searchDelegate ? searchDelegateDescription : searchDescription,
110835
110897
  inputSchema: searchSchema,
110836
110898
  execute: async ({ query: searchQuery, path: path9, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
110899
+ if (!exact && searchQuery) {
110900
+ const originalQuery = searchQuery;
110901
+ searchQuery = autoQuoteSearchTerms(searchQuery);
110902
+ if (debug && searchQuery !== originalQuery) {
110903
+ console.error(`[search] Auto-quoted query: "${originalQuery}" \u2192 "${searchQuery}"`);
110904
+ }
110905
+ }
110837
110906
  const effectiveMaxTokens = paramMaxTokens || maxTokens;
110838
110907
  let searchPaths;
110839
110908
  if (path9) {
@@ -110869,13 +110938,13 @@ var init_vercel = __esm({
110869
110938
  return await search(searchOptions);
110870
110939
  };
110871
110940
  if (!searchDelegate) {
110872
- const searchKey = `${searchQuery}::${searchPath}::${exact || false}`;
110941
+ const searchKey = `${searchQuery}::${exact || false}`;
110873
110942
  if (!nextPage) {
110874
110943
  if (previousSearches.has(searchKey)) {
110875
110944
  if (debug) {
110876
- console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" in "${searchPath}"`);
110945
+ console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
110877
110946
  }
110878
- return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query in this path. Do NOT repeat the same search. If you need more results, set nextPage=true with the session ID from the previous search. Otherwise, try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
110947
+ return "DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results \u2014 probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.";
110879
110948
  }
110880
110949
  previousSearches.add(searchKey);
110881
110950
  paginationCounts.set(searchKey, 0);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc282",
3
+ "version": "0.6.0-rc283",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -14,6 +14,75 @@ import { existsSync } from 'fs';
14
14
  import { formatErrorForAI } from '../utils/error-types.js';
15
15
  import { annotateOutputWithHashes } from './hashline.js';
16
16
 
17
/**
 * Auto-quote search query terms that contain mixed case or underscores.
 * Unquoted camelCase like "limitDRL" gets split by stemming into "limit" + "DRL".
 * This wraps such terms in quotes so they match as literal strings.
 *
 * Already-quoted tokens and the boolean operators AND / OR / NOT are left
 * untouched. Tokens are re-joined with single spaces.
 *
 * Query-syntax characters attached to a term — one leading "+" or "-",
 * leading "(" and trailing ")" — are kept OUTSIDE the quotes so grouping and
 * prefixes are not turned into part of the literal.
 * NOTE(review): assumes the search backend treats +/- prefixes and
 * parentheses as query syntax — confirm against the probe query grammar.
 *
 * Examples:
 *   "limitDRL limitRedis"        → '"limitDRL" "limitRedis"'
 *   "ThrottleRetryLimit"         → '"ThrottleRetryLimit"'
 *   "allowed_ips"                → '"allowed_ips"'
 *   "rate limit"                 → 'rate limit' (no change, all lowercase)
 *   '"already quoted"'           → '"already quoted"' (no change)
 *   'foo AND bar'                → 'foo AND bar' (operators preserved)
 *   '(limitDRL OR limitRedis)'   → '("limitDRL" OR "limitRedis")'
 *   '+getUserData'               → '+"getUserData"'
 *
 * @param {string} query - Raw search query.
 * @returns {string} The query with qualifying terms wrapped in double quotes;
 *   non-string or empty input is returned unchanged.
 */
function autoQuoteSearchTerms(query) {
  if (!query || typeof query !== 'string') return query;

  // Non-global regex: .test() is stateless, so one instance can be reused.
  const whitespace = /\s/;

  // Split on whitespace, preserving quoted strings as single tokens
  const tokens = [];
  let pos = 0;
  while (pos < query.length) {
    const ch = query[pos];
    if (whitespace.test(ch)) {
      pos++;
      continue;
    }
    if (ch === '"') {
      const closing = query.indexOf('"', pos + 1);
      if (closing === -1) {
        // Unclosed quote — take the rest of the query as one token
        tokens.push(query.substring(pos));
        break;
      }
      tokens.push(query.substring(pos, closing + 1));
      pos = closing + 1;
      continue;
    }
    let stop = pos;
    while (stop < query.length && query[stop] !== '"' && !whitespace.test(query[stop])) {
      stop++;
    }
    tokens.push(query.substring(pos, stop));
    pos = stop;
  }

  // Boolean operators that should not be quoted
  const operators = new Set(['AND', 'OR', 'NOT']);

  // A term needs quoting when it has an underscore or mixed (upper + lower) ASCII case
  const needsQuoting = (term) => term.includes('_') || (/[A-Z]/.test(term) && /[a-z]/.test(term));

  const result = tokens.map((token) => {
    // Already quoted
    if (token.startsWith('"')) return token;
    // Boolean operator
    if (operators.has(token)) return token;

    // Peel off attached query syntax: one +/- prefix, opening and closing parens
    let start = 0;
    if (token[start] === '+' || token[start] === '-') start++;
    while (token[start] === '(') start++;
    let end = token.length;
    while (end > start && token[end - 1] === ')') end--;
    const core = token.slice(start, end);

    // Parens inside the term (e.g. "Method(args)") or nothing left to quote:
    // fall back to treating the whole token as one term
    if (core === '' || /[()]/.test(core)) {
      return needsQuoting(token) ? `"${token}"` : token;
    }
    if (operators.has(core) || !needsQuoting(core)) return token;
    return `${token.slice(0, start)}"${core}"${token.slice(end)}`;
  });

  return result.join(' ');
}
85
+
17
86
  const CODE_SEARCH_SCHEMA = {
18
87
  type: 'object',
19
88
  properties: {
@@ -158,41 +227,61 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
158
227
  '- This is ideal for precise lookups: exact=true "ForwardMessage", exact=true "SessionLimiter", exact=true "ThrottleRetryLimit".',
159
228
  '- Do NOT use exact=true for exploratory/conceptual queries — use the default for those.',
160
229
  '',
230
+ 'Combining searches with OR:',
231
+ '- Multiple unquoted words use OR logic: rate limit matches files containing EITHER "rate" OR "limit".',
232
+ '- For known symbol names, quote each term to prevent splitting: \'"limitDRL" "limitRedis"\' matches either exact symbol.',
233
+ '- Without quotes, camelCase like limitDRL gets split into "limit" + "DRL" — not what you want for symbol lookup.',
234
+ '- Use OR to search for multiple related symbols in ONE search instead of separate searches.',
235
+ '- This is much faster than running separate searches sequentially.',
236
+ '- Example: search \'"ForwardMessage" "SessionLimiter"\' finds files with either exact symbol in one call.',
237
+ '- Example: search \'"limitDRL" "doRollingWindowWrite"\' finds both rate limiting functions at once.',
238
+ '- Use AND only when you need both terms to appear in the same file: "rate AND limit".',
239
+ '',
240
+ 'Parallel tool calls:',
241
+ '- When you need to search for INDEPENDENT concepts, call multiple search tools IN PARALLEL (same response).',
242
+ '- Do NOT wait for one search to finish before starting the next if they are independent.',
243
+ '- Example: for "rate limiting and session management", call search "rate limiting" AND search "session management" in parallel.',
244
+ '- Similarly, call multiple extract tools in parallel when verifying different files.',
245
+ '',
161
246
  'GOOD search strategy (do this):',
162
247
  ' Query: "How does authentication work and how are sessions managed?"',
163
- ' → search "authentication" → search "session management" (two different concepts)',
248
+ ' → search "authentication" + search "session management" IN PARALLEL (two independent concepts)',
164
249
  ' Query: "Find the IP allowlist middleware"',
165
250
  ' → search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
166
- ' Query: "How does BM25 scoring work with SIMD optimization?"',
167
- ' → search "BM25 scoring" → search "SIMD optimization" (two different concepts)',
168
- ' Query: "Find ForwardMessage and SessionLimiter functions"',
169
- ' → search exact=true "ForwardMessage" → search exact=true "SessionLimiter" (known symbols, use exact)',
251
+ ' Query: "Find ForwardMessage and SessionLimiter"',
252
+ ' → search \'"ForwardMessage" "SessionLimiter"\' (one OR search finds both exact symbols)',
253
+ ' OR: search exact=true "ForwardMessage" + search exact=true "SessionLimiter" IN PARALLEL',
254
+ ' Query: "Find limitDRL and limitRedis functions"',
255
+ ' → search \'"limitDRL" "limitRedis"\' (one OR search, quoted to prevent camelCase splitting)',
170
256
  ' Query: "Find ThrottleRetryLimit usage"',
171
257
  ' → search exact=true "ThrottleRetryLimit" (one search, if no results the symbol does not exist — stop)',
258
+ ' Query: "How does BM25 scoring work with SIMD optimization?"',
259
+ ' → search "BM25 scoring" + search "SIMD optimization" IN PARALLEL (two different concepts)',
172
260
  '',
173
261
  'BAD search strategy (never do this):',
174
262
  ' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG: case/style variations, probe handles them)',
175
- ' → search "limitDRL" → search "LimitDRL" (WRONG: case variation of same term)',
263
+ ' → search "limitDRL" → search "LimitDRL" (WRONG: case variation — combine with OR: \'"limitDRL" "limitRedis"\')',
176
264
  ' → search "throttle_retry_limit" after searching "ThrottleRetryLimit" (WRONG: snake_case variation, probe handles it)',
177
- ' → search "ThrottleRetryLimit" path=tyk → search "ThrottleRetryLimit" path=gateway → search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths hoping for different results)',
265
+ ' → search "ThrottleRetryLimit" path=tyk → search "ThrottleRetryLimit" path=gateway → search "ThrottleRetryLimit" path=apidef (WRONG: same query on different paths — probe searches recursively)',
178
266
  ' → search "func (k *RateLimitAndQuotaCheck) handleRateLimitFailure" (WRONG: do not search full function signatures, just use exact=true "handleRateLimitFailure")',
179
267
  ' → search "ForwardMessage" → search "ForwardMessage" → search "ForwardMessage" (WRONG: repeating the exact same query)',
180
- ' → search "error handling" → search "error handling" → search "error handling" (WRONG: repeating exact same query)',
268
+ ' → search "authentication" → wait → search "session management" → wait (WRONG: these are independent, run them in parallel)',
181
269
  '',
182
270
  'Keyword tips:',
183
271
  '- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
184
272
  '- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
185
273
  '- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
186
- '- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.',
187
274
  '- camelCase terms are split: getUserData becomes "get", "user", "data" — so one search covers all naming styles.',
188
275
  '- Do NOT search for full function signatures like "func (r *Type) Method(args)". Just search for the method name with exact=true.',
189
276
  '',
190
277
  'Strategy:',
191
- '1. Analyze the query - identify key concepts, entities, and relationships',
192
- '2. Run ONE focused search per concept. For known symbol names use exact=true. For concepts use default (exact=false).',
193
- '3. If a search returns results, use extract to verify relevance',
194
- '4. If a search returns NO results, the term does not exist in the codebase. Do NOT retry with variations, different paths, or longer strings. Move on.',
195
- '5. Combine all relevant targets in your final response',
278
+ '1. Analyze the query - identify key concepts and group related symbols',
279
+ '2. Combine related symbols into OR searches: \'"symbolA" "symbolB"\' finds files with either (quote to prevent splitting)',
280
+ '3. Run INDEPENDENT searches in PARALLEL — do not wait for one to finish before starting another',
281
+ '4. For known symbol names use exact=true. For concepts use default (exact=false).',
282
+ '5. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.',
283
+ '6. If a search returns NO results, the term does not exist. Do NOT retry with variations, different paths, or longer strings. Move on.',
284
+ '7. Combine all relevant targets in your final response',
196
285
  '',
197
286
  `Query: ${searchQuery}`,
198
287
  `Search path(s): ${searchPath}`,
@@ -244,6 +333,16 @@ export const searchTool = (options = {}) => {
244
333
  : searchDescription,
245
334
  inputSchema: searchSchema,
246
335
  execute: async ({ query: searchQuery, path, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
336
+ // Auto-quote mixed-case and underscore terms to prevent unwanted stemming/splitting
337
+ // Skip when exact=true since that already preserves the literal string
338
+ if (!exact && searchQuery) {
339
+ const originalQuery = searchQuery;
340
+ searchQuery = autoQuoteSearchTerms(searchQuery);
341
+ if (debug && searchQuery !== originalQuery) {
342
+ console.error(`[search] Auto-quoted query: "${originalQuery}" → "${searchQuery}"`);
343
+ }
344
+ }
345
+
247
346
  // Use parameter maxTokens if provided, otherwise use the default
248
347
  const effectiveMaxTokens = paramMaxTokens || maxTokens;
249
348
 
@@ -289,13 +388,15 @@ export const searchTool = (options = {}) => {
289
388
  if (!searchDelegate) {
290
389
  // Block duplicate non-paginated searches (models sometimes repeat the exact same call)
291
390
  // Allow pagination: only nextPage=true is a legitimate repeat of the same query
292
- const searchKey = `${searchQuery}::${searchPath}::${exact || false}`;
391
+ // Use query+exact as the key (ignore path) to prevent path-hopping evasion
392
+ // where model searches same term on different subpaths hoping for different results
393
+ const searchKey = `${searchQuery}::${exact || false}`;
293
394
  if (!nextPage) {
294
395
  if (previousSearches.has(searchKey)) {
295
396
  if (debug) {
296
- console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" in "${searchPath}"`);
397
+ console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" (path: "${searchPath}")`);
297
398
  }
298
- return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query in this path. Do NOT repeat the same search. If you need more results, set nextPage=true with the session ID from the previous search. Otherwise, try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
399
+ return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query. Changing the path does NOT give different results — probe searches recursively. Do NOT repeat the same search. Try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
299
400
  }
300
401
  previousSearches.add(searchKey);
301
402
  paginationCounts.set(searchKey, 0);