@probelabs/probe 0.6.0-rc279 → 0.6.0-rc280

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -254,6 +254,7 @@ export class ProbeAgent {
254
254
  // Supports exclusion with '!' prefix: ['*', '!bash'] = all tools except bash
255
255
  // disableTools is a convenience flag that overrides allowedTools to []
256
256
  const effectiveAllowedTools = options.disableTools ? [] : options.allowedTools;
257
+ this._rawAllowedTools = options.allowedTools; // Keep raw value for explicit tool checks
257
258
  this.allowedTools = this._parseAllowedTools(effectiveAllowedTools);
258
259
 
259
260
  // Storage adapter (defaults to in-memory)
@@ -481,6 +482,17 @@ export class ProbeAgent {
481
482
  return mcpToolNames.filter(toolName => this._isMcpToolAllowed(toolName));
482
483
  }
483
484
 
485
+ /**
486
+ * Report whether the 'query' tool was listed by name in the raw allowedTools option (a wildcard entry does not count).
487
+ * Query (ast-grep) is excluded by default because models struggle with AST pattern syntax.
488
+ * @returns {boolean} True only when the raw allowedTools value is an array containing the exact string 'query'.
489
+ * @private
490
+ */
491
+ _isQueryExplicitlyAllowed() {
492
+ if (!this._rawAllowedTools) return false; // no raw allowedTools value was recorded, so nothing was listed explicitly
493
+ return Array.isArray(this._rawAllowedTools) && this._rawAllowedTools.includes('query'); // exact 'query' element required; '*' alone never matches
494
+ }
495
+
484
496
  /**
485
497
  * Check if tracer is AppTracer (expects sessionId as first param) vs SimpleAppTracer
486
498
  * @returns {boolean} - True if tracer is AppTracer style (requires sessionId)
@@ -837,7 +849,9 @@ export class ProbeAgent {
837
849
  if (wrappedTools.searchToolInstance && isToolAllowed('search')) {
838
850
  this.toolImplementations.search = wrappedTools.searchToolInstance;
839
851
  }
840
- if (wrappedTools.queryToolInstance && isToolAllowed('query')) {
852
+ // query tool (ast-grep) is not exposed to AI by default — models struggle with AST pattern syntax.
853
+ // Only register it when explicitly listed in allowedTools (not via wildcard '*').
854
+ if (wrappedTools.queryToolInstance && isToolAllowed('query') && this._isQueryExplicitlyAllowed()) {
841
855
  this.toolImplementations.query = wrappedTools.queryToolInstance;
842
856
  }
843
857
  if (wrappedTools.extractToolInstance && isToolAllowed('extract')) {
@@ -2008,12 +2022,15 @@ export class ProbeAgent {
2008
2022
  const toolMap = {
2009
2023
  search: {
2010
2024
  schema: searchSchema,
2011
- description: 'Search code in the repository using keyword queries with Elasticsearch syntax.'
2012
- },
2013
- query: {
2014
- schema: querySchema,
2015
- description: 'Search code using ast-grep structural pattern matching.'
2025
+ description: this.searchDelegate
2026
+ ? 'Search code in the repository by asking a question. Accepts natural language questions — a subagent breaks them into targeted keyword searches and returns extracted code blocks. Do NOT formulate keyword queries yourself.'
2027
+ : 'Search code in the repository using keyword queries with Elasticsearch syntax. Handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically — do NOT try keyword variations manually.'
2016
2028
  },
2029
+ // query tool (ast-grep) removed from AI-facing tools — models struggle with pattern syntax
2030
+ // query: {
2031
+ // schema: querySchema,
2032
+ // description: 'Search code using ast-grep structural pattern matching.'
2033
+ // },
2017
2034
  extract: {
2018
2035
  schema: extractSchema,
2019
2036
  description: 'Extract code blocks from files based on file paths and optional line numbers.'
@@ -2849,10 +2866,12 @@ export class ProbeAgent {
2849
2866
  }
2850
2867
 
2851
2868
  // Add high-level instructions about when to use tools
2869
+ const searchToolDesc1 = this.searchDelegate
2870
+ ? '- search: Ask natural language questions to find code (e.g., "How does authentication work?"). A subagent handles keyword searches and returns extracted code blocks. Do NOT formulate keyword queries — just ask questions.'
2871
+ : '- search: Find code patterns using keyword queries with Elasticsearch syntax. Handles stemming and case variations automatically — do NOT try manual keyword variations.';
2852
2872
  systemPrompt += `You have access to powerful code search and analysis tools through MCP:
2853
- - search: Find code patterns using semantic search
2873
+ ${searchToolDesc1}
2854
2874
  - extract: Extract specific code sections with context
2855
- - query: Use AST patterns for structural code matching
2856
2875
  - listFiles: Browse directory contents
2857
2876
  - searchFiles: Find files by name patterns`;
2858
2877
 
@@ -2860,19 +2879,21 @@ export class ProbeAgent {
2860
2879
  systemPrompt += `\n- bash: Execute bash commands for system operations`;
2861
2880
  }
2862
2881
 
2863
- const searchGuidance = this.searchDelegate
2864
- ? '1. Start with search to retrieve extracted code blocks'
2865
- : '1. Start with search to find relevant code patterns';
2866
- const extractGuidance = this.searchDelegate
2882
+ const searchGuidance1 = this.searchDelegate
2883
+ ? '1. Start with search — ask a question about what you want to understand. It returns extracted code blocks directly.'
2884
+ : '1. Start with search to find relevant code patterns. One search per concept is usually enough — probe handles stemming and case variations.';
2885
+ const extractGuidance1 = this.searchDelegate
2867
2886
  ? '2. Use extract only if you need more context or a full file'
2868
2887
  : '2. Use extract to get detailed context when needed';
2869
2888
 
2870
2889
  systemPrompt += `\n
2871
2890
  When exploring code:
2872
- ${searchGuidance}
2873
- ${extractGuidance}
2891
+ ${searchGuidance1}
2892
+ ${extractGuidance1}
2874
2893
  3. Prefer focused, specific searches over broad queries
2875
- 4. Combine multiple tools to build complete understanding`;
2894
+ 4. Do NOT repeat the same search or try trivial keyword variations — probe handles stemming and case variations automatically
2895
+ 5. If 2-3 consecutive searches return no results for a concept, stop searching for it — the term likely does not exist in that codebase
2896
+ 6. Combine multiple tools to build complete understanding`;
2876
2897
 
2877
2898
  // Add workspace context
2878
2899
  if (this.allowedFolders && this.allowedFolders.length > 0) {
@@ -2911,10 +2932,12 @@ ${extractGuidance}
2911
2932
  }
2912
2933
 
2913
2934
  // Add high-level instructions about when to use tools
2935
+ const searchToolDesc2 = this.searchDelegate
2936
+ ? '- search: Ask natural language questions to find code (e.g., "How does authentication work?"). A subagent handles keyword searches and returns extracted code blocks. Do NOT formulate keyword queries — just ask questions.'
2937
+ : '- search: Find code patterns using keyword queries with Elasticsearch syntax. Handles stemming and case variations automatically — do NOT try manual keyword variations.';
2914
2938
  systemPrompt += `You have access to powerful code search and analysis tools through MCP:
2915
- - search: Find code patterns using semantic search
2939
+ ${searchToolDesc2}
2916
2940
  - extract: Extract specific code sections with context
2917
- - query: Use AST patterns for structural code matching
2918
2941
  - listFiles: Browse directory contents
2919
2942
  - searchFiles: Find files by name patterns`;
2920
2943
 
@@ -2922,19 +2945,21 @@ ${extractGuidance}
2922
2945
  systemPrompt += `\n- bash: Execute bash commands for system operations`;
2923
2946
  }
2924
2947
 
2925
- const searchGuidance = this.searchDelegate
2926
- ? '1. Start with search to retrieve extracted code blocks'
2927
- : '1. Start with search to find relevant code patterns';
2928
- const extractGuidance = this.searchDelegate
2948
+ const searchGuidance2 = this.searchDelegate
2949
+ ? '1. Start with search — ask a question about what you want to understand. It returns extracted code blocks directly.'
2950
+ : '1. Start with search to find relevant code patterns. One search per concept is usually enough — probe handles stemming and case variations.';
2951
+ const extractGuidance2 = this.searchDelegate
2929
2952
  ? '2. Use extract only if you need more context or a full file'
2930
2953
  : '2. Use extract to get detailed context when needed';
2931
2954
 
2932
2955
  systemPrompt += `\n
2933
2956
  When exploring code:
2934
- ${searchGuidance}
2935
- ${extractGuidance}
2957
+ ${searchGuidance2}
2958
+ ${extractGuidance2}
2936
2959
  3. Prefer focused, specific searches over broad queries
2937
- 4. Combine multiple tools to build complete understanding`;
2960
+ 4. Do NOT repeat the same search or try trivial keyword variations — probe handles stemming and case variations automatically
2961
+ 5. If 2-3 consecutive searches return no results for a concept, stop searching for it — the term likely does not exist in that codebase
2962
+ 6. Combine multiple tools to build complete understanding`;
2938
2963
 
2939
2964
  // Add workspace context
2940
2965
  if (this.allowedFolders && this.allowedFolders.length > 0) {
@@ -2990,10 +3015,10 @@ ${extractGuidance}
2990
3015
  Follow these instructions carefully:
2991
3016
  1. Analyze the user's request.
2992
3017
  2. Use the available tools step-by-step to fulfill the request.
2993
- 3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' It already returns extracted code blocks; use extract only to expand context or read full files.' : ' Read full files only if really necessary.'}
3018
+ 3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
2994
3019
  4. Ensure to get really deep and understand the full picture before answering.
2995
3020
  5. Once the task is fully completed, use the attempt_completion tool to provide the final result.
2996
- 6. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.${this.allowEdit ? `
3021
+ 6. ${this.searchDelegate ? 'Ask clear, specific questions when searching. Each search should target a distinct concept or question.' : 'Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.'}${this.allowEdit ? `
2997
3022
  7. When modifying files, choose the appropriate tool:
2998
3023
  - Use 'edit' for all code modifications:
2999
3024
  * PREFERRED: Use start_line (and optionally end_line) for line-targeted editing — this is the safest and most precise approach.${this.hashLines ? ' Use the line:hash references from extract/search output (e.g. "42:ab") for integrity verification.' : ''} Always use extract first to see line numbers${this.hashLines ? ' and hashes' : ''}, then edit by line reference.
@@ -284,6 +284,7 @@ export function generateSandboxGlobals(options) {
284
284
  results.push(p);
285
285
 
286
286
  if (executing.size >= mapConcurrency) {
287
+ console.error(`[map] Concurrency limit reached (${executing.size}/${mapConcurrency}), waiting for a slot...`);
287
288
  await Promise.race(executing);
288
289
  }
289
290
  }
package/src/delegate.js CHANGED
@@ -122,20 +122,20 @@ class DelegationManager {
122
122
  }
123
123
 
124
124
  // Need to wait in queue
125
- if (debug) {
126
- console.error(`[DelegationManager] Slot unavailable (${this.globalActive}/${this.maxConcurrent}), queuing... (queue size: ${this.waitQueue.length}, timeout: ${effectiveTimeout}ms)`);
127
- }
125
+ console.error(`[DelegationManager] Slot unavailable (${this.globalActive}/${this.maxConcurrent}), queuing... (queue size: ${this.waitQueue.length + 1}, timeout: ${effectiveTimeout}ms)`);
128
126
 
129
127
  // Create a promise that will be resolved when a slot becomes available
130
128
  // or rejected if session limit is exceeded or queue timeout expires
131
129
  return new Promise((resolve, reject) => {
130
+ const queuedAt = Date.now();
132
131
  const entry = {
133
132
  resolve: null, // Will be wrapped below
134
133
  reject: null, // Will be wrapped below
135
134
  parentSessionId,
136
135
  debug,
137
- queuedAt: Date.now(),
138
- timeoutId: null
136
+ queuedAt,
137
+ timeoutId: null,
138
+ reminderId: null
139
139
  };
140
140
 
141
141
  // Wrap resolve/reject to clear timeout and prevent double-settling
@@ -144,12 +144,14 @@ class DelegationManager {
144
144
  if (settled) return;
145
145
  settled = true;
146
146
  if (entry.timeoutId) clearTimeout(entry.timeoutId);
147
+ if (entry.reminderId) clearInterval(entry.reminderId);
147
148
  resolve(value);
148
149
  };
149
150
  entry.reject = (error) => {
150
151
  if (settled) return;
151
152
  settled = true;
152
153
  if (entry.timeoutId) clearTimeout(entry.timeoutId);
154
+ if (entry.reminderId) clearInterval(entry.reminderId);
153
155
  reject(error);
154
156
  };
155
157
 
@@ -165,6 +167,15 @@ class DelegationManager {
165
167
  }, effectiveTimeout);
166
168
  }
167
169
 
170
+ // Always emit periodic wait visibility while queued.
171
+ entry.reminderId = setInterval(() => {
172
+ const waitedSeconds = Math.round((Date.now() - queuedAt) / 1000);
173
+ console.error(`[DelegationManager] Still waiting for slot (${waitedSeconds}s). ${this.globalActive}/${this.maxConcurrent} active, ${this.waitQueue.length} queued.`);
174
+ }, 15000);
175
+ if (entry.reminderId.unref) {
176
+ entry.reminderId.unref();
177
+ }
178
+
168
179
  this.waitQueue.push(entry);
169
180
  });
170
181
  }
@@ -221,9 +232,7 @@ class DelegationManager {
221
232
  if (sessionCount >= this.maxPerSession) {
222
233
  // Session limit reached - reject with error (consistent with tryAcquire behavior)
223
234
  // This is a hard limit, not something that will resolve by waiting longer
224
- if (debug) {
225
- console.error(`[DelegationManager] Session limit (${this.maxPerSession}) reached for queued item, rejecting`);
226
- }
235
+ console.error(`[DelegationManager] Session limit (${this.maxPerSession}) reached for queued item, rejecting`);
227
236
  toReject.push({ reject, error: new Error(`Maximum delegations per session (${this.maxPerSession}) reached for session ${parentSessionId}`) });
228
237
  // Continue to process next item in queue
229
238
  continue;
@@ -233,10 +242,8 @@ class DelegationManager {
233
242
  // Grant the slot
234
243
  this._incrementCounters(parentSessionId);
235
244
 
236
- if (debug) {
237
- const waitTime = Date.now() - queuedAt;
238
- console.error(`[DelegationManager] Granted slot from queue (waited ${waitTime}ms). Active: ${this.globalActive}/${this.maxConcurrent}`);
239
- }
245
+ const waitTime = Date.now() - queuedAt;
246
+ console.error(`[DelegationManager] Granted slot from queue (waited ${waitTime}ms). Active: ${this.globalActive}/${this.maxConcurrent}`);
240
247
 
241
248
  toResolve.push(resolve);
242
249
  }
@@ -296,6 +303,9 @@ class DelegationManager {
296
303
  if (entry.timeoutId) {
297
304
  clearTimeout(entry.timeoutId);
298
305
  }
306
+ if (entry.reminderId) {
307
+ clearInterval(entry.reminderId);
308
+ }
299
309
  // Reject pending entries so they don't hang
300
310
  if (entry.reject) {
301
311
  entry.reject(new Error('DelegationManager was cleaned up'));
package/src/downloader.js CHANGED
@@ -95,9 +95,7 @@ async function acquireFileLock(lockPath, version) {
95
95
  try {
96
96
  // Try to create lock file atomically (fails if already exists)
97
97
  await fs.writeFile(lockPath, JSON.stringify(lockData), { flag: 'wx' });
98
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
99
- console.log(`Acquired file lock: ${lockPath}`);
100
- }
98
+ console.log(`Acquired file lock: ${lockPath}`);
101
99
  return true;
102
100
  } catch (error) {
103
101
  if (error.code === 'EEXIST') {
@@ -108,17 +106,13 @@ async function acquireFileLock(lockPath, version) {
108
106
 
109
107
  if (lockAge > LOCK_TIMEOUT_MS) {
110
108
  // Lock is stale, remove it
111
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
112
- console.log(`Removing stale lock file (age: ${Math.round(lockAge / 1000)}s, pid: ${existingLock.pid})`);
113
- }
109
+ console.log(`Removing stale lock file (age: ${Math.round(lockAge / 1000)}s, pid: ${existingLock.pid})`);
114
110
  await fs.remove(lockPath);
115
111
  return false; // Caller should retry
116
112
  }
117
113
 
118
114
  // Lock is fresh, another process is downloading
119
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
120
- console.log(`Download in progress by process ${existingLock.pid}, waiting...`);
121
- }
115
+ console.log(`Download in progress by process ${existingLock.pid}, waiting...`);
122
116
  return false;
123
117
  } catch (readError) {
124
118
  // Can't read lock file, might be corrupted - remove it
@@ -180,23 +174,23 @@ async function releaseFileLock(lockPath) {
180
174
  */
181
175
  async function waitForFileLock(lockPath, binaryPath) {
182
176
  const startTime = Date.now();
177
+ let lastStatusTime = startTime;
178
+
179
+ console.log(`Waiting for file lock to clear: ${lockPath}`);
183
180
 
184
181
  // Poll in a loop until binary appears, lock expires, or we timeout
185
182
  while (Date.now() - startTime < MAX_LOCK_WAIT_MS) {
186
183
  // Check #1: Is the binary now available?
187
184
  if (await fs.pathExists(binaryPath)) {
188
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
189
- console.log(`Binary now available at ${binaryPath}, download completed by another process`);
190
- }
185
+ const waitedSeconds = Math.round((Date.now() - startTime) / 1000);
186
+ console.log(`Binary now available at ${binaryPath}, download completed by another process (waited ${waitedSeconds}s)`);
191
187
  return true;
192
188
  }
193
189
 
194
190
  // Check #2: Is the lock file gone? (download finished or failed)
195
191
  const lockExists = await fs.pathExists(lockPath);
196
192
  if (!lockExists) {
197
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
198
- console.log(`Lock file removed but binary not found - download may have failed`);
199
- }
193
+ console.log(`Lock file removed but binary not found - download may have failed`);
200
194
  return false;
201
195
  }
202
196
 
@@ -205,22 +199,24 @@ async function waitForFileLock(lockPath, binaryPath) {
205
199
  const lockData = JSON.parse(await fs.readFile(lockPath, 'utf-8'));
206
200
  const lockAge = Date.now() - lockData.timestamp;
207
201
  if (lockAge > LOCK_TIMEOUT_MS) {
208
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
209
- console.log(`Lock expired (age: ${Math.round(lockAge / 1000)}s), will retry download`);
210
- }
202
+ console.log(`Lock expired (age: ${Math.round(lockAge / 1000)}s), will retry download`);
211
203
  return false;
212
204
  }
213
205
  } catch {
214
206
  // Ignore errors reading lock file - will retry on next poll
215
207
  }
216
208
 
209
+ if (Date.now() - lastStatusTime >= 15000) {
210
+ const elapsedSeconds = Math.round((Date.now() - startTime) / 1000);
211
+ console.log(`Still waiting for file lock (${elapsedSeconds}s/${MAX_LOCK_WAIT_MS / 1000}s max)`);
212
+ lastStatusTime = Date.now();
213
+ }
214
+
217
215
  // Wait 1 second before checking again
218
216
  await new Promise(resolve => setTimeout(resolve, LOCK_POLL_INTERVAL_MS));
219
217
  }
220
218
 
221
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
222
- console.log(`Timeout waiting for file lock`);
223
- }
219
+ console.log(`Timeout waiting for file lock after ${MAX_LOCK_WAIT_MS / 1000}s`);
224
220
  return false;
225
221
  }
226
222
 
@@ -247,9 +243,7 @@ async function withDownloadLock(version, downloadFn) {
247
243
  }
248
244
  downloadLocks.delete(lockKey);
249
245
  } else {
250
- if (process.env.DEBUG === '1' || process.env.VERBOSE === '1') {
251
- console.log(`Download already in progress in this process for version ${lockKey}, waiting...`);
252
- }
246
+ console.log(`Download already in progress in this process for version ${lockKey}, waiting...`);
253
247
  try {
254
248
  return await lock.promise;
255
249
  } catch (error) {
@@ -262,10 +256,16 @@ async function withDownloadLock(version, downloadFn) {
262
256
  }
263
257
 
264
258
  // Create new download promise with timeout protection
259
+ let timeoutId = null;
265
260
  const downloadPromise = Promise.race([
266
261
  downloadFn(),
267
262
  new Promise((_, reject) =>
268
- setTimeout(() => reject(new Error(`Download timeout after ${LOCK_TIMEOUT_MS / 1000}s`)), LOCK_TIMEOUT_MS)
263
+ {
264
+ timeoutId = setTimeout(() => reject(new Error(`Download timeout after ${LOCK_TIMEOUT_MS / 1000}s`)), LOCK_TIMEOUT_MS);
265
+ if (timeoutId.unref) {
266
+ timeoutId.unref();
267
+ }
268
+ }
269
269
  )
270
270
  ]);
271
271
 
@@ -278,6 +278,9 @@ async function withDownloadLock(version, downloadFn) {
278
278
  const result = await downloadPromise;
279
279
  return result;
280
280
  } finally {
281
+ if (timeoutId) {
282
+ clearTimeout(timeoutId);
283
+ }
281
284
  // Clean up lock after download completes (success or failure)
282
285
  downloadLocks.delete(lockKey);
283
286
  }
@@ -227,18 +227,14 @@ async function processChunksParallel(chunks, extractionPrompt, maxWorkers, optio
227
227
 
228
228
  active.add(promise);
229
229
 
230
- if (options.debug) {
231
- console.error(`[analyze_all] Started processing chunk ${chunk.id}/${chunk.total}`);
232
- }
230
+ console.error(`[analyze_all] Started processing chunk ${chunk.id}/${chunk.total}`);
233
231
  }
234
232
 
235
233
  if (active.size > 0) {
236
234
  const result = await Promise.race(active);
237
235
  results.push(result);
238
236
 
239
- if (options.debug) {
240
- console.error(`[analyze_all] Completed chunk ${result.chunk.id}/${result.chunk.total}`);
241
- }
237
+ console.error(`[analyze_all] Completed chunk ${result.chunk.id}/${result.chunk.total}`);
242
238
  }
243
239
  }
244
240
 
@@ -8,7 +8,7 @@ import { resolve, isAbsolute } from 'path';
8
8
 
9
9
  // Common schemas for tool parameters (used for internal execution after XML parsing)
10
10
  export const searchSchema = z.object({
11
- query: z.string().describe('Search query with Elasticsearch syntax. Use quotes for exact matches, AND/OR for boolean logic, - for negation.'),
11
+ query: z.string().describe('Search query natural language questions or Elasticsearch-style keywords both work. For keywords: use quotes for exact phrases, AND/OR for boolean logic, - for negation. Probe handles stemming and camelCase/snake_case splitting automatically, so do NOT try case or style variations of the same keyword.'),
12
12
  path: z.string().optional().default('.').describe('Path to search in. For dependencies use "go:github.com/owner/repo", "js:package_name", or "rust:cargo_name" etc.'),
13
13
  exact: z.boolean().optional().default(false).describe('Default (false) enables stemming and keyword splitting for exploratory search - "getUserData" matches "get", "user", "data", etc. Set true for precise symbol lookup where "getUserData" matches only "getUserData". Use true when you know the exact symbol name.'),
14
14
  maxTokens: z.number().nullable().optional().describe('Maximum tokens to return. Default is 20000. Set to null for unlimited results.'),
@@ -17,7 +17,7 @@ export const searchSchema = z.object({
17
17
  });
18
18
 
19
19
  export const searchAllSchema = z.object({
20
- query: z.string().describe('Search query with Elasticsearch syntax. Use quotes for exact matches, AND/OR for boolean logic, - for negation.'),
20
+ query: z.string().describe('Search query natural language questions or Elasticsearch-style keywords both work. For keywords: use quotes for exact phrases, AND/OR for boolean logic, - for negation. Probe handles stemming and camelCase/snake_case splitting automatically, so do NOT try case or style variations of the same keyword.'),
21
21
  path: z.string().optional().default('.').describe('Path to search in.'),
22
22
  exact: z.boolean().optional().default(false).describe('Use exact matching instead of stemming.'),
23
23
  maxTokensPerPage: z.number().optional().default(20000).describe('Tokens per page when paginating. Default 20000.'),
@@ -149,7 +149,8 @@ export const attemptCompletionSchema = {
149
149
 
150
150
  // Tool descriptions (used by Vercel tool() definitions)
151
151
 
152
- export const searchDescription = 'Search code in the repository. Free-form questions are accepted, but Elasticsearch-style keyword queries work best. Use this tool first for any code-related questions.';
152
+ export const searchDescription = 'Search code in the repository. Free-form questions are accepted, but Elasticsearch-style keyword queries work best. Use this tool first for any code-related questions. NOTE: By default, search handles stemming, case-insensitive matching, and camelCase/snake_case splitting automatically — do NOT manually try keyword variations like "getAllUsers" then "get_all_users" then "GetAllUsers". One search covers all variations.';
153
+ export const searchDelegateDescription = 'Search code in the repository by asking a question. Accepts natural language questions (e.g., "How does authentication work?", "Where is the user validation logic?"). A specialized subagent breaks down your question into targeted keyword searches and returns extracted code blocks. Do NOT formulate keyword queries yourself — just ask the question naturally.';
153
154
  export const queryDescription = 'Search code using ast-grep structural pattern matching. Use this tool to find specific code structures like functions, classes, or methods.';
154
155
  export const extractDescription = 'Extract code blocks from files based on file paths and optional line numbers. Use this tool to see complete context after finding relevant files. Line numbers from output can be used with edit start_line/end_line for precise editing.';
155
156
  export const delegateDescription = 'Automatically delegate big distinct tasks to specialized probe subagents within the agentic loop. Used by AI agents to break down complex requests into focused, parallel tasks.';
@@ -9,7 +9,7 @@ import { query } from '../query.js';
9
9
  import { extract } from '../extract.js';
10
10
  import { delegate } from '../delegate.js';
11
11
  import { analyzeAll } from './analyzeAll.js';
12
- import { searchSchema, querySchema, extractSchema, delegateSchema, analyzeAllSchema, searchDescription, queryDescription, extractDescription, delegateDescription, analyzeAllDescription, parseTargets, parseAndResolvePaths, resolveTargetPath } from './common.js';
12
+ import { searchSchema, querySchema, extractSchema, delegateSchema, analyzeAllSchema, searchDescription, searchDelegateDescription, queryDescription, extractDescription, delegateDescription, analyzeAllDescription, parseTargets, parseAndResolvePaths, resolveTargetPath } from './common.js';
13
13
  import { existsSync } from 'fs';
14
14
  import { formatErrorForAI } from '../utils/error-types.js';
15
15
  import { annotateOutputWithHashes } from './hashline.js';
@@ -143,11 +143,41 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
143
143
  '- extract: Verify code snippets to ensure targets are actually relevant before including them.',
144
144
  '- listFiles: Understand directory structure to find where relevant code might live.',
145
145
  '',
146
- 'Strategy for complex queries:',
146
+ 'CRITICAL - How probe search works (do NOT ignore):',
147
+ '- By default (exact=false), probe ALREADY handles stemming, case-insensitive matching, and camelCase/snake_case splitting.',
148
+ '- Searching "allowed_ips" ALREADY matches "AllowedIPs", "allowedIps", "allowed_ips", etc. Do NOT manually try case/style variations.',
149
+ '- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
150
+ '- NEVER repeat the same search query — you will get the same results.',
151
+ '- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful — probe handles it.',
152
+ '- If a search returns no results, the term likely does not exist in that path. Try a genuinely DIFFERENT keyword or concept, not a variation.',
153
+ '- If 2-3 consecutive searches return no results for a concept, STOP searching for it and move on.',
154
+ '',
155
+ 'GOOD search strategy (do this):',
156
+ ' Query: "How does authentication work and how are sessions managed?"',
157
+ ' → search "authentication" → search "session management" (two different concepts)',
158
+ ' Query: "Find the IP allowlist middleware"',
159
+ ' → search "allowlist middleware" (one search, probe handles IP/ip/Ip variations)',
160
+ ' Query: "How does BM25 scoring work with SIMD optimization?"',
161
+ ' → search "BM25 scoring" → search "SIMD optimization" (two different concepts)',
162
+ '',
163
+ 'BAD search strategy (never do this):',
164
+ ' → search "AllowedIPs" → search "allowedIps" → search "allowed_ips" (WRONG: these are trivial case variations, probe handles them)',
165
+ ' → search "CIDR" → search "cidr" → search "Cidr" → search "*cidr*" (WRONG: same keyword repeated with variations)',
166
+ ' → search "error handling" → search "error handling" → search "error handling" (WRONG: repeating exact same query)',
167
+ '',
168
+ 'Keyword tips:',
169
+ '- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
170
+ '- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
171
+ '- To bypass stopword filtering: wrap terms in quotes ("return", "struct") or set exact=true. Both disable stemming and splitting too.',
172
+ '- Multiple words without operators use OR logic: foo bar = foo OR bar. Use AND explicitly if you need both: foo AND bar.',
173
+ '- camelCase terms are split: getUserData becomes "get", "user", "data" — so one search covers all naming styles.',
174
+ '',
175
+ 'Strategy:',
147
176
  '1. Analyze the query - identify key concepts, entities, and relationships',
148
- '2. Run focused searches for each independent concept (e.g., for "how do payments work and how are emails sent", search "payments" and "emails" separately since they are unrelated)',
149
- '3. Use extract to verify relevance of promising results',
150
- '4. Combine all relevant targets in your final response',
177
+ '2. Run ONE focused search per concept with the most natural keyword. Trust probe to handle variations.',
178
+ '3. If a search returns results, use extract to verify relevance',
179
+ '4. Only try a different keyword if the first one returned irrelevant results (not if it returned no results — that means the concept is absent)',
180
+ '5. Combine all relevant targets in your final response',
151
181
  '',
152
182
  `Query: ${searchQuery}`,
153
183
  `Search path(s): ${searchPath}`,
@@ -186,10 +216,16 @@ export const searchTool = (options = {}) => {
186
216
  return result;
187
217
  };
188
218
 
219
+ // Track previous non-paginated searches to detect and block duplicates
220
+ const previousSearches = new Set();
221
+ // Track pagination counts per query to cap runaway pagination
222
+ const paginationCounts = new Map();
223
+ const MAX_PAGES_PER_QUERY = 3;
224
+
189
225
  return tool({
190
226
  name: 'search',
191
227
  description: searchDelegate
192
- ? `${searchDescription} (delegates code search to a subagent and returns extracted code blocks)`
228
+ ? searchDelegateDescription
193
229
  : searchDescription,
194
230
  inputSchema: searchSchema,
195
231
  execute: async ({ query: searchQuery, path, allow_tests, exact, maxTokens: paramMaxTokens, language, session, nextPage }) => {
@@ -236,6 +272,29 @@ export const searchTool = (options = {}) => {
236
272
  };
237
273
 
238
274
  if (!searchDelegate) {
275
+ // Block duplicate non-paginated searches (models sometimes repeat the exact same call)
276
+ // Allow pagination: only nextPage=true is a legitimate repeat of the same query
277
+ const searchKey = `${searchQuery}::${searchPath}::${exact || false}`;
278
+ if (!nextPage) {
279
+ if (previousSearches.has(searchKey)) {
280
+ if (debug) {
281
+ console.error(`[DEDUP] Blocked duplicate search: "${searchQuery}" in "${searchPath}"`);
282
+ }
283
+ return 'DUPLICATE SEARCH BLOCKED: You already searched for this exact query in this path. Do NOT repeat the same search. If you need more results, set nextPage=true with the session ID from the previous search. Otherwise, try a genuinely different keyword, use extract to examine results you already found, or use attempt_completion if you have enough information.';
284
+ }
285
+ previousSearches.add(searchKey);
286
+ paginationCounts.set(searchKey, 0);
287
+ } else {
288
+ // Cap pagination to prevent runaway page-through of broad queries
289
+ const pageCount = (paginationCounts.get(searchKey) || 0) + 1;
290
+ paginationCounts.set(searchKey, pageCount);
291
+ if (pageCount > MAX_PAGES_PER_QUERY) {
292
+ if (debug) {
293
+ console.error(`[DEDUP] Blocked excessive pagination (page ${pageCount}/${MAX_PAGES_PER_QUERY}): "${searchQuery}" in "${searchPath}"`);
294
+ }
295
+ return `PAGINATION LIMIT REACHED: You have already retrieved ${MAX_PAGES_PER_QUERY} pages of results for this query. You have enough results — use extract to examine specific files, or use attempt_completion to return your findings.`;
296
+ }
297
+ }
239
298
  try {
240
299
  const result = maybeAnnotate(await runRawSearch());
241
300
  // Track files found in search results for staleness detection