@probelabs/probe 0.6.0-rc301 → 0.6.0-rc303

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3746,19 +3746,23 @@ ${activeToolsDesc}
3746
3746
  - Time budget remaining: ${Math.round(remainingBudgetMs / 60000)} minutes
3747
3747
  - Max per extension: ${maxPerReqMin} minutes
3748
3748
 
3749
- Decide whether the agent should get more time. EXTEND if:
3749
+ Your DEFAULT should be to EXTEND. The agent is working on a task and deserves time to finish. Only decline if you see clear evidence of a problem.
3750
+
3751
+ EXTEND (this should be your default) if:
3750
3752
  - Tools are actively running (especially delegates or complex analysis) — they need time to finish
3751
- - The agent is making clear progress on a complex task
3753
+ - The agent is making progress, even if slowly — complex tasks take time
3752
3754
  - New information is being gathered that will improve the final answer
3755
+ - The agent is iterating on an approach (trying different searches, refining queries) — this is normal problem-solving, not a loop
3756
+ - There is remaining budget and the task is not yet complete
3757
+ - When in doubt, extend — it's better to give the agent a chance than to cut it off prematurely
3753
3758
 
3754
- DO NOT EXTEND if:
3755
- - The agent appears stuck in a loop (repeating the same tool calls or getting the same errors)
3756
- - The conversation shows the agent retrying failed operations without changing approach
3757
- - The agent has enough information to answer but keeps searching for more
3758
- - Tool calls are returning empty or error results repeatedly
3759
- - The agent is doing redundant work (searching for things it already found)
3759
+ DO NOT EXTEND only if you see CLEAR evidence of:
3760
+ - The agent is stuck in an obvious loop repeating the EXACT same tool calls with the EXACT same arguments and getting the same errors back-to-back (3+ times)
3761
+ - The agent is retrying a fundamentally broken operation without changing its approach at all
3762
+ - Tool calls are consistently returning errors or empty results AND the agent is not adapting
3763
+ - The conversation clearly shows the agent has all the information it needs and is just making redundant calls
3760
3764
 
3761
- A stuck agent will not recover with more time — it will just burn the budget. Better to force it to answer with what it has.
3765
+ IMPORTANT: Iterating, refining, or trying variations is NOT the same as being stuck in a loop. A loop means identical repeated calls with no variation. Be generous with time — a slightly longer response time is much better than a prematurely cut-off incomplete answer.
3762
3766
 
3763
3767
  Respond with ONLY valid JSON (no markdown, no explanation):
3764
3768
  {"extend": true, "minutes": <1-${maxPerReqMin}>, "reason": "your reason here"}
@@ -3885,6 +3889,20 @@ or
3885
3889
 
3886
3890
  await this._initiateGracefulStop(gracefulTimeoutState, `observer declined: ${decision.reason}`);
3887
3891
  }
3892
+
3893
+ // Return decision data for span enrichment
3894
+ return {
3895
+ decision: decision.extend ? 'extended' : 'declined',
3896
+ reason: decision.reason || '',
3897
+ ...(decision.extend ? {
3898
+ granted_ms: grantedMs,
3899
+ granted_min: grantedMin,
3900
+ budget_remaining_ms: remainingBudgetMs - grantedMs,
3901
+ } : {}),
3902
+ extensions_used: negotiatedTimeoutState.extensionsUsed,
3903
+ max_requests: negotiatedTimeoutState.maxRequests,
3904
+ total_extra_time_ms: negotiatedTimeoutState.totalExtraTimeMs,
3905
+ };
3888
3906
  };
3889
3907
 
3890
3908
  try {
@@ -3894,6 +3912,23 @@ or
3894
3912
  'timeout.extensions_used': negotiatedTimeoutState.extensionsUsed,
3895
3913
  'timeout.active_tools_count': activeToolsList.length,
3896
3914
  'timeout.remaining_budget_ms': remainingBudgetMs,
3915
+ }, (span, result) => {
3916
+ if (result) {
3917
+ span.setAttributes({
3918
+ 'observer.decision': result.decision,
3919
+ 'observer.reason': result.reason,
3920
+ 'observer.extensions_used': result.extensions_used,
3921
+ 'observer.max_requests': result.max_requests,
3922
+ 'observer.total_extra_time_ms': result.total_extra_time_ms,
3923
+ });
3924
+ if (result.decision === 'extended') {
3925
+ span.setAttributes({
3926
+ 'observer.granted_ms': result.granted_ms,
3927
+ 'observer.granted_min': result.granted_min,
3928
+ 'observer.budget_remaining_ms': result.budget_remaining_ms,
3929
+ });
3930
+ }
3931
+ }
3897
3932
  });
3898
3933
  } else {
3899
3934
  await observerFn();
@@ -4033,7 +4068,7 @@ or
4033
4068
  }
4034
4069
  return {
4035
4070
  toolChoice: 'none',
4036
- userMessage: `⚠️ TIME LIMIT REACHED. You are running out of time. You have ${remaining} step(s) remaining. Provide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
4071
+ userMessage: `⚠️ TIME BUDGET EXHAUSTED. Your allocated time for this task has run out. You have ${remaining} step(s) remaining to provide your answer.\n\nIMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time.\n\nDo NOT say things like "the system is shutting down" or "try again later" — the user submitted a request and is waiting for YOUR answer right now.\n\nProvide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
4037
4072
  };
4038
4073
  }
4039
4074
 
@@ -4571,8 +4606,10 @@ Double-check your response based on the criteria above. If everything looks good
4571
4606
  } catch {}
4572
4607
  }
4573
4608
 
4574
- const summaryPrompt = `Your operation was interrupted by a timeout observer because the time limit was reached. ` +
4575
- `Some of your tool calls were cancelled mid-execution.\n\n` +
4609
+ const summaryPrompt = `Your allocated time budget for this task has been exhausted. ` +
4610
+ `Some of your tool calls were cancelled mid-execution because the timeout observer determined the time limit was reached.\n\n` +
4611
+ `IMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time. ` +
4612
+ `Do NOT say things like "the system is shutting down" or "try again later." The user is waiting for your answer RIGHT NOW.\n\n` +
4576
4613
  `Please provide a DETAILED summary of:\n` +
4577
4614
  `1. What you were asked to do (the original task)\n` +
4578
4615
  `2. What you accomplished — include ALL findings, code snippets, data, and conclusions you gathered\n` +
@@ -4615,6 +4652,13 @@ Double-check your response based on the criteria above. If everything looks good
4615
4652
  if (this.tracer) {
4616
4653
  summaryText = await this.tracer.withSpan('negotiated_timeout.abort_summary', summaryFn, {
4617
4654
  'summary.conversation_messages': currentMessages.length,
4655
+ 'observer.was_timeout': true,
4656
+ }, (span, result) => {
4657
+ if (result) {
4658
+ span.setAttributes({
4659
+ 'observer.summary_length': result.length,
4660
+ });
4661
+ }
4618
4662
  });
4619
4663
  } else {
4620
4664
  summaryText = await summaryFn();
@@ -132,8 +132,9 @@ export function parseSimpleCommand(command) {
132
132
  /&$/, // Background execution
133
133
  /\$\(/, // Command substitution $()
134
134
  /`/, // Command substitution ``
135
- />/, // Redirection >
136
- /</, // Redirection <
135
+ // Note: > and < (redirection) are intentionally NOT in this list.
136
+ // They are not command separators — they redirect I/O on a single command.
137
+ // The base command is still checked against allow/deny lists.
137
138
  /\*\*/, // Glob patterns (potentially dangerous)
138
139
  /^\s*\{.*,.*\}|\{.*\.\.\.*\}/, // Brace expansion like {a,b} or {1..10} (but not find {} placeholders)
139
140
  ];
@@ -257,6 +258,7 @@ export function isComplexPattern(pattern) {
257
258
  if (!pattern || typeof pattern !== 'string') return false;
258
259
 
259
260
  // Check for operators in the pattern (aligned with complexPatterns in parseSimpleCommand)
261
+ // Note: > and < are not included — redirection is not a command separator
260
262
  const operatorPatterns = [
261
263
  /\|/, // Pipes
262
264
  /&&/, // Logical AND
@@ -266,8 +268,6 @@ export function isComplexPattern(pattern) {
266
268
  /&$/, // Background execution
267
269
  /\$\(/, // Command substitution $()
268
270
  /`/, // Command substitution ``
269
- />/, // Redirection >
270
- /</, // Redirection <
271
271
  ];
272
272
 
273
273
  return operatorPatterns.some(p => p.test(pattern));
@@ -94,12 +94,14 @@ export class BashPermissionChecker {
94
94
  * @param {string[]} [config.deny] - Additional deny patterns (always win)
95
95
  * @param {boolean} [config.disableDefaultAllow] - Disable default allow list
96
96
  * @param {boolean} [config.disableDefaultDeny] - Disable default deny list
97
+ * @param {boolean} [config.allowEdit] - Whether file editing is allowed (controls output redirection)
97
98
  * @param {boolean} [config.debug] - Enable debug logging
98
99
  * @param {Object} [config.tracer] - Optional tracer for telemetry
99
100
  */
100
101
  constructor(config = {}) {
101
102
  this.debug = config.debug || false;
102
103
  this.tracer = config.tracer || null;
104
+ this.allowEdit = config.allowEdit || false;
103
105
 
104
106
  // Separate default and custom patterns for priority-based resolution
105
107
  this.defaultAllowPatterns = config.disableDefaultAllow ? [] : [...DEFAULT_ALLOW_PATTERNS];
@@ -203,6 +205,27 @@ export class BashPermissionChecker {
203
205
  console.log(`[BashPermissions] Parsed: ${parsed.command} with args: [${parsed.args.join(', ')}]`);
204
206
  }
205
207
 
208
+ // Block output redirection when allowEdit is false
209
+ // Output redirection (>, >>) writes to files, which is an edit operation
210
+ if (!this.allowEdit && parsed.args.some(arg => arg === '>' || arg === '>>')) {
211
+ const result = {
212
+ allowed: false,
213
+ reason: 'Output redirection (> or >>) requires edit permissions (allowEdit)',
214
+ command: command,
215
+ parsed: parsed
216
+ };
217
+ if (this.debug) {
218
+ console.log(`[BashPermissions] DENIED - output redirection without allowEdit`);
219
+ }
220
+ this.recordBashEvent('permission.denied', {
221
+ command,
222
+ parsedCommand: parsed.command,
223
+ reason: 'output_redirection_without_allow_edit',
224
+ isComplex: false
225
+ });
226
+ return result;
227
+ }
228
+
206
229
  // Priority-based permission check:
207
230
  // 1. Custom deny always wins
208
231
  // 2. Custom allow overrides default deny
@@ -534,6 +557,17 @@ export class BashPermissionChecker {
534
557
  break;
535
558
  }
536
559
 
560
+ // Block output redirection in components when allowEdit is false
561
+ if (!this.allowEdit && parsed.args && parsed.args.some(arg => arg === '>' || arg === '>>')) {
562
+ if (this.debug) {
563
+ console.log(`[BashPermissions] Component "${component}" has output redirection without allowEdit`);
564
+ }
565
+ allAllowed = false;
566
+ deniedComponent = component;
567
+ deniedReason = 'Output redirection (> or >>) requires edit permissions (allowEdit)';
568
+ break;
569
+ }
570
+
537
571
  // Check using same priority logic as simple commands:
538
572
  // 1. Custom deny always wins
539
573
  if (matchesAnyPattern(parsed, this.customDenyPatterns)) {
@@ -33,6 +33,7 @@ export const bashTool = (options = {}) => {
33
33
  debug = false,
34
34
  cwd,
35
35
  allowedFolders = [],
36
+ allowEdit = false,
36
37
  workspaceRoot: providedWorkspaceRoot,
37
38
  tracer = null
38
39
  } = options;
@@ -47,6 +48,7 @@ export const bashTool = (options = {}) => {
47
48
  deny: bashConfig.deny,
48
49
  disableDefaultAllow: bashConfig.disableDefaultAllow,
49
50
  disableDefaultDeny: bashConfig.disableDefaultDeny,
51
+ allowEdit,
50
52
  debug,
51
53
  tracer
52
54
  });
@@ -248,8 +248,23 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
248
248
  '- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
249
249
  '- NEVER repeat the same search query — you will get the same results. Changing the path does NOT change this.',
250
250
  '- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful — probe handles it.',
251
- '- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.',
252
- '- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.',
251
+ '',
252
+ 'When a search returns no results:',
253
+ '- If you searched a SUBFOLDER (e.g., path="gateway/"), the term might exist elsewhere.',
254
+ ' Try searching from the workspace root (omit the path parameter) or a different directory.',
255
+ ' But do NOT retry the same subfolder with different quoting — that will not help.',
256
+ '- If you searched the WORKSPACE ROOT and got no results, the term does not exist in this codebase.',
257
+ ' Changing quotes, adding "func " prefix, or switching to method syntax will NOT help.',
258
+ '- These are ALL the same failed search, NOT different searches:',
259
+ ' search("func ctxGetData") → no results',
260
+ ' search("ctxGetData") → no results ← WASTED, same concept, different quoting',
261
+ ' search(ctxGetData) → no results ← WASTED, same concept, no quotes',
262
+ ' search("ctx.GetData") → no results ← WASTED, method syntax of same concept',
263
+ ' After the FIRST "no results" at a given scope, either widen the search path or try',
264
+ ' a fundamentally different approach: search for a broader concept, use listFiles',
265
+ ' to discover actual function names, or extract a known file to read real code.',
266
+ '- If 2 searches return no results for a concept (across different scopes), the code likely',
267
+ ' uses different naming than you expect — discover the real names via extract or listFiles.',
253
268
  '',
254
269
  'When to use exact=true:',
255
270
  '- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).',
@@ -302,6 +317,21 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
302
317
  ' → search "ForwardMessage" → search "ForwardMessage" → search "ForwardMessage" (WRONG: repeating the exact same query)',
303
318
  ' → search "authentication" → wait → search "session management" → wait (WRONG: these are independent, run them in parallel)',
304
319
  '',
320
+ ' WORST pattern — retrying a non-existent function with quote/syntax variations (this wastes 30 minutes):',
321
+ ' → search "func ctxGetData" → no results',
322
+ ' → search "ctxGetData" → no results ← WRONG: same term without "func" prefix',
323
+ ' → search "ctx.GetData" → no results ← WRONG: method syntax of same concept',
324
+ ' → search "ctx.SetData" → no results ← WRONG: Set variant of same concept',
325
+ ' → search ctxGetData → no results ← WRONG: unquoted version of same term',
326
+ ' → extract api.go → extract api.go → extract api.go (8 times!) ← WRONG: re-reading same file',
327
+ ' FIX: After "func ctxGetData" returns no results in gateway/:',
328
+ ' Option A: Widen scope — search from the workspace root (omit path) in case the',
329
+ ' function is defined in a different package (e.g., apidef/, user/, config/).',
330
+ ' Option B: Discover real names — extract a file you KNOW uses context (e.g., a',
331
+ ' middleware file) and READ what functions it actually calls.',
332
+ ' Option C: Browse — use listFiles to see what files exist and extract the relevant ones.',
333
+ ' NEVER: retry the same concept with different quoting in the same directory.',
334
+ '',
305
335
  'Keyword tips:',
306
336
  '- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
307
337
  '- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
@@ -340,7 +370,7 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
340
370
  ' - Type references and imports → include type definitions.',
341
371
  ' - Registered handlers/middleware → include all registered items.',
342
372
  '6. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.',
343
- '7. If a search returns NO results, the term does not exist. Do NOT retry with variations. Move on.',
373
+ '7. If a search returns NO results: widen the path scope if you searched a subfolder, or move on. Do NOT retry with quote/syntax variations they search the same index.',
344
374
  '8. Once you have enough targets (typically 5-15), output your final JSON answer immediately.',
345
375
  '',
346
376
  `Query: ${searchQuery}`,
@@ -388,8 +418,30 @@ export const searchTool = (options = {}) => {
388
418
  const dupBlockCounts = new Map();
389
419
  // Track pagination counts per query to cap runaway pagination
390
420
  const paginationCounts = new Map();
421
+ // Track consecutive no-result searches (circuit breaker)
422
+ let consecutiveNoResults = 0;
423
+ const MAX_CONSECUTIVE_NO_RESULTS = 4;
424
+ // Track normalized query concepts for fuzzy dedup (catches quote/syntax variations)
425
+ const failedConcepts = new Map(); // normalizedKey → count
391
426
  const MAX_PAGES_PER_QUERY = 3;
392
427
 
428
/**
 * Normalize a search query to a canonical "concept" key so that purely
 * syntactic variations of the same identifier collapse to one string.
 *
 * Steps, in order:
 *   1. Trim surrounding whitespace (so padded quoted queries are handled).
 *   2. Strip one leading and one trailing quote character (" or ').
 *   3. Remove dots, underscores, hyphens and all whitespace.
 *   4. Lowercase.
 *
 * "ctxGetData", "ctx.GetData", "ctx_get_data", ' "ctxGetData" ' all → "ctxgetdata"
 *
 * Note: language keywords (func, type) are NOT stripped, so
 * "func ctxGetData" → "funcctxgetdata" is a different concept key.
 *
 * @param {string} query - Raw search query as supplied by the caller.
 * @returns {string} Normalized key; '' for empty or non-string input.
 */
function normalizeQueryConcept(query) {
  // Guard against null/undefined/non-string input instead of throwing on .replace
  if (typeof query !== 'string') return '';
  return query
    .trim() // must run before quote stripping, otherwise ^ and $ hit the padding
    .replace(/^["']|["']$/g, '') // strip outer quotes
    .replace(/[._\-\s]+/g, '') // "ctx.GetData" / "ctx_get_data" / "ctx get data" → "ctxGetData"
    .toLowerCase();
}
444
+
393
445
  return tool({
394
446
  name: 'search',
395
447
  description: searchDelegate
@@ -478,6 +530,35 @@ export const searchTool = (options = {}) => {
478
530
  }
479
531
  previousSearches.set(searchKey, { hadResults: false });
480
532
  paginationCounts.set(searchKey, 0);
533
+
534
+ // Fuzzy concept dedup: catch quote/syntax variations of the same failed concept
535
+ // e.g., "ctxGetData", "ctx.GetData", "ctx_get_data" all normalize to "ctxgetdata" (keywords such as "func" are kept, so "func ctxGetData" → "funcctxgetdata")
536
+ const normalizedKey = `${searchPath}::${normalizeQueryConcept(searchQuery)}`;
537
+ if (failedConcepts.has(normalizedKey) && failedConcepts.get(normalizedKey) >= 2) {
538
+ const conceptCount = failedConcepts.get(normalizedKey) + 1;
539
+ failedConcepts.set(normalizedKey, conceptCount);
540
+ if (debug) {
541
+ console.error(`[CONCEPT-DEDUP] Blocked variation of failed concept (${conceptCount}x): "${searchQuery}" normalized to "${normalizeQueryConcept(searchQuery)}"`);
542
+ }
543
+ const isSubfolder = path && path !== effectiveSearchCwd && path !== '.';
544
+ const scopeHint = isSubfolder
545
+ ? `\n- Try searching from the workspace root (omit the path parameter) — the term may exist in a different directory`
546
+ : `\n- The term does not exist in this codebase at any path`;
547
+ return `CONCEPT ALREADY FAILED (${conceptCount} variations tried). You already searched for "${normalizeQueryConcept(searchQuery)}" with different quoting/syntax in this path and got NO results each time. Changing quotes, adding "func" prefix, or switching to method syntax will NOT change the results.\n\nChange your strategy:${scopeHint}\n- Use extract on a file you ALREADY found to read actual code and discover real function/type names\n- Use listFiles to browse directories and find what functions actually exist\n- Search for a BROADER concept (e.g., instead of "ctxGetData", try "context" or "middleware data access")\n- If you have enough information from prior searches, provide your final answer NOW`;
548
+ }
549
+
550
+ // Circuit breaker: too many consecutive no-result searches means the model
551
+ // is stuck in a loop guessing names that don't exist
552
+ if (consecutiveNoResults >= MAX_CONSECUTIVE_NO_RESULTS) {
553
+ if (debug) {
554
+ console.error(`[CIRCUIT-BREAKER] ${consecutiveNoResults} consecutive no-result searches, blocking: "${searchQuery}"`);
555
+ }
556
+ const isSubfolderCB = path && path !== effectiveSearchCwd && path !== '.';
557
+ const cbScopeHint = isSubfolderCB
558
+ ? `\n- You have been searching in "${path}" — try searching from the workspace root or a different directory`
559
+ : '';
560
+ return `CIRCUIT BREAKER: Your last ${consecutiveNoResults} searches ALL returned no results. You appear to be guessing function/type names that don't match what's actually in the code.\n\nChange your approach:${cbScopeHint}\n1. Use extract on files you already found — read the actual code to discover real function names\n2. Use listFiles to browse directories and see what files/functions actually exist\n3. If you found some results earlier, those are likely sufficient — provide your final answer\n\nRetrying search query variations will not help. Discover real names from real code instead.`;
561
+ }
481
562
  } else {
482
563
  // Cap pagination to prevent runaway page-through of broad queries
483
564
  const pageCount = (paginationCounts.get(searchKey) || 0) + 1;
@@ -493,11 +574,28 @@ export const searchTool = (options = {}) => {
493
574
  const result = maybeAnnotate(await runRawSearch());
494
575
  // Track whether this search had results for better dedup messages
495
576
  if (typeof result === 'string' && result.includes('No results found')) {
577
+ // Track consecutive no-results and failed concepts for circuit breaker
578
+ consecutiveNoResults++;
579
+ const normalizedKey = `${searchPath}::${normalizeQueryConcept(searchQuery)}`;
580
+ failedConcepts.set(normalizedKey, (failedConcepts.get(normalizedKey) || 0) + 1);
581
+ if (debug) {
582
+ console.error(`[NO-RESULTS] consecutiveNoResults=${consecutiveNoResults}, concept "${normalizeQueryConcept(searchQuery)}" failed ${failedConcepts.get(normalizedKey)}x`);
583
+ }
496
584
  // Append contextual hint for ticket/issue ID queries
497
585
  if (/^[A-Z]+-\d+$/.test(searchQuery.trim()) || /^[A-Z]+-\d+$/.test(searchQuery.replace(/"/g, '').trim())) {
498
586
  return result + '\n\n⚠️ Your query looks like a ticket/issue ID (e.g., JIRA-1234). Ticket IDs are rarely present in source code. Search for the technical concepts described in the ticket instead (e.g., function names, error messages, variable names).';
499
587
  }
588
+ // Add a hint when approaching the circuit breaker threshold
589
+ if (consecutiveNoResults >= MAX_CONSECUTIVE_NO_RESULTS - 1) {
590
+ const isSubfolderWarn = path && path !== effectiveSearchCwd && path !== '.';
591
+ const warnScopeHint = isSubfolderWarn
592
+ ? ` You are searching in "${path}" — consider searching from the workspace root or a different directory.`
593
+ : '';
594
+ return result + `\n\n⚠️ WARNING: ${consecutiveNoResults} consecutive searches returned no results.${warnScopeHint} Before your next action: use extract on a file you already found to read actual code, or use listFiles to discover what functions really exist. One more failed search will trigger the circuit breaker.`;
595
+ }
500
596
  } else if (typeof result === 'string') {
597
+ // Successful search — reset consecutive counter
598
+ consecutiveNoResults = 0;
501
599
  const entry = previousSearches.get(searchKey);
502
600
  if (entry) entry.hadResults = true;
503
601
  }