npm - @probelabs/probe - Versions diffs - 0.6.0-rc301 → 0.6.0-rc303 - Mend

@probelabs/probe 0.6.0-rc301 → 0.6.0-rc303

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/bin/binaries/{probe-v0.6.0-rc301-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc303-aarch64-apple-darwin.tar.gz} RENAMED Viewed

Binary file

package/bin/binaries/{probe-v0.6.0-rc301-aarch64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc303-aarch64-unknown-linux-musl.tar.gz} RENAMED Viewed

Binary file

package/bin/binaries/{probe-v0.6.0-rc301-x86_64-apple-darwin.tar.gz → probe-v0.6.0-rc303-x86_64-apple-darwin.tar.gz} RENAMED Viewed

Binary file

package/bin/binaries/{probe-v0.6.0-rc301-x86_64-pc-windows-msvc.zip → probe-v0.6.0-rc303-x86_64-pc-windows-msvc.zip} RENAMED Viewed

Binary file

package/bin/binaries/{probe-v0.6.0-rc301-x86_64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc303-x86_64-unknown-linux-musl.tar.gz} RENAMED Viewed

Binary file

package/build/agent/ProbeAgent.js CHANGED Viewed

@@ -3746,19 +3746,23 @@ ${activeToolsDesc}
 - Time budget remaining: ${Math.round(remainingBudgetMs / 60000)} minutes
 - Max per extension: ${maxPerReqMin} minutes
-Decide whether the agent should get more time. EXTEND if:
+Your DEFAULT should be to EXTEND. The agent is working on a task and deserves time to finish. Only decline if you see clear evidence of a problem.
+EXTEND (this should be your default) if:
 - Tools are actively running (especially delegates or complex analysis) — they need time to finish
-- The agent is making clear progress on a complex task
+- The agent is making progress, even if slowly — complex tasks take time
 - New information is being gathered that will improve the final answer
+- The agent is iterating on an approach (trying different searches, refining queries) — this is normal problem-solving, not a loop
+- There is remaining budget and the task is not yet complete
+- When in doubt, extend — it's better to give the agent a chance than to cut it off prematurely
-DO NOT EXTEND if:
-- The agent appears stuck in a loop (repeating the same tool calls or getting the same errors)
-- The conversation shows the agent retrying failed operations without changing approach
-- The agent has enough information to answer but keeps searching for more
-- Tool calls are returning empty or error results repeatedly
-- The agent is doing redundant work (searching for things it already found)
+DO NOT EXTEND only if you see CLEAR evidence of:
+- The agent is stuck in an obvious loop — repeating the EXACT same tool calls with the EXACT same arguments and getting the same errors back-to-back (3+ times)
+- The agent is retrying a fundamentally broken operation without changing its approach at all
+- Tool calls are consistently returning errors or empty results AND the agent is not adapting
+- The conversation clearly shows the agent has all the information it needs and is just making redundant calls
-A stuck agent will not recover with more time — it will just burn the budget. Better to force it to answer with what it has.
+IMPORTANT: Iterating, refining, or trying variations is NOT the same as being stuck in a loop. A loop means identical repeated calls with no variation. Be generous with time — a slightly longer response time is much better than a prematurely cut-off incomplete answer.
 Respond with ONLY valid JSON (no markdown, no explanation):
 {"extend": true, "minutes": <1-${maxPerReqMin}>, "reason": "your reason here"}
@@ -3885,6 +3889,20 @@ or
             await this._initiateGracefulStop(gracefulTimeoutState, `observer declined: ${decision.reason}`);
           }
+          // Return decision data for span enrichment
+          return {
+            decision: decision.extend ? 'extended' : 'declined',
+            reason: decision.reason || '',
+            ...(decision.extend ? {
+              granted_ms: grantedMs,
+              granted_min: grantedMin,
+              budget_remaining_ms: remainingBudgetMs - grantedMs,
+            } : {}),
+            extensions_used: negotiatedTimeoutState.extensionsUsed,
+            max_requests: negotiatedTimeoutState.maxRequests,
+            total_extra_time_ms: negotiatedTimeoutState.totalExtraTimeMs,
+          };
         };
         try {
@@ -3894,6 +3912,23 @@ or
               'timeout.extensions_used': negotiatedTimeoutState.extensionsUsed,
               'timeout.active_tools_count': activeToolsList.length,
               'timeout.remaining_budget_ms': remainingBudgetMs,
+            }, (span, result) => {
+              if (result) {
+                span.setAttributes({
+                  'observer.decision': result.decision,
+                  'observer.reason': result.reason,
+                  'observer.extensions_used': result.extensions_used,
+                  'observer.max_requests': result.max_requests,
+                  'observer.total_extra_time_ms': result.total_extra_time_ms,
+                });
+                if (result.decision === 'extended') {
+                  span.setAttributes({
+                    'observer.granted_ms': result.granted_ms,
+                    'observer.granted_min': result.granted_min,
+                    'observer.budget_remaining_ms': result.budget_remaining_ms,
+                  });
+                }
+              }
             });
           } else {
             await observerFn();
@@ -4033,7 +4068,7 @@ or
                   }
                   return {
                     toolChoice: 'none',
-                    userMessage: `⚠️ TIME LIMIT REACHED. You are running out of time. You have ${remaining} step(s) remaining. Provide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
+                    userMessage: `⚠️ TIME BUDGET EXHAUSTED. Your allocated time for this task has run out. You have ${remaining} step(s) remaining to provide your answer.\n\nIMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time.\n\nDo NOT say things like "the system is shutting down" or "try again later" — the user submitted a request and is waiting for YOUR answer right now.\n\nProvide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
                   };
                 }
@@ -4571,8 +4606,10 @@ Double-check your response based on the criteria above. If everything looks good
                 } catch {}
               }
-              const summaryPrompt = `Your operation was interrupted by a timeout observer because the time limit was reached. ` +
-                `Some of your tool calls were cancelled mid-execution.\n\n` +
+              const summaryPrompt = `Your allocated time budget for this task has been exhausted. ` +
+                `Some of your tool calls were cancelled mid-execution because the timeout observer determined the time limit was reached.\n\n` +
+                `IMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time. ` +
+                `Do NOT say things like "the system is shutting down" or "try again later." The user is waiting for your answer RIGHT NOW.\n\n` +
                 `Please provide a DETAILED summary of:\n` +
                 `1. What you were asked to do (the original task)\n` +
                 `2. What you accomplished — include ALL findings, code snippets, data, and conclusions you gathered\n` +
@@ -4615,6 +4652,13 @@ Double-check your response based on the criteria above. If everything looks good
               if (this.tracer) {
                 summaryText = await this.tracer.withSpan('negotiated_timeout.abort_summary', summaryFn, {
                   'summary.conversation_messages': currentMessages.length,
+                  'observer.was_timeout': true,
+                }, (span, result) => {
+                  if (result) {
+                    span.setAttributes({
+                      'observer.summary_length': result.length,
+                    });
+                  }
                 });
               } else {
                 summaryText = await summaryFn();

package/build/agent/bashCommandUtils.js CHANGED Viewed

@@ -132,8 +132,9 @@ export function parseSimpleCommand(command) {
     /&$/,           // Background execution
     /\$\(/,         // Command substitution $()
     /`/,            // Command substitution ``
-    />/,            // Redirection >
-    /</,            // Redirection <
+    // Note: > and < (redirection) are intentionally NOT in this list.
+    // They are not command separators — they redirect I/O on a single command.
+    // The base command is still checked against allow/deny lists.
     /\*\*/,         // Glob patterns (potentially dangerous)
     /^\s*\{.*,.*\}|\{.*\.\.\.*\}/,  // Brace expansion like {a,b} or {1..10} (but not find {} placeholders)
   ];
@@ -257,6 +258,7 @@ export function isComplexPattern(pattern) {
   if (!pattern || typeof pattern !== 'string') return false;
   // Check for operators in the pattern (aligned with complexPatterns in parseSimpleCommand)
+  // Note: > and < are not included — redirection is not a command separator
   const operatorPatterns = [
     /\|/,           // Pipes
     /&&/,           // Logical AND
@@ -266,8 +268,6 @@ export function isComplexPattern(pattern) {
     /&$/,           // Background execution
     /\$\(/,         // Command substitution $()
     /`/,            // Command substitution ``
-    />/,            // Redirection >
-    /</,            // Redirection <
   ];
   return operatorPatterns.some(p => p.test(pattern));

package/build/agent/bashPermissions.js CHANGED Viewed

@@ -94,12 +94,14 @@ export class BashPermissionChecker {
    * @param {string[]} [config.deny] - Additional deny patterns (always win)
    * @param {boolean} [config.disableDefaultAllow] - Disable default allow list
    * @param {boolean} [config.disableDefaultDeny] - Disable default deny list
+   * @param {boolean} [config.allowEdit] - Whether file editing is allowed (controls output redirection)
    * @param {boolean} [config.debug] - Enable debug logging
    * @param {Object} [config.tracer] - Optional tracer for telemetry
    */
   constructor(config = {}) {
     this.debug = config.debug || false;
     this.tracer = config.tracer || null;
+    this.allowEdit = config.allowEdit || false;
     // Separate default and custom patterns for priority-based resolution
     this.defaultAllowPatterns = config.disableDefaultAllow ? [] : [...DEFAULT_ALLOW_PATTERNS];
@@ -203,6 +205,27 @@ export class BashPermissionChecker {
       console.log(`[BashPermissions] Parsed: ${parsed.command} with args: [${parsed.args.join(', ')}]`);
     }
+    // Block output redirection when allowEdit is false
+    // Output redirection (>, >>) writes to files, which is an edit operation
+    if (!this.allowEdit && parsed.args.some(arg => arg === '>' || arg === '>>')) {
+      const result = {
+        allowed: false,
+        reason: 'Output redirection (> or >>) requires edit permissions (allowEdit)',
+        command: command,
+        parsed: parsed
+      };
+      if (this.debug) {
+        console.log(`[BashPermissions] DENIED - output redirection without allowEdit`);
+      }
+      this.recordBashEvent('permission.denied', {
+        command,
+        parsedCommand: parsed.command,
+        reason: 'output_redirection_without_allow_edit',
+        isComplex: false
+      });
+      return result;
+    }
     // Priority-based permission check:
     // 1. Custom deny always wins
     // 2. Custom allow overrides default deny
@@ -534,6 +557,17 @@ export class BashPermissionChecker {
           break;
         }
+        // Block output redirection in components when allowEdit is false
+        if (!this.allowEdit && parsed.args && parsed.args.some(arg => arg === '>' || arg === '>>')) {
+          if (this.debug) {
+            console.log(`[BashPermissions] Component "${component}" has output redirection without allowEdit`);
+          }
+          allAllowed = false;
+          deniedComponent = component;
+          deniedReason = 'Output redirection (> or >>) requires edit permissions (allowEdit)';
+          break;
+        }
         // Check using same priority logic as simple commands:
         // 1. Custom deny always wins
         if (matchesAnyPattern(parsed, this.customDenyPatterns)) {

package/build/tools/bash.js CHANGED Viewed

@@ -33,6 +33,7 @@ export const bashTool = (options = {}) => {
     debug = false,
     cwd,
     allowedFolders = [],
+    allowEdit = false,
     workspaceRoot: providedWorkspaceRoot,
     tracer = null
   } = options;
@@ -47,6 +48,7 @@ export const bashTool = (options = {}) => {
     deny: bashConfig.deny,
     disableDefaultAllow: bashConfig.disableDefaultAllow,
     disableDefaultDeny: bashConfig.disableDefaultDeny,
+    allowEdit,
     debug,
     tracer
   });

package/build/tools/vercel.js CHANGED Viewed

@@ -248,8 +248,23 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
 		'- Searching "getUserData" ALREADY matches "get", "user", "data" and their variations.',
 		'- NEVER repeat the same search query — you will get the same results. Changing the path does NOT change this.',
 		'- NEVER search trivial variations of the same keyword (e.g., AllowedIPs then allowedIps then allowed_ips). This is wasteful — probe handles it.',
-		'- If a search returns no results, the term likely does not exist. Try a genuinely DIFFERENT keyword or concept, not a variation.',
-		'- If 2-3 searches return no results for a concept, STOP searching for it and move on. Do NOT keep retrying.',
+		'',
+		'When a search returns no results:',
+		'- If you searched a SUBFOLDER (e.g., path="gateway/"), the term might exist elsewhere.',
+		'  Try searching from the workspace root (omit the path parameter) or a different directory.',
+		'  But do NOT retry the same subfolder with different quoting — that will not help.',
+		'- If you searched the WORKSPACE ROOT and got no results, the term does not exist in this codebase.',
+		'  Changing quotes, adding "func " prefix, or switching to method syntax will NOT help.',
+		'- These are ALL the same failed search, NOT different searches:',
+		'    search("func ctxGetData") → no results',
+		'    search("ctxGetData")      → no results  ← WASTED, same concept, different quoting',
+		'    search(ctxGetData)         → no results  ← WASTED, same concept, no quotes',
+		'    search("ctx.GetData")      → no results  ← WASTED, method syntax of same concept',
+		'  After the FIRST "no results" at a given scope, either widen the search path or try',
+		'  a fundamentally different approach: search for a broader concept, use listFiles',
+		'  to discover actual function names, or extract a known file to read real code.',
+		'- If 2 searches return no results for a concept (across different scopes), the code likely',
+		'  uses different naming than you expect — discover the real names via extract or listFiles.',
 		'',
 		'When to use exact=true:',
 		'- Use exact=true when searching for a KNOWN symbol name (function, type, variable, struct).',
@@ -302,6 +317,21 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
 		'  → search "ForwardMessage" → search "ForwardMessage" → search "ForwardMessage" (WRONG: repeating the exact same query)',
 		'  → search "authentication" → wait → search "session management" → wait (WRONG: these are independent, run them in parallel)',
 		'',
+		'  WORST pattern — retrying a non-existent function with quote/syntax variations (this wastes 30 minutes):',
+		'  → search "func ctxGetData" → no results',
+		'  → search "ctxGetData" → no results          ← WRONG: same term without "func" prefix',
+		'  → search "ctx.GetData" → no results          ← WRONG: method syntax of same concept',
+		'  → search "ctx.SetData" → no results          ← WRONG: Set variant of same concept',
+		'  → search ctxGetData → no results             ← WRONG: unquoted version of same term',
+		'  → extract api.go → extract api.go → extract api.go (8 times!) ← WRONG: re-reading same file',
+		'  FIX: After "func ctxGetData" returns no results in gateway/:',
+		'  Option A: Widen scope — search from the workspace root (omit path) in case the',
+		'    function is defined in a different package (e.g., apidef/, user/, config/).',
+		'  Option B: Discover real names — extract a file you KNOW uses context (e.g., a',
+		'    middleware file) and READ what functions it actually calls.',
+		'  Option C: Browse — use listFiles to see what files exist and extract the relevant ones.',
+		'  NEVER: retry the same concept with different quoting in the same directory.',
+		'',
 		'Keyword tips:',
 		'- Common programming keywords are filtered as stopwords when unquoted: function, class, return, new, struct, impl, var, let, const, etc.',
 		'- Avoid searching for these alone — combine with a specific term (e.g., "middleware function" is fine, "function" alone is too generic).',
@@ -340,7 +370,7 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
 		'   - Type references and imports → include type definitions.',
 		'   - Registered handlers/middleware → include all registered items.',
 		'6. If a search returns results, use extract to verify relevance. Run multiple extracts in parallel too.',
-		'7. If a search returns NO results, the term does not exist. Do NOT retry with variations. Move on.',
+		'7. If a search returns NO results: widen the path scope if you searched a subfolder, or move on. Do NOT retry with quote/syntax variations — they search the same index.',
 		'8. Once you have enough targets (typically 5-15), output your final JSON answer immediately.',
 		'',
 		`Query: ${searchQuery}`,
@@ -388,8 +418,30 @@ export const searchTool = (options = {}) => {
 	const dupBlockCounts = new Map();
 	// Track pagination counts per query to cap runaway pagination
 	const paginationCounts = new Map();
+	// Track consecutive no-result searches (circuit breaker)
+	let consecutiveNoResults = 0;
+	const MAX_CONSECUTIVE_NO_RESULTS = 4;
+	// Track normalized query concepts for fuzzy dedup (catches quote/syntax variations)
+	const failedConcepts = new Map(); // normalizedKey → count
 	const MAX_PAGES_PER_QUERY = 3;
+	/**
+	 * Normalize a search query to detect syntax-level duplicates.
+	 *
+	 * Produces a canonical key so that spelling variations of one identifier
+	 * compare equal. The steps, in order:
+	 *   1. remove a single quote character (" or ') at the very start and/or
+	 *      very end of the string,
+	 *   2. remove every dot,
+	 *   3. remove every run of underscores, hyphens and whitespace,
+	 *   4. lowercase the result.
+	 *
+	 * "ctxGetData", "ctx.GetData", "ctx_get_data" all → "ctxgetdata"
+	 *
+	 * Note: does NOT strip language keywords (func, type) — those change search
+	 * semantics and are already handled as stopwords by the Rust search engine.
+	 * Because whitespace is removed, a keyword prefix is fused into the key:
+	 * "func ctxGetData" → "funcctxgetdata", which is a different key from
+	 * "ctxgetdata".
+	 *
+	 * Note: quote stripping runs before whitespace removal, so quotes are only
+	 * removed when they are the first/last character of the raw query; a query
+	 * padded with spaces around its quotes keeps the quote characters.
+	 *
+	 * @param {string} query - Raw search query as supplied by the caller.
+	 * @returns {string} The normalized key, or '' when query is falsy.
+	 */
+	function normalizeQueryConcept(query) {
+		if (!query) return '';
+		return query
+			.replace(/^["']|["']$/g, '')      // strip one leading and/or one trailing quote (outer quotes only)
+			.replace(/\./g, '')                 // "ctx.GetData" → "ctxGetData"
+			.replace(/[_\-\s]+/g, '')           // strip underscores/hyphens/whitespace, including interior spaces
+			.toLowerCase()                      // case-insensitive comparison key
+			.trim();                            // effectively a no-op: all whitespace was already removed above
+	}
 	return tool({
 		name: 'search',
 		description: searchDelegate
@@ -478,6 +530,35 @@ export const searchTool = (options = {}) => {
 					}
 					previousSearches.set(searchKey, { hadResults: false });
 					paginationCounts.set(searchKey, 0);
+					// Fuzzy concept dedup: catch quote/syntax variations of the same failed concept
+					// e.g., "ctxGetData", "ctx.GetData", "ctx_get_data" all normalize to "ctxgetdata"
+					// (keyword prefixes are NOT stripped: "func ctxGetData" normalizes to "funcctxgetdata")
+					const normalizedKey = `${searchPath}::${normalizeQueryConcept(searchQuery)}`;
+					if (failedConcepts.has(normalizedKey) && failedConcepts.get(normalizedKey) >= 2) {
+						const conceptCount = failedConcepts.get(normalizedKey) + 1;
+						failedConcepts.set(normalizedKey, conceptCount);
+						if (debug) {
+							console.error(`[CONCEPT-DEDUP] Blocked variation of failed concept (${conceptCount}x): "${searchQuery}" normalized to "${normalizeQueryConcept(searchQuery)}"`);
+						}
+						const isSubfolder = path && path !== effectiveSearchCwd && path !== '.';
+						const scopeHint = isSubfolder
+							? `\n- Try searching from the workspace root (omit the path parameter) — the term may exist in a different directory`
+							: `\n- The term does not exist in this codebase at any path`;
+						return `CONCEPT ALREADY FAILED (${conceptCount} variations tried). You already searched for "${normalizeQueryConcept(searchQuery)}" with different quoting/syntax in this path and got NO results each time. Changing quotes, adding "func" prefix, or switching to method syntax will NOT change the results.\n\nChange your strategy:${scopeHint}\n- Use extract on a file you ALREADY found to read actual code and discover real function/type names\n- Use listFiles to browse directories and find what functions actually exist\n- Search for a BROADER concept (e.g., instead of "ctxGetData", try "context" or "middleware data access")\n- If you have enough information from prior searches, provide your final answer NOW`;
+					}
+					// Circuit breaker: too many consecutive no-result searches means the model
+					// is stuck in a loop guessing names that don't exist
+					if (consecutiveNoResults >= MAX_CONSECUTIVE_NO_RESULTS) {
+						if (debug) {
+							console.error(`[CIRCUIT-BREAKER] ${consecutiveNoResults} consecutive no-result searches, blocking: "${searchQuery}"`);
+						}
+						const isSubfolderCB = path && path !== effectiveSearchCwd && path !== '.';
+						const cbScopeHint = isSubfolderCB
+							? `\n- You have been searching in "${path}" — try searching from the workspace root or a different directory`
+							: '';
+						return `CIRCUIT BREAKER: Your last ${consecutiveNoResults} searches ALL returned no results. You appear to be guessing function/type names that don't match what's actually in the code.\n\nChange your approach:${cbScopeHint}\n1. Use extract on files you already found — read the actual code to discover real function names\n2. Use listFiles to browse directories and see what files/functions actually exist\n3. If you found some results earlier, those are likely sufficient — provide your final answer\n\nRetrying search query variations will not help. Discover real names from real code instead.`;
+					}
 				} else {
 					// Cap pagination to prevent runaway page-through of broad queries
 					const pageCount = (paginationCounts.get(searchKey) || 0) + 1;
@@ -493,11 +574,28 @@ export const searchTool = (options = {}) => {
 					const result = maybeAnnotate(await runRawSearch());
 					// Track whether this search had results for better dedup messages
 					if (typeof result === 'string' && result.includes('No results found')) {
+						// Track consecutive no-results and failed concepts for circuit breaker
+						consecutiveNoResults++;
+						const normalizedKey = `${searchPath}::${normalizeQueryConcept(searchQuery)}`;
+						failedConcepts.set(normalizedKey, (failedConcepts.get(normalizedKey) || 0) + 1);
+						if (debug) {
+							console.error(`[NO-RESULTS] consecutiveNoResults=${consecutiveNoResults}, concept "${normalizeQueryConcept(searchQuery)}" failed ${failedConcepts.get(normalizedKey)}x`);
+						}
 						// Append contextual hint for ticket/issue ID queries
 						if (/^[A-Z]+-\d+$/.test(searchQuery.trim()) || /^[A-Z]+-\d+$/.test(searchQuery.replace(/"/g, '').trim())) {
 							return result + '\n\n⚠️ Your query looks like a ticket/issue ID (e.g., JIRA-1234). Ticket IDs are rarely present in source code. Search for the technical concepts described in the ticket instead (e.g., function names, error messages, variable names).';
 						}
+						// Add a hint when approaching the circuit breaker threshold
+						if (consecutiveNoResults >= MAX_CONSECUTIVE_NO_RESULTS - 1) {
+							const isSubfolderWarn = path && path !== effectiveSearchCwd && path !== '.';
+							const warnScopeHint = isSubfolderWarn
+								? ` You are searching in "${path}" — consider searching from the workspace root or a different directory.`
+								: '';
+							return result + `\n\n⚠️ WARNING: ${consecutiveNoResults} consecutive searches returned no results.${warnScopeHint} Before your next action: use extract on a file you already found to read actual code, or use listFiles to discover what functions really exist. One more failed search will trigger the circuit breaker.`;
+						}
 					} else if (typeof result === 'string') {
+						// Successful search — reset consecutive counter
+						consecutiveNoResults = 0;
 						const entry = previousSearches.get(searchKey);
 						if (entry) entry.hadResults = true;
 					}