@probelabs/probe 0.6.0-rc311 → 0.6.0-rc313

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -350,6 +350,7 @@ export class ProbeAgent {
350
350
  // Task management configuration
351
351
  this.enableTasks = !!options.enableTasks;
352
352
  this.taskManager = null; // Initialized per-request in answer()
353
+ this.delegationTask = options.delegationTask || null; // Task description when this is a subagent
353
354
 
354
355
  // Per-instance delegation manager for concurrent delegation limits
355
356
  // Each ProbeAgent instance has its own limits, not shared globally
@@ -3350,14 +3351,19 @@ Follow these instructions carefully:
3350
3351
  this.toolImplementations.task = createTaskTool({
3351
3352
  taskManager: this.taskManager,
3352
3353
  tracer: this.tracer,
3353
- debug: this.debug
3354
+ debug: this.debug,
3355
+ delegationTask: this.delegationTask
3354
3356
  });
3355
3357
  }
3356
3358
 
3357
3359
  // Record telemetry for task initialization
3358
3360
  if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3359
3361
  this.tracer.recordTaskEvent('session_started', {
3360
- 'task.enabled': true
3362
+ 'task.enabled': true,
3363
+ 'agent.session_id': this.tracer?.sessionId ?? null,
3364
+ 'agent.parent_session_id': this.tracer?.parentSessionId ?? null,
3365
+ 'agent.root_session_id': this.tracer?.rootSessionId ?? null,
3366
+ 'agent.kind': this.tracer?.agentKind ?? 'main',
3361
3367
  });
3362
3368
  }
3363
3369
 
@@ -4084,7 +4090,7 @@ or
4084
4090
  }
4085
4091
  return {
4086
4092
  toolChoice: 'none',
4087
- userMessage: `⚠️ TIME BUDGET EXHAUSTED. Your allocated time for this task has run out. You have ${remaining} step(s) remaining to provide your answer.\n\nIMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time.\n\nDo NOT say things like "the system is shutting down" or "try again later" — the user submitted a request and is waiting for YOUR answer right now.\n\nProvide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
4093
+ userMessage: `⚠️ TIME BUDGET EXHAUSTED. Your allocated time for this task has run out. You have ${remaining} step(s) remaining to provide your answer.\n\nIMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time.\n\nDo NOT say things like "the system is shutting down" or "try again later" — the user submitted a request and is waiting for YOUR answer right now.\n\nYou MUST now produce a detailed PROGRESS REPORT so that a follow-up agent can continue your work without starting over. Structure your response as follows:\n\n## Task\nWhat was the original request / goal.\n\n## Completed Work\nWhat you successfully accomplished — include ALL findings, code snippets, file paths, data, and conclusions gathered. Be specific and include actual content, not just descriptions.\n\n## Key Findings\nConcrete facts, answers, or data points you discovered. Include file paths with line numbers, code snippets, configuration values, etc.\n\n## Attempted but Inconclusive\nWhat you tried that did not yield clear results — include the approach and why it was inconclusive, so the next agent does not repeat it.\n\n## Not Started / Remaining\nWhat parts of the task you did not get to, and any recommendations for how to approach them.\n\n## Suggested Next Steps\nSpecific, actionable steps for a follow-up agent to continue this work efficiently.\n\nIMPORTANT: Include ALL useful data you gathered inline — do not just say "I found X", actually include X. The next agent will only see this report, not your tool call history.`
4088
4094
  };
4089
4095
  }
4090
4096
 
@@ -4094,22 +4100,48 @@ or
4094
4100
  return { toolChoice: 'none' };
4095
4101
  }
4096
4102
 
4097
- // Last-iteration warning — force text-only and tell the AI to summarize
4103
+ // Last-iteration warning — force text-only and request a structured progress report
4098
4104
  if (stepNumber === maxIterations - 1) {
4099
- // Build a brief summary of tools used so the model can reference them in its answer
4100
- const searchesTried = _toolCallLog
4101
- .filter(tc => tc.name === 'search')
4102
- .map(tc => `"${tc.args.query || ''}"${tc.args.exact ? ' (exact)' : ''}`)
4103
- .filter((v, i, a) => a.indexOf(v) === i); // unique
4104
- const searchSummary = searchesTried.length > 0
4105
- ? `\nSearches attempted: ${searchesTried.join(', ')}`
4106
- : '';
4105
+ // Build a detailed activity log so the model can produce an accurate handoff report
4106
+ const toolActivity = _toolCallLog
4107
+ .filter(tc => tc.name !== '_assistant_text')
4108
+ .map(tc => {
4109
+ const argStr = tc.name === 'search'
4110
+ ? `query="${tc.args.query || ''}"${tc.args.exact ? ' exact' : ''} path=${tc.args.path || '.'}`
4111
+ : JSON.stringify(tc.args || {}).substring(0, 200);
4112
+ const brief = tc.resultBrief ? ` → ${tc.resultBrief.substring(0, 150)}` : '';
4113
+ return ` [step ${tc.step}] ${tc.name}(${argStr})${brief}`;
4114
+ })
4115
+ .join('\n');
4116
+ const activityLog = toolActivity ? `\n\nTool activity so far:\n${toolActivity}` : '';
4107
4117
 
4108
4118
  // For code-searcher subagents: instruct to output structured JSON even on partial results
4109
4119
  const isCodeSearcher = this.promptType === 'code-searcher';
4110
4120
  const lastIterMessage = isCodeSearcher
4111
- ? `⚠️ LAST ITERATION — you are out of tool calls. Output your JSON response NOW with whatever files you have verified so far. Set confidence to "low" if your search was incomplete. Include the "searches" array listing all search queries you made with their paths and outcomes.${searchSummary}`
4112
- : `⚠️ LAST ITERATION — you are out of tool calls. Provide your BEST answer NOW with the information gathered so far. If you could not find what was requested, explain exactly what you searched for and why it did not work, so the caller can try a different approach.${searchSummary}`;
4121
+ ? `⚠️ LAST ITERATION — you are out of tool calls. Output your JSON response NOW with whatever files you have verified so far. Set confidence to "low" if your search was incomplete. Include the "searches" array listing all search queries you made with their paths and outcomes.${activityLog}`
4122
+ : `⚠️ ITERATION LIMIT REACHED — you have no more tool calls. You MUST now produce a detailed PROGRESS REPORT so that a follow-up agent can continue your work without starting over.
4123
+
4124
+ Structure your response as follows:
4125
+
4126
+ ## Task
4127
+ What was the original request / goal.
4128
+
4129
+ ## Completed Work
4130
+ What you successfully accomplished — include ALL findings, code snippets, file paths, data, and conclusions gathered. Be specific and include actual content, not just descriptions.
4131
+
4132
+ ## Key Findings
4133
+ Concrete facts, answers, or data points you discovered. Include file paths with line numbers, code snippets, configuration values, etc.
4134
+
4135
+ ## Attempted but Inconclusive
4136
+ What you tried that did not yield clear results — include the approach and why it was inconclusive, so the next agent does not repeat it.
4137
+
4138
+ ## Not Started / Remaining
4139
+ What parts of the task you did not get to, and any recommendations for how to approach them.
4140
+
4141
+ ## Suggested Next Steps
4142
+ Specific, actionable steps for a follow-up agent to continue this work efficiently.
4143
+
4144
+ IMPORTANT: Include ALL useful data you gathered inline — do not just say "I found X", actually include X. The next agent will only see this report, not your tool call history.${activityLog}`;
4113
4145
 
4114
4146
  return {
4115
4147
  toolChoice: 'none',
@@ -4208,13 +4240,26 @@ Double-check your response based on the criteria above. If everything looks good
4208
4240
  currentIteration++;
4209
4241
  toolContext.currentIteration = currentIteration;
4210
4242
 
4211
- // Track tool calls for failure diagnostics
4243
+ // Track tool calls for failure diagnostics and progress reports
4212
4244
  if (toolCalls?.length > 0) {
4213
- for (const tc of toolCalls) {
4214
- _toolCallLog.push({ name: tc.toolName, args: tc.args || {} });
4245
+ for (let i = 0; i < toolCalls.length; i++) {
4246
+ const tc = toolCalls[i];
4247
+ const tr = toolResults?.[i];
4248
+ let resultBrief = '';
4249
+ if (tr) {
4250
+ const raw = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
4251
+ resultBrief = raw ? raw.substring(0, 500) : '';
4252
+ }
4253
+ const tcArgs = tc.args || (typeof tc.input === 'string' ? (() => { try { return JSON.parse(tc.input); } catch { return {}; } })() : tc.input) || {};
4254
+ _toolCallLog.push({ name: tc.toolName, args: tcArgs, resultBrief, step: currentIteration });
4215
4255
  }
4216
4256
  }
4217
4257
 
4258
+ // Track assistant text output per step for progress reports
4259
+ if (text && text.trim()) {
4260
+ _toolCallLog.push({ name: '_assistant_text', args: {}, resultBrief: text.substring(0, 1000), step: currentIteration });
4261
+ }
4262
+
4218
4263
  // Record telemetry — include model's reasoning and tool call details
4219
4264
  if (this.tracer) {
4220
4265
  const stepEvent = {
@@ -4633,13 +4678,16 @@ Double-check your response based on the criteria above. If everything looks good
4633
4678
  `Some of your tool calls were cancelled mid-execution because the timeout observer determined the time limit was reached.\n\n` +
4634
4679
  `IMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time. ` +
4635
4680
  `Do NOT say things like "the system is shutting down" or "try again later." The user is waiting for your answer RIGHT NOW.\n\n` +
4636
- `Please provide a DETAILED summary of:\n` +
4637
- `1. What you were asked to do (the original task)\n` +
4638
- `2. What you accomplished — include ALL findings, code snippets, data, and conclusions you gathered\n` +
4639
- `3. What was still in progress or not yet started\n` +
4640
- `4. Any partial results or recommendations you can offer based on what you found so far` +
4681
+ `You MUST produce a detailed PROGRESS REPORT so that a follow-up agent can continue your work without starting over. ` +
4682
+ `Structure your response with these sections:\n\n` +
4683
+ `## Task\nWhat was the original request / goal.\n\n` +
4684
+ `## Completed Work\nWhat you successfully accomplished — include ALL findings, code snippets, file paths, data, and conclusions gathered. Be specific and include actual content, not just descriptions.\n\n` +
4685
+ `## Key Findings\nConcrete facts, answers, or data points you discovered. Include file paths with line numbers, code snippets, configuration values, etc.\n\n` +
4686
+ `## Attempted but Inconclusive\nWhat you tried that did not yield clear results — include the approach and why it was inconclusive, so the next agent does not repeat it.\n\n` +
4687
+ `## Not Started / Remaining\nWhat parts of the task you did not get to, and any recommendations for how to approach them.\n\n` +
4688
+ `## Suggested Next Steps\nSpecific, actionable steps for a follow-up agent to continue this work efficiently.` +
4641
4689
  `${taskContext}${schemaContext}\n\n` +
4642
- `Be thorough — this is the user's only response. Include all useful information you collected.`;
4690
+ `IMPORTANT: Include ALL useful data you gathered inline — do not just say "I found X", actually include X. The next agent will only see this report, not your tool call history.`;
4643
4691
 
4644
4692
  const summaryMessages = [
4645
4693
  ...currentMessages,
@@ -4791,21 +4839,33 @@ Double-check your response based on the criteria above. If everything looks good
4791
4839
  const searchQueries = [];
4792
4840
  const searchDetails = [];
4793
4841
  const toolCounts = {};
4842
+ const toolTimeline = [];
4794
4843
  for (const tc of _toolCallLog) {
4844
+ if (tc.name === '_assistant_text') continue;
4795
4845
  toolCounts[tc.name] = (toolCounts[tc.name] || 0) + 1;
4796
4846
  if (tc.name === 'search') {
4797
4847
  const q = tc.args.query || '';
4798
4848
  const p = tc.args.path || '.';
4799
4849
  const exact = tc.args.exact ? ' (exact)' : '';
4800
4850
  searchQueries.push(`"${q}"${exact}`);
4801
- searchDetails.push({ query: q, path: p, had_results: false });
4851
+ searchDetails.push({ query: q, path: p, had_results: !!(tc.resultBrief && tc.resultBrief.trim()) });
4802
4852
  }
4853
+ const argStr = tc.name === 'search'
4854
+ ? `query="${tc.args.query || ''}"${tc.args.exact ? ' exact' : ''}`
4855
+ : JSON.stringify(tc.args || {}).substring(0, 150);
4856
+ const brief = tc.resultBrief ? ` → ${tc.resultBrief.substring(0, 200)}` : ' → (no result)';
4857
+ toolTimeline.push(` [step ${tc.step}] ${tc.name}(${argStr})${brief}`);
4803
4858
  }
4804
4859
  const toolBreakdown = Object.entries(toolCounts)
4805
4860
  .map(([name, count]) => `${name}: ${count}x`)
4806
4861
  .join(', ');
4807
4862
  const uniqueSearches = [...new Set(searchQueries)];
4808
4863
 
4864
+ // Collect any assistant text fragments as partial findings
4865
+ const assistantTexts = _toolCallLog
4866
+ .filter(tc => tc.name === '_assistant_text' && tc.resultBrief)
4867
+ .map(tc => tc.resultBrief);
4868
+
4809
4869
  // For code-searcher subagents: produce structured JSON so the parent
4810
4870
  // can still use partial results instead of getting a plain error string.
4811
4871
  if (this.promptType === 'code-searcher') {
@@ -4816,12 +4876,18 @@ Double-check your response based on the criteria above. If everything looks good
4816
4876
  searches: searchDetails
4817
4877
  });
4818
4878
  } else {
4819
- let summary = `I was unable to complete your request after ${currentIteration} tool iterations.\n\n`;
4820
- summary += `Tool calls made: ${toolBreakdown || 'none'}\n`;
4879
+ let summary = `## Progress Report (iteration limit reached after ${currentIteration} steps)\n\n`;
4880
+ summary += `### Tool Usage Summary\n${toolBreakdown || 'none'}\n\n`;
4821
4881
  if (uniqueSearches.length > 0) {
4822
- summary += `Search queries tried: ${uniqueSearches.join(', ')}\n`;
4882
+ summary += `### Search Queries Attempted\n${uniqueSearches.join(', ')}\n\n`;
4883
+ }
4884
+ if (toolTimeline.length > 0) {
4885
+ summary += `### Step-by-Step Activity Log\n${toolTimeline.join('\n')}\n\n`;
4886
+ }
4887
+ if (assistantTexts.length > 0) {
4888
+ summary += `### Partial Findings\n${assistantTexts.join('\n\n')}\n\n`;
4823
4889
  }
4824
- summary += `\nThe search approach may be fundamentally wrong for this query. Consider: using exact=true for literal string matching, using bash/grep for pattern-based file searches, or trying a completely different strategy instead of repeating similar searches.`;
4890
+ summary += `### Recommendation for Follow-Up\nThe iteration limit was reached before the task could be completed. A follow-up agent should review the activity log above to avoid repeating the same searches, and consider alternative approaches such as: using exact=true for literal string matching, using bash/grep for pattern-based file searches, or trying a different strategy.`;
4825
4891
  finalResult = summary;
4826
4892
  }
4827
4893
  } catch {
@@ -136,9 +136,28 @@ export class SimpleTelemetry {
136
136
  * Simple tracer for application-level tracing
137
137
  */
138
138
  export class SimpleAppTracer {
139
- constructor(telemetry, sessionId = null) {
139
+ constructor(telemetry, sessionId = null, options = {}) {
140
140
  this.telemetry = telemetry;
141
141
  this.sessionId = sessionId || this.generateSessionId();
142
+ this.parentSessionId = options.parentSessionId || null;
143
+ this.rootSessionId = options.rootSessionId || this.sessionId;
144
+ this.agentKind = options.agentKind || 'main';
145
+ }
146
+
147
+ /**
148
+ * Create a child tracer for a delegated subagent.
149
+ * Inherits the same telemetry backend but scopes events to the child session.
150
+ * @param {string} childSessionId - The subagent's session ID
151
+ * @param {Object} [options] - Additional options
152
+ * @param {string} [options.agentKind='delegate'] - Kind of child agent
153
+ * @returns {SimpleAppTracer} A new tracer scoped to the child session
154
+ */
155
+ createChildTracer(childSessionId, options = {}) {
156
+ return new SimpleAppTracer(this.telemetry, childSessionId, {
157
+ parentSessionId: this.sessionId,
158
+ rootSessionId: this.rootSessionId,
159
+ agentKind: options.agentKind || 'delegate',
160
+ });
142
161
  }
143
162
 
144
163
  generateSessionId() {
@@ -500,13 +500,28 @@ export class TaskManager {
500
500
 
501
501
  let line = ` <task id="${this._escapeXml(task.id)}" status="${this._escapeXml(task.status)}"`;
502
502
  if (task.priority) line += ` priority="${this._escapeXml(task.priority)}"`;
503
+ if (task.dependencies.length > 0) line += ` depends_on="${this._escapeXml(task.dependencies.join(','))}"`;
503
504
  if (blockers.length > 0) line += ` blocked_by="${this._escapeXml(blockers.join(','))}"`;
504
505
  line += `>${this._escapeXml(task.title)}</task>`;
505
506
 
506
507
  return line;
507
508
  });
508
509
 
509
- return `<task_status>\n${taskLines.join('\n')}\n</task_status>`;
510
+ // Add a brief status line so the AI can quickly assess progress
511
+ let completed = 0, inProgress = 0, pending = 0, cancelled = 0;
512
+ for (const t of tasks) {
513
+ if (t.status === 'completed') completed++;
514
+ else if (t.status === 'in_progress') inProgress++;
515
+ else if (t.status === 'pending') pending++;
516
+ else if (t.status === 'cancelled') cancelled++;
517
+ }
518
+ const statusLine = ` <!-- ${completed}/${tasks.length} completed` +
519
+ (inProgress > 0 ? `, ${inProgress} in progress` : '') +
520
+ (pending > 0 ? `, ${pending} pending` : '') +
521
+ (cancelled > 0 ? `, ${cancelled} cancelled` : '') +
522
+ ` -->`;
523
+
524
+ return `<task_status>\n${statusLine}\n${taskLines.join('\n')}\n</task_status>`;
510
525
  }
511
526
 
512
527
  /**