@probelabs/probe 0.6.0-rc311 → 0.6.0-rc313
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/bin/binaries/{probe-v0.6.0-rc311-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc313-aarch64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc311-aarch64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc313-aarch64-unknown-linux-musl.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc311-x86_64-apple-darwin.tar.gz → probe-v0.6.0-rc313-x86_64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc311-x86_64-pc-windows-msvc.zip → probe-v0.6.0-rc313-x86_64-pc-windows-msvc.zip} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc311-x86_64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc313-x86_64-unknown-linux-musl.tar.gz} +0 -0
- package/build/agent/ProbeAgent.js +94 -28
- package/build/agent/simpleTelemetry.js +20 -1
- package/build/agent/tasks/TaskManager.js +16 -1
- package/build/agent/tasks/taskTool.js +279 -62
- package/build/delegate.js +4 -1
- package/cjs/agent/ProbeAgent.cjs +448 -117
- package/cjs/agent/simpleTelemetry.cjs +20 -2
- package/cjs/index.cjs +448 -117
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +94 -28
- package/src/agent/simpleTelemetry.js +20 -1
- package/src/agent/tasks/TaskManager.js +16 -1
- package/src/agent/tasks/taskTool.js +279 -62
- package/src/delegate.js +4 -1
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -350,6 +350,7 @@ export class ProbeAgent {
|
|
|
350
350
|
// Task management configuration
|
|
351
351
|
this.enableTasks = !!options.enableTasks;
|
|
352
352
|
this.taskManager = null; // Initialized per-request in answer()
|
|
353
|
+
this.delegationTask = options.delegationTask || null; // Task description when this is a subagent
|
|
353
354
|
|
|
354
355
|
// Per-instance delegation manager for concurrent delegation limits
|
|
355
356
|
// Each ProbeAgent instance has its own limits, not shared globally
|
|
@@ -3350,14 +3351,19 @@ Follow these instructions carefully:
|
|
|
3350
3351
|
this.toolImplementations.task = createTaskTool({
|
|
3351
3352
|
taskManager: this.taskManager,
|
|
3352
3353
|
tracer: this.tracer,
|
|
3353
|
-
debug: this.debug
|
|
3354
|
+
debug: this.debug,
|
|
3355
|
+
delegationTask: this.delegationTask
|
|
3354
3356
|
});
|
|
3355
3357
|
}
|
|
3356
3358
|
|
|
3357
3359
|
// Record telemetry for task initialization
|
|
3358
3360
|
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3359
3361
|
this.tracer.recordTaskEvent('session_started', {
|
|
3360
|
-
'task.enabled': true
|
|
3362
|
+
'task.enabled': true,
|
|
3363
|
+
'agent.session_id': this.tracer?.sessionId ?? null,
|
|
3364
|
+
'agent.parent_session_id': this.tracer?.parentSessionId ?? null,
|
|
3365
|
+
'agent.root_session_id': this.tracer?.rootSessionId ?? null,
|
|
3366
|
+
'agent.kind': this.tracer?.agentKind ?? 'main',
|
|
3361
3367
|
});
|
|
3362
3368
|
}
|
|
3363
3369
|
|
|
@@ -4084,7 +4090,7 @@ or
|
|
|
4084
4090
|
}
|
|
4085
4091
|
return {
|
|
4086
4092
|
toolChoice: 'none',
|
|
4087
|
-
userMessage: `⚠️ TIME BUDGET EXHAUSTED. Your allocated time for this task has run out. You have ${remaining} step(s) remaining to provide your answer.\n\nIMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time.\n\nDo NOT say things like "the system is shutting down" or "try again later" — the user submitted a request and is waiting for YOUR answer right now.\n\
|
|
4093
|
+
userMessage: `⚠️ TIME BUDGET EXHAUSTED. Your allocated time for this task has run out. You have ${remaining} step(s) remaining to provide your answer.\n\nIMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time.\n\nDo NOT say things like "the system is shutting down" or "try again later" — the user submitted a request and is waiting for YOUR answer right now.\n\nYou MUST now produce a detailed PROGRESS REPORT so that a follow-up agent can continue your work without starting over. Structure your response as follows:\n\n## Task\nWhat was the original request / goal.\n\n## Completed Work\nWhat you successfully accomplished — include ALL findings, code snippets, file paths, data, and conclusions gathered. Be specific and include actual content, not just descriptions.\n\n## Key Findings\nConcrete facts, answers, or data points you discovered. Include file paths with line numbers, code snippets, configuration values, etc.\n\n## Attempted but Inconclusive\nWhat you tried that did not yield clear results — include the approach and why it was inconclusive, so the next agent does not repeat it.\n\n## Not Started / Remaining\nWhat parts of the task you did not get to, and any recommendations for how to approach them.\n\n## Suggested Next Steps\nSpecific, actionable steps for a follow-up agent to continue this work efficiently.\n\nIMPORTANT: Include ALL useful data you gathered inline — do not just say "I found X", actually include X. The next agent will only see this report, not your tool call history.`
|
|
4088
4094
|
};
|
|
4089
4095
|
}
|
|
4090
4096
|
|
|
@@ -4094,22 +4100,48 @@ or
|
|
|
4094
4100
|
return { toolChoice: 'none' };
|
|
4095
4101
|
}
|
|
4096
4102
|
|
|
4097
|
-
// Last-iteration warning — force text-only and
|
|
4103
|
+
// Last-iteration warning — force text-only and request a structured progress report
|
|
4098
4104
|
if (stepNumber === maxIterations - 1) {
|
|
4099
|
-
// Build a
|
|
4100
|
-
const
|
|
4101
|
-
.filter(tc => tc.name
|
|
4102
|
-
.map(tc =>
|
|
4103
|
-
|
|
4104
|
-
|
|
4105
|
-
|
|
4106
|
-
|
|
4105
|
+
// Build a detailed activity log so the model can produce an accurate handoff report
|
|
4106
|
+
const toolActivity = _toolCallLog
|
|
4107
|
+
.filter(tc => tc.name !== '_assistant_text')
|
|
4108
|
+
.map(tc => {
|
|
4109
|
+
const argStr = tc.name === 'search'
|
|
4110
|
+
? `query="${tc.args.query || ''}"${tc.args.exact ? ' exact' : ''} path=${tc.args.path || '.'}`
|
|
4111
|
+
: JSON.stringify(tc.args || {}).substring(0, 200);
|
|
4112
|
+
const brief = tc.resultBrief ? ` → ${tc.resultBrief.substring(0, 150)}` : '';
|
|
4113
|
+
return ` [step ${tc.step}] ${tc.name}(${argStr})${brief}`;
|
|
4114
|
+
})
|
|
4115
|
+
.join('\n');
|
|
4116
|
+
const activityLog = toolActivity ? `\n\nTool activity so far:\n${toolActivity}` : '';
|
|
4107
4117
|
|
|
4108
4118
|
// For code-searcher subagents: instruct to output structured JSON even on partial results
|
|
4109
4119
|
const isCodeSearcher = this.promptType === 'code-searcher';
|
|
4110
4120
|
const lastIterMessage = isCodeSearcher
|
|
4111
|
-
? `⚠️ LAST ITERATION — you are out of tool calls. Output your JSON response NOW with whatever files you have verified so far. Set confidence to "low" if your search was incomplete. Include the "searches" array listing all search queries you made with their paths and outcomes.${
|
|
4112
|
-
: `⚠️
|
|
4121
|
+
? `⚠️ LAST ITERATION — you are out of tool calls. Output your JSON response NOW with whatever files you have verified so far. Set confidence to "low" if your search was incomplete. Include the "searches" array listing all search queries you made with their paths and outcomes.${activityLog}`
|
|
4122
|
+
: `⚠️ ITERATION LIMIT REACHED — you have no more tool calls. You MUST now produce a detailed PROGRESS REPORT so that a follow-up agent can continue your work without starting over.
|
|
4123
|
+
|
|
4124
|
+
Structure your response as follows:
|
|
4125
|
+
|
|
4126
|
+
## Task
|
|
4127
|
+
What was the original request / goal.
|
|
4128
|
+
|
|
4129
|
+
## Completed Work
|
|
4130
|
+
What you successfully accomplished — include ALL findings, code snippets, file paths, data, and conclusions gathered. Be specific and include actual content, not just descriptions.
|
|
4131
|
+
|
|
4132
|
+
## Key Findings
|
|
4133
|
+
Concrete facts, answers, or data points you discovered. Include file paths with line numbers, code snippets, configuration values, etc.
|
|
4134
|
+
|
|
4135
|
+
## Attempted but Inconclusive
|
|
4136
|
+
What you tried that did not yield clear results — include the approach and why it was inconclusive, so the next agent does not repeat it.
|
|
4137
|
+
|
|
4138
|
+
## Not Started / Remaining
|
|
4139
|
+
What parts of the task you did not get to, and any recommendations for how to approach them.
|
|
4140
|
+
|
|
4141
|
+
## Suggested Next Steps
|
|
4142
|
+
Specific, actionable steps for a follow-up agent to continue this work efficiently.
|
|
4143
|
+
|
|
4144
|
+
IMPORTANT: Include ALL useful data you gathered inline — do not just say "I found X", actually include X. The next agent will only see this report, not your tool call history.${activityLog}`;
|
|
4113
4145
|
|
|
4114
4146
|
return {
|
|
4115
4147
|
toolChoice: 'none',
|
|
@@ -4208,13 +4240,26 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
4208
4240
|
currentIteration++;
|
|
4209
4241
|
toolContext.currentIteration = currentIteration;
|
|
4210
4242
|
|
|
4211
|
-
// Track tool calls for failure diagnostics
|
|
4243
|
+
// Track tool calls for failure diagnostics and progress reports
|
|
4212
4244
|
if (toolCalls?.length > 0) {
|
|
4213
|
-
for (
|
|
4214
|
-
|
|
4245
|
+
for (let i = 0; i < toolCalls.length; i++) {
|
|
4246
|
+
const tc = toolCalls[i];
|
|
4247
|
+
const tr = toolResults?.[i];
|
|
4248
|
+
let resultBrief = '';
|
|
4249
|
+
if (tr) {
|
|
4250
|
+
const raw = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
|
|
4251
|
+
resultBrief = raw ? raw.substring(0, 500) : '';
|
|
4252
|
+
}
|
|
4253
|
+
const tcArgs = tc.args || (typeof tc.input === 'string' ? (() => { try { return JSON.parse(tc.input); } catch { return {}; } })() : tc.input) || {};
|
|
4254
|
+
_toolCallLog.push({ name: tc.toolName, args: tcArgs, resultBrief, step: currentIteration });
|
|
4215
4255
|
}
|
|
4216
4256
|
}
|
|
4217
4257
|
|
|
4258
|
+
// Track assistant text output per step for progress reports
|
|
4259
|
+
if (text && text.trim()) {
|
|
4260
|
+
_toolCallLog.push({ name: '_assistant_text', args: {}, resultBrief: text.substring(0, 1000), step: currentIteration });
|
|
4261
|
+
}
|
|
4262
|
+
|
|
4218
4263
|
// Record telemetry — include model's reasoning and tool call details
|
|
4219
4264
|
if (this.tracer) {
|
|
4220
4265
|
const stepEvent = {
|
|
@@ -4633,13 +4678,16 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
4633
4678
|
`Some of your tool calls were cancelled mid-execution because the timeout observer determined the time limit was reached.\n\n` +
|
|
4634
4679
|
`IMPORTANT: This is a time budget constraint, NOT a system shutdown or error. The system is working perfectly — you simply used all your allocated time. ` +
|
|
4635
4680
|
`Do NOT say things like "the system is shutting down" or "try again later." The user is waiting for your answer RIGHT NOW.\n\n` +
|
|
4636
|
-
`
|
|
4637
|
-
`
|
|
4638
|
-
|
|
4639
|
-
|
|
4640
|
-
|
|
4681
|
+
`You MUST produce a detailed PROGRESS REPORT so that a follow-up agent can continue your work without starting over. ` +
|
|
4682
|
+
`Structure your response with these sections:\n\n` +
|
|
4683
|
+
`## Task\nWhat was the original request / goal.\n\n` +
|
|
4684
|
+
`## Completed Work\nWhat you successfully accomplished — include ALL findings, code snippets, file paths, data, and conclusions gathered. Be specific and include actual content, not just descriptions.\n\n` +
|
|
4685
|
+
`## Key Findings\nConcrete facts, answers, or data points you discovered. Include file paths with line numbers, code snippets, configuration values, etc.\n\n` +
|
|
4686
|
+
`## Attempted but Inconclusive\nWhat you tried that did not yield clear results — include the approach and why it was inconclusive, so the next agent does not repeat it.\n\n` +
|
|
4687
|
+
`## Not Started / Remaining\nWhat parts of the task you did not get to, and any recommendations for how to approach them.\n\n` +
|
|
4688
|
+
`## Suggested Next Steps\nSpecific, actionable steps for a follow-up agent to continue this work efficiently.` +
|
|
4641
4689
|
`${taskContext}${schemaContext}\n\n` +
|
|
4642
|
-
`
|
|
4690
|
+
`IMPORTANT: Include ALL useful data you gathered inline — do not just say "I found X", actually include X. The next agent will only see this report, not your tool call history.`;
|
|
4643
4691
|
|
|
4644
4692
|
const summaryMessages = [
|
|
4645
4693
|
...currentMessages,
|
|
@@ -4791,21 +4839,33 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
4791
4839
|
const searchQueries = [];
|
|
4792
4840
|
const searchDetails = [];
|
|
4793
4841
|
const toolCounts = {};
|
|
4842
|
+
const toolTimeline = [];
|
|
4794
4843
|
for (const tc of _toolCallLog) {
|
|
4844
|
+
if (tc.name === '_assistant_text') continue;
|
|
4795
4845
|
toolCounts[tc.name] = (toolCounts[tc.name] || 0) + 1;
|
|
4796
4846
|
if (tc.name === 'search') {
|
|
4797
4847
|
const q = tc.args.query || '';
|
|
4798
4848
|
const p = tc.args.path || '.';
|
|
4799
4849
|
const exact = tc.args.exact ? ' (exact)' : '';
|
|
4800
4850
|
searchQueries.push(`"${q}"${exact}`);
|
|
4801
|
-
searchDetails.push({ query: q, path: p, had_results:
|
|
4851
|
+
searchDetails.push({ query: q, path: p, had_results: !!(tc.resultBrief && tc.resultBrief.trim()) });
|
|
4802
4852
|
}
|
|
4853
|
+
const argStr = tc.name === 'search'
|
|
4854
|
+
? `query="${tc.args.query || ''}"${tc.args.exact ? ' exact' : ''}`
|
|
4855
|
+
: JSON.stringify(tc.args || {}).substring(0, 150);
|
|
4856
|
+
const brief = tc.resultBrief ? ` → ${tc.resultBrief.substring(0, 200)}` : ' → (no result)';
|
|
4857
|
+
toolTimeline.push(` [step ${tc.step}] ${tc.name}(${argStr})${brief}`);
|
|
4803
4858
|
}
|
|
4804
4859
|
const toolBreakdown = Object.entries(toolCounts)
|
|
4805
4860
|
.map(([name, count]) => `${name}: ${count}x`)
|
|
4806
4861
|
.join(', ');
|
|
4807
4862
|
const uniqueSearches = [...new Set(searchQueries)];
|
|
4808
4863
|
|
|
4864
|
+
// Collect any assistant text fragments as partial findings
|
|
4865
|
+
const assistantTexts = _toolCallLog
|
|
4866
|
+
.filter(tc => tc.name === '_assistant_text' && tc.resultBrief)
|
|
4867
|
+
.map(tc => tc.resultBrief);
|
|
4868
|
+
|
|
4809
4869
|
// For code-searcher subagents: produce structured JSON so the parent
|
|
4810
4870
|
// can still use partial results instead of getting a plain error string.
|
|
4811
4871
|
if (this.promptType === 'code-searcher') {
|
|
@@ -4816,12 +4876,18 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
4816
4876
|
searches: searchDetails
|
|
4817
4877
|
});
|
|
4818
4878
|
} else {
|
|
4819
|
-
let summary =
|
|
4820
|
-
summary +=
|
|
4879
|
+
let summary = `## Progress Report (iteration limit reached after ${currentIteration} steps)\n\n`;
|
|
4880
|
+
summary += `### Tool Usage Summary\n${toolBreakdown || 'none'}\n\n`;
|
|
4821
4881
|
if (uniqueSearches.length > 0) {
|
|
4822
|
-
summary +=
|
|
4882
|
+
summary += `### Search Queries Attempted\n${uniqueSearches.join(', ')}\n\n`;
|
|
4883
|
+
}
|
|
4884
|
+
if (toolTimeline.length > 0) {
|
|
4885
|
+
summary += `### Step-by-Step Activity Log\n${toolTimeline.join('\n')}\n\n`;
|
|
4886
|
+
}
|
|
4887
|
+
if (assistantTexts.length > 0) {
|
|
4888
|
+
summary += `### Partial Findings\n${assistantTexts.join('\n\n')}\n\n`;
|
|
4823
4889
|
}
|
|
4824
|
-
summary +=
|
|
4890
|
+
summary += `### Recommendation for Follow-Up\nThe iteration limit was reached before the task could be completed. A follow-up agent should review the activity log above to avoid repeating the same searches, and consider alternative approaches such as: using exact=true for literal string matching, using bash/grep for pattern-based file searches, or trying a different strategy.`;
|
|
4825
4891
|
finalResult = summary;
|
|
4826
4892
|
}
|
|
4827
4893
|
} catch {
|
|
@@ -136,9 +136,28 @@ export class SimpleTelemetry {
|
|
|
136
136
|
* Simple tracer for application-level tracing
|
|
137
137
|
*/
|
|
138
138
|
export class SimpleAppTracer {
|
|
139
|
-
constructor(telemetry, sessionId = null) {
|
|
139
|
+
constructor(telemetry, sessionId = null, options = {}) {
|
|
140
140
|
this.telemetry = telemetry;
|
|
141
141
|
this.sessionId = sessionId || this.generateSessionId();
|
|
142
|
+
this.parentSessionId = options.parentSessionId || null;
|
|
143
|
+
this.rootSessionId = options.rootSessionId || this.sessionId;
|
|
144
|
+
this.agentKind = options.agentKind || 'main';
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Create a child tracer for a delegated subagent.
|
|
149
|
+
* Inherits the same telemetry backend but scopes events to the child session.
|
|
150
|
+
* @param {string} childSessionId - The subagent's session ID
|
|
151
|
+
* @param {Object} [options] - Additional options
|
|
152
|
+
* @param {string} [options.agentKind='delegate'] - Kind of child agent
|
|
153
|
+
* @returns {SimpleAppTracer} A new tracer scoped to the child session
|
|
154
|
+
*/
|
|
155
|
+
createChildTracer(childSessionId, options = {}) {
|
|
156
|
+
return new SimpleAppTracer(this.telemetry, childSessionId, {
|
|
157
|
+
parentSessionId: this.sessionId,
|
|
158
|
+
rootSessionId: this.rootSessionId,
|
|
159
|
+
agentKind: options.agentKind || 'delegate',
|
|
160
|
+
});
|
|
142
161
|
}
|
|
143
162
|
|
|
144
163
|
generateSessionId() {
|
|
@@ -500,13 +500,28 @@ export class TaskManager {
|
|
|
500
500
|
|
|
501
501
|
let line = ` <task id="${this._escapeXml(task.id)}" status="${this._escapeXml(task.status)}"`;
|
|
502
502
|
if (task.priority) line += ` priority="${this._escapeXml(task.priority)}"`;
|
|
503
|
+
if (task.dependencies.length > 0) line += ` depends_on="${this._escapeXml(task.dependencies.join(','))}"`;
|
|
503
504
|
if (blockers.length > 0) line += ` blocked_by="${this._escapeXml(blockers.join(','))}"`;
|
|
504
505
|
line += `>${this._escapeXml(task.title)}</task>`;
|
|
505
506
|
|
|
506
507
|
return line;
|
|
507
508
|
});
|
|
508
509
|
|
|
509
|
-
|
|
510
|
+
// Add a brief status line so the AI can quickly assess progress
|
|
511
|
+
let completed = 0, inProgress = 0, pending = 0, cancelled = 0;
|
|
512
|
+
for (const t of tasks) {
|
|
513
|
+
if (t.status === 'completed') completed++;
|
|
514
|
+
else if (t.status === 'in_progress') inProgress++;
|
|
515
|
+
else if (t.status === 'pending') pending++;
|
|
516
|
+
else if (t.status === 'cancelled') cancelled++;
|
|
517
|
+
}
|
|
518
|
+
const statusLine = ` <!-- ${completed}/${tasks.length} completed` +
|
|
519
|
+
(inProgress > 0 ? `, ${inProgress} in progress` : '') +
|
|
520
|
+
(pending > 0 ? `, ${pending} pending` : '') +
|
|
521
|
+
(cancelled > 0 ? `, ${cancelled} cancelled` : '') +
|
|
522
|
+
` -->`;
|
|
523
|
+
|
|
524
|
+
return `<task_status>\n${statusLine}\n${taskLines.join('\n')}\n</task_status>`;
|
|
510
525
|
}
|
|
511
526
|
|
|
512
527
|
/**
|