@probelabs/probe 0.6.0-rc271 → 0.6.0-rc273
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/{probe-v0.6.0-rc271-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc273-aarch64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/probe-v0.6.0-rc273-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc273-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc273-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc273-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/contextCompactor.js +42 -25
- package/build/agent/index.js +78 -177
- package/build/agent/shared/prompts.js +4 -3
- package/build/agent/tasks/taskTool.js +46 -235
- package/build/tools/analyzeAll.js +3 -4
- package/build/tools/edit.js +3 -3
- package/build/tools/vercel.js +1 -1
- package/cjs/agent/ProbeAgent.cjs +95 -202
- package/cjs/index.cjs +95 -202
- package/package.json +1 -1
- package/src/agent/contextCompactor.js +42 -25
- package/src/agent/shared/prompts.js +4 -3
- package/src/agent/tasks/taskTool.js +46 -235
- package/src/tools/analyzeAll.js +3 -4
- package/src/tools/edit.js +3 -3
- package/src/tools/vercel.js +1 -1
- package/bin/binaries/probe-v0.6.0-rc271-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc271-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc271-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc271-x86_64-unknown-linux-musl.tar.gz +0 -0
package/build/agent/tasks/taskTool.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Task Tool -
|
|
2
|
+
* Task Tool - definition and executor for task management
|
|
3
3
|
* @module agent/tasks/taskTool
|
|
4
4
|
*/
|
|
5
5
|
|
|
@@ -35,241 +35,57 @@ export const taskSchema = z.object({
|
|
|
35
35
|
});
|
|
36
36
|
|
|
37
37
|
/**
|
|
38
|
-
* Task tool
|
|
38
|
+
* Task tool definition (legacy export, no longer used — tool is registered natively via taskSchema)
|
|
39
39
|
*/
|
|
40
|
-
export const taskToolDefinition =
|
|
41
|
-
Manage tasks for tracking progress during code exploration and problem-solving. Create tasks to break down complex problems, track dependencies, and ensure all work is completed.
|
|
42
|
-
|
|
43
|
-
Parameters:
|
|
44
|
-
- action: (required) The action to perform: create, update, complete, delete, list
|
|
45
|
-
- tasks: (optional) Array of task objects for batch operations. Place raw JSON array directly between tags.
|
|
46
|
-
- id: (optional) Task ID for single operations (e.g., "task-1")
|
|
47
|
-
- title: (optional) Task title for create/update
|
|
48
|
-
- description: (optional) Task description for create/update
|
|
49
|
-
- status: (optional) Task status for update: pending, in_progress, completed, cancelled
|
|
50
|
-
- priority: (optional) Task priority: low, medium, high, critical
|
|
51
|
-
- dependencies: (optional) JSON array of task IDs that must be completed first
|
|
52
|
-
- after: (optional) Task ID to insert the new task after (for ordering). By default, new tasks are appended to the end
|
|
53
|
-
|
|
54
|
-
IMPORTANT - JSON Format:
|
|
55
|
-
Place raw JSON arrays directly between tags without quotes or escaping:
|
|
56
|
-
CORRECT: <tasks>[{"title": "Do X"}]</tasks>
|
|
57
|
-
INCORRECT: <tasks>"[{\"title\": \"Do X\"}]"</tasks>
|
|
58
|
-
|
|
59
|
-
Usage Examples:
|
|
60
|
-
|
|
61
|
-
Creating a single task:
|
|
62
|
-
<task>
|
|
63
|
-
<action>create</action>
|
|
64
|
-
<title>Analyze authentication module</title>
|
|
65
|
-
<description>Search and understand how authentication works</description>
|
|
66
|
-
<priority>high</priority>
|
|
67
|
-
</task>
|
|
68
|
-
|
|
69
|
-
Creating multiple tasks with dependencies:
|
|
70
|
-
<task>
|
|
71
|
-
<action>create</action>
|
|
72
|
-
<tasks>[
|
|
73
|
-
{"title": "Search for user model", "priority": "high"},
|
|
74
|
-
{"title": "Analyze authentication flow", "dependencies": ["task-1"]},
|
|
75
|
-
{"title": "Review session management", "dependencies": ["task-2"]}
|
|
76
|
-
]</tasks>
|
|
77
|
-
</task>
|
|
78
|
-
|
|
79
|
-
Inserting a task after a specific task (instead of appending to end):
|
|
80
|
-
<task>
|
|
81
|
-
<action>create</action>
|
|
82
|
-
<title>Investigate error handling</title>
|
|
83
|
-
<after>task-2</after>
|
|
84
|
-
</task>
|
|
85
|
-
|
|
86
|
-
Updating a task status:
|
|
87
|
-
<task>
|
|
88
|
-
<action>update</action>
|
|
89
|
-
<id>task-1</id>
|
|
90
|
-
<status>in_progress</status>
|
|
91
|
-
</task>
|
|
92
|
-
|
|
93
|
-
Batch updating multiple tasks:
|
|
94
|
-
<task>
|
|
95
|
-
<action>update</action>
|
|
96
|
-
<tasks>[
|
|
97
|
-
{"id": "task-1", "status": "completed"},
|
|
98
|
-
{"id": "task-2", "status": "in_progress"}
|
|
99
|
-
]</tasks>
|
|
100
|
-
</task>
|
|
101
|
-
|
|
102
|
-
Completing a task:
|
|
103
|
-
<task>
|
|
104
|
-
<action>complete</action>
|
|
105
|
-
<id>task-1</id>
|
|
106
|
-
</task>
|
|
107
|
-
|
|
108
|
-
Cancelling a task:
|
|
109
|
-
<task>
|
|
110
|
-
<action>update</action>
|
|
111
|
-
<id>task-1</id>
|
|
112
|
-
<status>cancelled</status>
|
|
113
|
-
</task>
|
|
114
|
-
|
|
115
|
-
Deleting a task:
|
|
116
|
-
<task>
|
|
117
|
-
<action>delete</action>
|
|
118
|
-
<id>task-1</id>
|
|
119
|
-
</task>
|
|
120
|
-
|
|
121
|
-
Listing all tasks:
|
|
122
|
-
<task>
|
|
123
|
-
<action>list</action>
|
|
124
|
-
</task>
|
|
125
|
-
`;
|
|
40
|
+
export const taskToolDefinition = '';
|
|
126
41
|
|
|
127
42
|
/**
|
|
128
|
-
* Task system prompt addition -
|
|
43
|
+
* Task system prompt addition - guidance for AI on when and how to use tasks
|
|
129
44
|
*/
|
|
130
|
-
export const taskSystemPrompt = `[Task Management
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
## When to
|
|
135
|
-
|
|
136
|
-
CREATE
|
|
137
|
-
- "Fix bug A AND add feature B" →
|
|
138
|
-
- "Investigate auth, payments, AND notifications" →
|
|
139
|
-
- "Implement X, then add tests, then update docs" →
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
- "
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
-
|
|
162
|
-
-
|
|
163
|
-
-
|
|
164
|
-
|
|
165
|
-
**Good patterns**:
|
|
166
|
-
- One task per distinct deliverable ✓
|
|
167
|
-
- One task per phase (implement, test, document) ✓
|
|
168
|
-
- One task per different type of work ✓
|
|
169
|
-
|
|
170
|
-
MODIFY TASKS when (during execution):
|
|
171
|
-
- You discover the problem is more complex than expected → Add new tasks
|
|
172
|
-
- A single task covers too much scope → Split into smaller tasks
|
|
173
|
-
- You find related work that needs attention → Add dependent tasks
|
|
174
|
-
- A task becomes irrelevant based on findings → Cancel it
|
|
175
|
-
- Task priorities change based on discoveries → Update priority
|
|
176
|
-
- You learn new context → Update task description
|
|
177
|
-
|
|
178
|
-
## Task Workflow
|
|
179
|
-
|
|
180
|
-
**STEP 1 - Plan (at start):**
|
|
181
|
-
Analyze the request and create tasks for each logical step:
|
|
182
|
-
|
|
183
|
-
<task>
|
|
184
|
-
<action>create</action>
|
|
185
|
-
<tasks>[
|
|
186
|
-
{"title": "Search for authentication module", "priority": "high"},
|
|
187
|
-
{"title": "Analyze login flow implementation", "dependencies": ["task-1"]},
|
|
188
|
-
{"title": "Find session management code", "dependencies": ["task-1"]},
|
|
189
|
-
{"title": "Summarize authentication architecture", "dependencies": ["task-2", "task-3"]}
|
|
190
|
-
]</tasks>
|
|
191
|
-
</task>
|
|
192
|
-
|
|
193
|
-
**STEP 2 - Execute (during work):**
|
|
194
|
-
Update task status as you work:
|
|
195
|
-
|
|
196
|
-
<task>
|
|
197
|
-
<action>update</action>
|
|
198
|
-
<id>task-1</id>
|
|
199
|
-
<status>in_progress</status>
|
|
200
|
-
</task>
|
|
201
|
-
|
|
202
|
-
... do the work (search, extract, etc.) ...
|
|
203
|
-
|
|
204
|
-
<task>
|
|
205
|
-
<action>complete</action>
|
|
206
|
-
<id>task-1</id>
|
|
207
|
-
</task>
|
|
208
|
-
|
|
209
|
-
**STEP 2b - Adapt (when you discover new work):**
|
|
210
|
-
As you work, you may discover that:
|
|
211
|
-
- A task is more complex than expected → Split it into subtasks
|
|
212
|
-
- New areas need investigation → Add new tasks
|
|
213
|
-
- Some tasks are no longer needed → Cancel them
|
|
214
|
-
- Task order should change → Update dependencies
|
|
215
|
-
|
|
216
|
-
*Adding a new task when you discover more work:*
|
|
217
|
-
<task>
|
|
218
|
-
<action>create</action>
|
|
219
|
-
<title>Investigate caching layer</title>
|
|
220
|
-
<description>Found references to Redis caching in auth module</description>
|
|
221
|
-
</task>
|
|
222
|
-
|
|
223
|
-
*Inserting a task after a specific task (to maintain logical order):*
|
|
224
|
-
<task>
|
|
225
|
-
<action>create</action>
|
|
226
|
-
<title>Check rate limiting</title>
|
|
227
|
-
<after>task-2</after>
|
|
228
|
-
</task>
|
|
229
|
-
|
|
230
|
-
*Cancelling and splitting a complex task:*
|
|
231
|
-
<task>
|
|
232
|
-
<action>update</action>
|
|
233
|
-
<id>task-3</id>
|
|
234
|
-
<status>cancelled</status>
|
|
235
|
-
</task>
|
|
236
|
-
<task>
|
|
237
|
-
<action>create</action>
|
|
238
|
-
<tasks>[
|
|
239
|
-
{"title": "Review JWT token generation", "priority": "high"},
|
|
240
|
-
{"title": "Review token refresh logic"}
|
|
241
|
-
]</tasks>
|
|
242
|
-
</task>
|
|
243
|
-
|
|
244
|
-
**STEP 3 - Finish (before completion):**
|
|
245
|
-
Before calling attempt_completion, ensure ALL tasks are either:
|
|
246
|
-
- \`completed\` - you finished the work
|
|
247
|
-
- \`cancelled\` - no longer needed
|
|
248
|
-
|
|
249
|
-
If you created tasks, you MUST resolve them all before completing.
|
|
250
|
-
|
|
251
|
-
## Key Rules
|
|
252
|
-
|
|
253
|
-
1. **Dependencies are enforced**: A task cannot start until its dependencies are completed
|
|
254
|
-
2. **Circular dependencies are rejected**: task-1 → task-2 → task-1 is invalid
|
|
255
|
-
3. **Completion is blocked**: attempt_completion will fail if tasks remain unresolved
|
|
256
|
-
4. **List to review**: Use <task><action>list</action></task> to see current task status
|
|
257
|
-
5. **Tasks are living documents**: Add, split, or cancel tasks as you learn more about the problem
|
|
45
|
+
export const taskSystemPrompt = `[Task Management]
|
|
46
|
+
|
|
47
|
+
Use the task tool to track progress on complex requests with multiple distinct goals.
|
|
48
|
+
|
|
49
|
+
## When to Use Tasks
|
|
50
|
+
|
|
51
|
+
CREATE tasks when the request has **multiple separate deliverables**:
|
|
52
|
+
- "Fix bug A AND add feature B" → two tasks
|
|
53
|
+
- "Investigate auth, payments, AND notifications" → three tasks
|
|
54
|
+
- "Implement X, then add tests, then update docs" → three sequential tasks
|
|
55
|
+
|
|
56
|
+
SKIP tasks for single-goal requests, even complex ones:
|
|
57
|
+
- "How does ranking work?" — just investigate and answer
|
|
58
|
+
- "Explain the authentication flow" — just trace and explain
|
|
59
|
+
Multiple internal steps (search, read, analyze) for one goal ≠ multiple tasks.
|
|
60
|
+
|
|
61
|
+
## Granularity
|
|
62
|
+
|
|
63
|
+
Tasks = logical units of work, not files or steps.
|
|
64
|
+
- "Fix 8 similar test files" → ONE task (same fix repeated)
|
|
65
|
+
- "Update API + tests + docs" → THREE tasks (different work types)
|
|
66
|
+
- Max 3–4 tasks. More means you're too granular.
|
|
67
|
+
|
|
68
|
+
## Workflow
|
|
69
|
+
|
|
70
|
+
1. **Plan**: Call task tool with action="create" and a tasks array up front
|
|
71
|
+
2. **Execute**: Update status to "in_progress" / "completed" as you work. Add, split, or cancel tasks as you learn more.
|
|
72
|
+
3. **Finish**: All tasks must be "completed" or "cancelled" before calling attempt_completion.
|
|
73
|
+
|
|
74
|
+
## Rules
|
|
75
|
+
|
|
76
|
+
- Dependencies are enforced: a task cannot start until its dependencies are completed
|
|
77
|
+
- Circular dependencies are rejected
|
|
78
|
+
- attempt_completion is blocked while tasks remain unresolved
|
|
258
79
|
`;
|
|
259
80
|
|
|
260
81
|
/**
|
|
261
82
|
* Task guidance to inject at start of request
|
|
262
83
|
*/
|
|
263
|
-
export const taskGuidancePrompt =
|
|
264
|
-
Does this request have MULTIPLE DISTINCT GOALS?
|
|
84
|
+
export const taskGuidancePrompt = `Does this request have MULTIPLE DISTINCT GOALS?
|
|
265
85
|
- "Do A AND B AND C" (multiple goals) → Create tasks for each goal
|
|
266
86
|
- "Investigate/explain/find X" (single goal) → Skip tasks, just answer directly
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
Only create tasks when there are separate deliverables the user is asking for.
|
|
270
|
-
|
|
271
|
-
If creating tasks, use the task tool with action="create" first.
|
|
272
|
-
</task_guidance>`;
|
|
87
|
+
Multiple internal steps for ONE goal = NO tasks needed.
|
|
88
|
+
If creating tasks, use the task tool with action="create" first.`;
|
|
273
89
|
|
|
274
90
|
/**
|
|
275
91
|
* Create task completion blocked message
|
|
@@ -277,20 +93,15 @@ If creating tasks, use the task tool with action="create" first.
|
|
|
277
93
|
* @returns {string} Formatted message
|
|
278
94
|
*/
|
|
279
95
|
export function createTaskCompletionBlockedMessage(taskSummary) {
|
|
280
|
-
return
|
|
281
|
-
You cannot complete yet. The following tasks are still unresolved:
|
|
96
|
+
return `You cannot complete yet. The following tasks are still unresolved:
|
|
282
97
|
|
|
283
98
|
${taskSummary}
|
|
284
99
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
- Or cancel if no longer needed: <task><action>update</action><id>task-X</id><status>cancelled</status></task>
|
|
289
|
-
|
|
290
|
-
2. After ALL tasks are resolved (completed or cancelled), call attempt_completion again.
|
|
100
|
+
For each pending/in_progress task, either:
|
|
101
|
+
- Complete it: call task tool with action="complete", id="task-X"
|
|
102
|
+
- Cancel it: call task tool with action="update", id="task-X", status="cancelled"
|
|
291
103
|
|
|
292
|
-
|
|
293
|
-
</task_completion_blocked>`;
|
|
104
|
+
After all tasks are resolved, call attempt_completion again.`;
|
|
294
105
|
}
|
|
295
106
|
|
|
296
107
|
/**
|
package/build/tools/analyzeAll.js
CHANGED
|
@@ -176,8 +176,7 @@ Instructions:
|
|
|
176
176
|
- Format as a structured list if multiple items found
|
|
177
177
|
- If nothing relevant is found in this chunk, respond with "No relevant items found in this chunk."
|
|
178
178
|
- Do NOT summarize the code - extract the specific information requested
|
|
179
|
-
-
|
|
180
|
-
- Do NOT use the shorthand <attempt_complete></attempt_complete> format`;
|
|
179
|
+
- When done, use the attempt_completion tool with your answer as the result.`;
|
|
181
180
|
|
|
182
181
|
try {
|
|
183
182
|
const result = await delegate({
|
|
@@ -273,7 +272,7 @@ async function aggregateResults(chunkResults, aggregation, extractionPrompt, opt
|
|
|
273
272
|
.map(r => `--- Chunk ${r.chunk.id} ---\n${stripResultTags(r.result)}`)
|
|
274
273
|
.join('\n\n');
|
|
275
274
|
|
|
276
|
-
const completionNote = `\n\
|
|
275
|
+
const completionNote = `\n\nWhen done, use the attempt_completion tool with your answer as the result.`;
|
|
277
276
|
|
|
278
277
|
const aggregationPrompts = {
|
|
279
278
|
summarize: `Synthesize these analyses into a comprehensive summary. Combine related findings, remove redundancy, and present a coherent overview.
|
|
@@ -460,7 +459,7 @@ Your answer should:
|
|
|
460
459
|
|
|
461
460
|
Format your response as a well-structured document that fully answers: "${question}"
|
|
462
461
|
|
|
463
|
-
|
|
462
|
+
When done, use the attempt_completion tool with your answer as the result.`;
|
|
464
463
|
|
|
465
464
|
try {
|
|
466
465
|
const result = await delegate({
|
package/build/tools/edit.js
CHANGED
|
@@ -88,7 +88,7 @@ async function handleSymbolEdit({ resolvedPath, file_path, symbol, new_string, p
|
|
|
88
88
|
if (fileTracker) {
|
|
89
89
|
const check = fileTracker.checkSymbolContent(resolvedPath, symbol, symbolInfo.code);
|
|
90
90
|
if (!check.ok && check.reason === 'stale') {
|
|
91
|
-
return `Error editing ${file_path}: Symbol "${symbol}" has changed since you last read it. Use extract to re-read the current content, then retry
|
|
91
|
+
return `Error editing ${file_path}: Symbol "${symbol}" has changed since you last read it. Use the extract tool with targets="${file_path}#${symbol}" to re-read the current content, then retry.`;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
|
|
@@ -395,7 +395,7 @@ Parameters:
|
|
|
395
395
|
// Check if file has been seen in this session (read-before-write guard)
|
|
396
396
|
if (options.fileTracker && !options.fileTracker.isFileSeen(resolvedPath)) {
|
|
397
397
|
const displayPath = toRelativePath(resolvedPath, workspaceRoot);
|
|
398
|
-
return `Error editing ${displayPath}: This file has not been read yet in this session. Use
|
|
398
|
+
return `Error editing ${displayPath}: This file has not been read yet in this session. Use the extract tool with targets="${displayPath}" to read the file first, then retry your edit.`;
|
|
399
399
|
}
|
|
400
400
|
|
|
401
401
|
// Route to appropriate mode (priority: symbol > start_line > old_string)
|
|
@@ -425,7 +425,7 @@ Parameters:
|
|
|
425
425
|
const staleCheck = options.fileTracker.checkTextEditStaleness(resolvedPath);
|
|
426
426
|
if (!staleCheck.ok) {
|
|
427
427
|
const displayPath = toRelativePath(resolvedPath, workspaceRoot);
|
|
428
|
-
return `Error editing ${displayPath}: ${staleCheck.message}\n\
|
|
428
|
+
return `Error editing ${displayPath}: ${staleCheck.message}\n\nUse the extract tool with targets="${displayPath}" to re-read the file, then retry.`;
|
|
429
429
|
}
|
|
430
430
|
}
|
|
431
431
|
|
package/build/tools/vercel.js
CHANGED
|
@@ -145,7 +145,7 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
|
|
|
145
145
|
'',
|
|
146
146
|
'Strategy for complex queries:',
|
|
147
147
|
'1. Analyze the query - identify key concepts, entities, and relationships',
|
|
148
|
-
'2. Run focused searches for each concept (e.g., "
|
|
148
|
+
'2. Run focused searches for each independent concept (e.g., for "how do payments work and how are emails sent", search "payments" and "emails" separately since they are unrelated)',
|
|
149
149
|
'3. Use extract to verify relevance of promising results',
|
|
150
150
|
'4. Combine all relevant targets in your final response',
|
|
151
151
|
'',
|